Updates to slave catchup mode to use fake events
Addition of fake EPOLLOUT event mechanism New memlog feature for debugging purposes
This commit is contained in:
@ -52,7 +52,6 @@
|
||||
#include <skygw_utils.h>
|
||||
#include <log_manager.h>
|
||||
|
||||
|
||||
static uint32_t extract_field(uint8_t *src, int bits);
|
||||
static void encode_value(unsigned char *data, unsigned int value, int len);
|
||||
static int blr_slave_query(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue);
|
||||
@ -61,7 +60,7 @@ static void blr_slave_send_error(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, c
|
||||
static int blr_slave_send_timestamp(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave);
|
||||
static int blr_slave_register(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue);
|
||||
static int blr_slave_binlog_dump(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue);
|
||||
int blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave);
|
||||
int blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large);
|
||||
static uint8_t *blr_build_header(GWBUF *pkt, REP_HEADER *hdr);
|
||||
static int blr_slave_callback(DCB *dcb, DCB_REASON reason, void *data);
|
||||
|
||||
@ -567,7 +566,7 @@ uint32_t chksum;
|
||||
|
||||
slave->dcb->low_water = router->low_water;
|
||||
slave->dcb->high_water = router->high_water;
|
||||
dcb_add_callback(slave->dcb, DCB_REASON_LOW_WATER, blr_slave_callback, slave);
|
||||
// dcb_add_callback(slave->dcb, DCB_REASON_LOW_WATER, blr_slave_callback, slave);
|
||||
dcb_add_callback(slave->dcb, DCB_REASON_DRAINED, blr_slave_callback, slave);
|
||||
|
||||
if (slave->binlog_pos != router->binlog_position ||
|
||||
@ -576,7 +575,7 @@ uint32_t chksum;
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate &= ~CS_UPTODATE;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
rval = blr_slave_catchup(router, slave);
|
||||
rval = blr_slave_catchup(router, slave, true);
|
||||
}
|
||||
|
||||
return rval;
|
||||
@ -660,187 +659,107 @@ uint8_t *ptr;
|
||||
* We have a registered slave that is behind the current leading edge of the
|
||||
* binlog. We must replay the log entries to bring this node up to speed.
|
||||
*
|
||||
* There may be a large numebr of records to send to the slave, the process
|
||||
* There may be a large number of records to send to the slave, the process
|
||||
* is triggered by the slave COM_BINLOG_DUMP message and all the events must
|
||||
* be sent without receiving any new event. This measn there is no trigger into
|
||||
* MaxScale other than this initial message. However, if we simply send all the
|
||||
* events we end up with an extremely long write queue on the DCB and risk running
|
||||
* the server out of resources.
|
||||
* events we end up with an extremely long write queue on the DCB and risk
|
||||
* running the server out of resources.
|
||||
*
|
||||
* To resolve this the concept of high and low water marks within the DCB has been
|
||||
* added, with the ability for the DCB code to call user defined callbacks when the
|
||||
* write queue is completely drained, when it crosses above the high water mark and
|
||||
* when it crosses below the low water mark.
|
||||
*
|
||||
* The blr_slave_catchup routine will send binlog events to the slave until the high
|
||||
* water mark is reached, at which point it will return. Later, when a low water mark
|
||||
* callback is generated by the code that drains the DCB of data the blr_slave_catchup
|
||||
* routine will again be called to write more events. The process is repeated until
|
||||
* the slave has caught up with the master.
|
||||
* The slave catchup routine will send a burst of replication events per single
|
||||
* call. The paramter "long" control the number of events in the burst. The
|
||||
* short burst is intended to be used when the master receive an event and
|
||||
* needs to put the slave into catchup mode. This prevents the slave taking
|
||||
* too much tiem away from the thread that is processing the master events.
|
||||
*
|
||||
* Note: an additional check that the DCB is still above the low water mark is done
|
||||
* prior to the return from this function to allow for any delays due to the call to
|
||||
* the close system call, since this may cause thread rescheduling.
|
||||
* At the end ofthe burst a fake EPOLLOUT event is added to the poll event
|
||||
* queue. This ensures that the slave callback for processing DCB write drain
|
||||
* will be called and future catchup requests will be handle on another thread.
|
||||
*
|
||||
* @param router The binlog router
|
||||
* @param slave The slave that is behind
|
||||
* @param large Send a long or short burst of events
|
||||
* @return The number of bytes written
|
||||
*/
|
||||
int
|
||||
blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave)
|
||||
blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
|
||||
{
|
||||
GWBUF *head, *record;
|
||||
REP_HEADER hdr;
|
||||
int written, fd, rval = 1, burst = 0;
|
||||
int written, fd, rval = 1, burst;
|
||||
uint8_t *ptr;
|
||||
struct timespec req;
|
||||
|
||||
|
||||
if (large)
|
||||
burst = router->long_burst;
|
||||
else
|
||||
burst = router->short_burst;
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate &= ~CS_EXPECTCB;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
doitagain:
|
||||
/*
|
||||
* We have a slightly complex syncronisation mechansim here,
|
||||
* we need to make sure that we do not have multiple threads
|
||||
* running the catchup loop, but we need to be very careful
|
||||
* that we do not loose a call that is coming via a callback
|
||||
* call as this will stall the binlog catchup process.
|
||||
*
|
||||
* We don't want to simply use a traditional mutex here for
|
||||
* the loop, since this would block a MaxScale thread for
|
||||
* an unacceptable length of time.
|
||||
*
|
||||
* We have two status bits, the CS_READING that says we are
|
||||
* in the outer loop and the CS_INNERLOOP, to say we are in
|
||||
* the inner loop.
|
||||
*
|
||||
* If just CS_READING is set the other thread may be about to
|
||||
* enter the inner loop or may be about to exit the function
|
||||
* completely. Therefore we have to wait to see if CS_READING
|
||||
* is cleared or CS_INNERLOOP is set.
|
||||
*
|
||||
* If CS_READING gets cleared then this thread should proceed
|
||||
* into the loop.
|
||||
*
|
||||
* If CS_INNERLOOP get's set then this thread does not need to
|
||||
* proceed.
|
||||
*
|
||||
* If CS_READING is not set then this thread simply enters the
|
||||
* loop.
|
||||
*/
|
||||
req.tv_sec = 0;
|
||||
req.tv_nsec = 1000;
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
if (slave->cstate & CS_UPTODATE)
|
||||
while ((slave->cstate & (CS_HOLD|CS_BUSY)) == (CS_HOLD|CS_BUSY))
|
||||
{
|
||||
LOGIF(LM, (skygw_log_write(LOGFILE_MESSAGE,
|
||||
"blr_slave_catchup called with up to date slave %d at "
|
||||
"%s@%d. Reading position %s@%d\n",
|
||||
slave->serverid, slave->binlogfile,
|
||||
slave->binlog_pos, router->binlog_name,
|
||||
router->binlog_position)));
|
||||
slave->stats.n_alreadyupd++;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
return 1;
|
||||
}
|
||||
while (slave->cstate & CS_READING)
|
||||
{
|
||||
// Wait until we know what the other thread is doing
|
||||
while ((slave->cstate & (CS_READING|CS_INNERLOOP)) == CS_READING)
|
||||
{
|
||||
spinlock_release(&slave->catch_lock);
|
||||
nanosleep(&req, NULL);
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
}
|
||||
// Other thread is in the innerloop
|
||||
if ((slave->cstate & (CS_READING|CS_INNERLOOP)) == (CS_READING|CS_INNERLOOP))
|
||||
{
|
||||
spinlock_release(&slave->catch_lock);
|
||||
LOGIF(LM, (skygw_log_write(
|
||||
LOGFILE_MESSAGE,
|
||||
"blr_slave_catchup thread returning due to "
|
||||
"lock being held by another thread. %s@%d\n",
|
||||
slave->binlogfile,
|
||||
slave->binlog_pos)));
|
||||
slave->stats.n_catchupnr++;
|
||||
return 1; // We cheat here and return 1 because otherwise
|
||||
// an error would be sent and we do not want that
|
||||
}
|
||||
|
||||
/* Release the lock for a short time to allow the other
|
||||
* thread to exit the outer reading loop.
|
||||
*/
|
||||
spinlock_release(&slave->catch_lock);
|
||||
req.tv_sec = 0;
|
||||
req.tv_nsec = 100;
|
||||
nanosleep(&req, NULL);
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
}
|
||||
if (slave->pthread)
|
||||
LOGIF(LD, (skygw_log_write(LOGFILE_DEBUG, "Multiple threads sending to same thread.\n")));
|
||||
slave->pthread = pthread_self();
|
||||
slave->cstate |= CS_READING;
|
||||
slave->cstate |= (CS_HOLD|CS_BUSY);
|
||||
spinlock_release(&slave->catch_lock);
|
||||
|
||||
if (DCB_ABOVE_HIGH_WATER(slave->dcb))
|
||||
LOGIF(LT, (skygw_log_write(LOGFILE_TRACE, "blr_slave_catchup above high water on entry.\n")));
|
||||
|
||||
do {
|
||||
if ((fd = blr_open_binlog(router, slave->binlogfile)) == -1)
|
||||
if ((fd = blr_open_binlog(router, slave->binlogfile)) == -1)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR,
|
||||
"blr_slave_catchup failed to open binlog file %s\n",
|
||||
slave->binlogfile)));
|
||||
return 0;
|
||||
}
|
||||
slave->stats.n_bursts++;
|
||||
while (burst-- &&
|
||||
(record = blr_read_binlog(fd, slave->binlog_pos, &hdr)) != NULL)
|
||||
{
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate &= ~CS_HOLD;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
head = gwbuf_alloc(5);
|
||||
ptr = GWBUF_DATA(head);
|
||||
encode_value(ptr, hdr.event_size + 1, 24);
|
||||
ptr += 3;
|
||||
*ptr++ = slave->seqno++;
|
||||
*ptr++ = 0; // OK
|
||||
head = gwbuf_append(head, record);
|
||||
if (hdr.event_type == ROTATE_EVENT)
|
||||
{
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate &= ~CS_READING;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR,
|
||||
"blr_slave_catchup failed to open binlog file %s\n",
|
||||
close(fd);
|
||||
blr_slave_rotate(slave, GWBUF_DATA(record));
|
||||
if ((fd = blr_open_binlog(router, slave->binlogfile)) == -1)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR,
|
||||
"blr_slave_catchup failed to open binlog file %s\n",
|
||||
slave->binlogfile)));
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
slave->stats.n_bursts++;
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate |= CS_INNERLOOP;
|
||||
slave->cstate |= CS_HOLD;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
while ((!DCB_ABOVE_HIGH_WATER(slave->dcb)) &&
|
||||
(record = blr_read_binlog(fd, slave->binlog_pos, &hdr)) != NULL)
|
||||
written = slave->dcb->func.write(slave->dcb, head);
|
||||
if (written && hdr.event_type != ROTATE_EVENT)
|
||||
{
|
||||
if (hdr.event_size > DEF_HIGH_WATER) slave->stats.n_above++;
|
||||
head = gwbuf_alloc(5);
|
||||
ptr = GWBUF_DATA(head);
|
||||
encode_value(ptr, hdr.event_size + 1, 24);
|
||||
ptr += 3;
|
||||
*ptr++ = slave->seqno++;
|
||||
*ptr++ = 0; // OK
|
||||
head = gwbuf_append(head, record);
|
||||
if (hdr.event_type == ROTATE_EVENT)
|
||||
{
|
||||
close(fd);
|
||||
blr_slave_rotate(slave, GWBUF_DATA(record));
|
||||
if ((fd = blr_open_binlog(router, slave->binlogfile)) == -1)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR,
|
||||
"blr_slave_catchup failed to open binlog file %s\n",
|
||||
slave->binlogfile)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
written = slave->dcb->func.write(slave->dcb, head);
|
||||
if (written && hdr.event_type != ROTATE_EVENT)
|
||||
{
|
||||
slave->binlog_pos = hdr.next_pos;
|
||||
}
|
||||
rval = written;
|
||||
slave->stats.n_events++;
|
||||
burst++;
|
||||
slave->binlog_pos = hdr.next_pos;
|
||||
}
|
||||
if (record == NULL)
|
||||
slave->stats.n_failed_read++;
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate &= ~CS_INNERLOOP;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
rval = written;
|
||||
slave->stats.n_events++;
|
||||
}
|
||||
if (record == NULL)
|
||||
slave->stats.n_failed_read++;
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate &= ~CS_BUSY;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
|
||||
close(fd);
|
||||
} while (record && DCB_BELOW_LOW_WATER(slave->dcb));
|
||||
close(fd);
|
||||
poll_fake_write_event(slave->dcb);
|
||||
if (record)
|
||||
{
|
||||
slave->stats.n_flows++;
|
||||
@ -864,25 +783,6 @@ if (hdr.event_size > DEF_HIGH_WATER) slave->stats.n_above++;
|
||||
"blr_slave_catchup slave is up to date %s, %u\n",
|
||||
slave->binlogfile, slave->binlog_pos)));
|
||||
}
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
#if 0
|
||||
if (slave->pthread != pthread_self())
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR, "Multple threads in catchup for same slave: %x and %x\n", slave->pthread, pthread_self())));
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
slave->pthread = 0;
|
||||
#if 0
|
||||
if (DCB_BELOW_LOW_WATER(slave->dcb) && slave->binlog_pos != router->binlog_position) abort();
|
||||
#endif
|
||||
slave->cstate &= ~CS_READING;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
if (DCB_BELOW_LOW_WATER(slave->dcb) && slave->binlog_pos != router->binlog_position)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR, "Expected to be above low water\n")));
|
||||
goto doitagain;
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
@ -908,7 +808,7 @@ ROUTER_INSTANCE *router = slave->router;
|
||||
slave->binlog_pos != router->binlog_position)
|
||||
{
|
||||
slave->stats.n_dcb++;
|
||||
blr_slave_catchup(router, slave);
|
||||
blr_slave_catchup(router, slave, true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -917,7 +817,7 @@ ROUTER_INSTANCE *router = slave->router;
|
||||
if (slave->state == BLRS_DUMPING)
|
||||
{
|
||||
slave->stats.n_cb++;
|
||||
blr_slave_catchup(router, slave);
|
||||
blr_slave_catchup(router, slave, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user