Updates to slave catchup mode to use fake events

Addition of fake EPOLLOUT event mechanism. New memlog feature for debugging purposes.
2014-09-30 13:25:45 +01:00
parent 3430fc99d2
commit 0ef87e3cc1
13 changed files with 951 additions and 253 deletions
--- a/server/modules/routing/binlog/blr_slave.c
+++ b/server/modules/routing/binlog/blr_slave.c
@ -52,7 +52,6 @@
 #include <skygw_utils.h>
 #include <log_manager.h>

-
 static uint32_t extract_field(uint8_t *src, int bits);
 static void encode_value(unsigned char *data, unsigned int value, int len);
 static int blr_slave_query(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue);
@ -61,7 +60,7 @@ static void blr_slave_send_error(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, c
 static int blr_slave_send_timestamp(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave);
 static int blr_slave_register(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue);
 static int blr_slave_binlog_dump(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue);
-int blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave);
+int blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large);
 static uint8_t *blr_build_header(GWBUF	*pkt, REP_HEADER *hdr);
 static int blr_slave_callback(DCB *dcb, DCB_REASON reason, void *data);

@ -567,7 +566,7 @@ uint32_t	chksum;

 	slave->dcb->low_water  = router->low_water;
 	slave->dcb->high_water = router->high_water;
-	dcb_add_callback(slave->dcb, DCB_REASON_LOW_WATER, blr_slave_callback, slave);
+//	dcb_add_callback(slave->dcb, DCB_REASON_LOW_WATER, blr_slave_callback, slave);
 	dcb_add_callback(slave->dcb, DCB_REASON_DRAINED, blr_slave_callback, slave);

 	if (slave->binlog_pos != router->binlog_position ||
@ -576,7 +575,7 @@ uint32_t	chksum;
 		spinlock_acquire(&slave->catch_lock);
 		slave->cstate &= ~CS_UPTODATE;
 		spinlock_release(&slave->catch_lock);
-		rval = blr_slave_catchup(router, slave);
+		rval = blr_slave_catchup(router, slave, true);
 	}

 	return rval;
@ -660,187 +659,107 @@ uint8_t	*ptr;
 * We have a registered slave that is behind the current leading edge of the 
 * binlog. We must replay the log entries to bring this node up to speed.
 *
- * There may be a large numebr of records to send to the slave, the process
+ * There may be a large number of records to send to the slave, the process
 * is triggered by the slave COM_BINLOG_DUMP message and all the events must
 * be sent without receiving any new event. This measn there is no trigger into
 * MaxScale other than this initial message. However, if we simply send all the
- * events we end up with an extremely long write queue on the DCB and risk running
- * the server out of resources.
+ * events we end up with an extremely long write queue on the DCB and risk
+ * running the server out of resources.
 *
- * To resolve this the concept of high and low water marks within the DCB has been
- * added, with the ability for the DCB code to call user defined callbacks when the
- * write queue is completely drained, when it crosses above the high water mark and
- * when it crosses below the low water mark.
- * 
- * The blr_slave_catchup routine will send binlog events to the slave until the high
- * water mark is reached, at which point it will return. Later, when a low water mark
- * callback is generated by the code that drains the DCB of data the blr_slave_catchup
- * routine will again be called to write more events. The process is repeated until
- * the slave has caught up with the master.
+ * The slave catchup routine will send a burst of replication events per single
+ * call. The parameter "large" controls the number of events in the burst. The
+ * short burst is intended to be used when the master receives an event and
+ * needs to put the slave into catchup mode. This prevents the slave taking
+ * too much time away from the thread that is processing the master events.
 *
- * Note: an additional check that the DCB is still above the low water mark is done
- * prior to the return from this function to allow for any delays due to the call to
- * the close system call, since this may cause thread rescheduling.
+ * At the end of the burst a fake EPOLLOUT event is added to the poll event
+ * queue. This ensures that the slave callback for processing DCB write drain
+ * will be called and future catchup requests will be handled on another thread.
 *
 * @param	router		The binlog router
 * @param	slave		The slave that is behind
+ * @param	large		True to send a long burst of events, false for a short burst
 * @return			The number of bytes written
 */
 int
-blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave)
+blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
 {
 GWBUF		*head, *record;
 REP_HEADER	hdr;
-int		written, fd, rval = 1, burst = 0;
+int		written, fd, rval = 1, burst;
 uint8_t		*ptr;
 struct timespec	req;

-
+	if (large)
+		burst = router->long_burst;
+	else
+		burst = router->short_burst;
 	spinlock_acquire(&slave->catch_lock);
-	slave->cstate &= ~CS_EXPECTCB;
-	spinlock_release(&slave->catch_lock);
-doitagain:
-	/*
-	 * We have a slightly complex syncronisation mechansim here,
-	 * we need to make sure that we do not have multiple threads
-	 * running the catchup loop, but we need to be very careful
-	 * that we do not loose a call that is coming via a callback
-	 * call as this will stall the binlog catchup process.
-	 *
-	 * We don't want to simply use a traditional mutex here for
-	 * the loop, since this would block a MaxScale thread for
-	 * an unacceptable length of time.
-	 *
-	 * We have two status bits, the CS_READING that says we are
-	 * in the outer loop and the CS_INNERLOOP, to say we are in
-	 * the inner loop.
-	 *
-	 * If just CS_READING is set the other thread may be about to
-	 * enter the inner loop or may be about to exit the function
-	 * completely. Therefore we have to wait to see if CS_READING
-	 * is cleared or CS_INNERLOOP is set.
-	 *
-	 * If CS_READING gets cleared then this thread should proceed
-	 * into the loop.
-	 *
-	 * If CS_INNERLOOP get's set then this thread does not need to
-	 * proceed.
-	 *
-	 * If CS_READING is not set then this thread simply enters the
-	 * loop.
-	 */
-	req.tv_sec = 0;
-	req.tv_nsec = 1000;
-	spinlock_acquire(&slave->catch_lock);
-	if (slave->cstate & CS_UPTODATE)
+	while ((slave->cstate & (CS_HOLD|CS_BUSY)) == (CS_HOLD|CS_BUSY))
 	{
-       		LOGIF(LM, (skygw_log_write(LOGFILE_MESSAGE,
-			"blr_slave_catchup called with up to date slave %d at "
-			"%s@%d. Reading position %s@%d\n",
-				slave->serverid, slave->binlogfile,
-				slave->binlog_pos, router->binlog_name,
-				router->binlog_position)));
-		slave->stats.n_alreadyupd++;
-		spinlock_release(&slave->catch_lock);
-		return 1;
-	}
-	while (slave->cstate & CS_READING)
-	{
-		// Wait until we know what the other thread is doing
-		while ((slave->cstate & (CS_READING|CS_INNERLOOP)) == CS_READING)
-		{
-			spinlock_release(&slave->catch_lock);
-			nanosleep(&req, NULL);
-			spinlock_acquire(&slave->catch_lock);
-		}
-		// Other thread is in the innerloop
-		if ((slave->cstate & (CS_READING|CS_INNERLOOP)) == (CS_READING|CS_INNERLOOP))
-		{
-			spinlock_release(&slave->catch_lock);
-        		LOGIF(LM, (skygw_log_write(
-				LOGFILE_MESSAGE,
-				"blr_slave_catchup thread returning due to "
-				"lock being held by another thread. %s@%d\n",
-					slave->binlogfile,
-					slave->binlog_pos)));
-			slave->stats.n_catchupnr++;
-			return 1;	// We cheat here and return 1 because otherwise
-					// an error would be sent and we do not want that
-		}
-
-		/* Release the lock for a short time to allow the other
-		 * thread to exit the outer reading loop.
-		 */
 		spinlock_release(&slave->catch_lock);
+		req.tv_sec = 0;
+		req.tv_nsec = 100;
 		nanosleep(&req, NULL);
 		spinlock_acquire(&slave->catch_lock);
 	}
-	if (slave->pthread)
-		LOGIF(LD, (skygw_log_write(LOGFILE_DEBUG, "Multiple threads sending to same thread.\n")));
-	slave->pthread = pthread_self();
-	slave->cstate |= CS_READING;
+	slave->cstate |= (CS_HOLD|CS_BUSY);
 	spinlock_release(&slave->catch_lock);

-	if (DCB_ABOVE_HIGH_WATER(slave->dcb))
-		LOGIF(LT, (skygw_log_write(LOGFILE_TRACE, "blr_slave_catchup above high water on entry.\n")));
-
-	do {
-		if ((fd = blr_open_binlog(router, slave->binlogfile)) == -1)
+	if ((fd = blr_open_binlog(router, slave->binlogfile)) == -1)
+	{
+		LOGIF(LE, (skygw_log_write(
+			LOGFILE_ERROR,
+			"blr_slave_catchup failed to open binlog file %s\n",
+				slave->binlogfile)));
+		return 0;
+	}
+	slave->stats.n_bursts++;
+	while (burst-- &&
+		(record = blr_read_binlog(fd, slave->binlog_pos, &hdr)) != NULL)
+	{
+		spinlock_acquire(&slave->catch_lock);
+		slave->cstate &= ~CS_HOLD;
+		spinlock_release(&slave->catch_lock);
+		head = gwbuf_alloc(5);
+		ptr = GWBUF_DATA(head);
+		encode_value(ptr, hdr.event_size + 1, 24);
+		ptr += 3;
+		*ptr++ = slave->seqno++;
+		*ptr++ = 0;		// OK
+		head = gwbuf_append(head, record);
+		if (hdr.event_type == ROTATE_EVENT)
 		{
-			spinlock_acquire(&slave->catch_lock);
-			slave->cstate &= ~CS_READING;
-			spinlock_release(&slave->catch_lock);
-        		LOGIF(LE, (skygw_log_write(
-				LOGFILE_ERROR,
-				"blr_slave_catchup failed to open binlog file %s\n",
+			close(fd);
+			blr_slave_rotate(slave, GWBUF_DATA(record));
+			if ((fd = blr_open_binlog(router, slave->binlogfile)) == -1)
+			{
+				LOGIF(LE, (skygw_log_write(
+					LOGFILE_ERROR,
+					"blr_slave_catchup failed to open binlog file %s\n",
 					slave->binlogfile)));
-			return 0;
+				break;
+			}
 		}
-		slave->stats.n_bursts++;
 		spinlock_acquire(&slave->catch_lock);
-		slave->cstate |= CS_INNERLOOP;
+		slave->cstate |= CS_HOLD;
 		spinlock_release(&slave->catch_lock);
-		while ((!DCB_ABOVE_HIGH_WATER(slave->dcb)) &&
-			(record = blr_read_binlog(fd, slave->binlog_pos, &hdr)) != NULL)
+		written = slave->dcb->func.write(slave->dcb, head);
+		if (written && hdr.event_type != ROTATE_EVENT)
 		{
-if (hdr.event_size > DEF_HIGH_WATER) slave->stats.n_above++;
-			head = gwbuf_alloc(5);
-			ptr = GWBUF_DATA(head);
-			encode_value(ptr, hdr.event_size + 1, 24);
-			ptr += 3;
-			*ptr++ = slave->seqno++;
-			*ptr++ = 0;		// OK
-			head = gwbuf_append(head, record);
-			if (hdr.event_type == ROTATE_EVENT)
-			{
-				close(fd);
-				blr_slave_rotate(slave, GWBUF_DATA(record));
-				if ((fd = blr_open_binlog(router, slave->binlogfile)) == -1)
-				{
-        				LOGIF(LE, (skygw_log_write(
-						LOGFILE_ERROR,
-						"blr_slave_catchup failed to open binlog file %s\n",
-						slave->binlogfile)));
-					break;
-				}
-			}
-			written = slave->dcb->func.write(slave->dcb, head);
-			if (written && hdr.event_type != ROTATE_EVENT)
-			{
-				slave->binlog_pos = hdr.next_pos;
-			}
-			rval = written;
-			slave->stats.n_events++;
-			burst++;
+			slave->binlog_pos = hdr.next_pos;
 		}
-		if (record == NULL)
-			slave->stats.n_failed_read++;
-		spinlock_acquire(&slave->catch_lock);
-		slave->cstate &= ~CS_INNERLOOP;
-		spinlock_release(&slave->catch_lock);
+		rval = written;
+		slave->stats.n_events++;
+	}
+	if (record == NULL)
+		slave->stats.n_failed_read++;
+	spinlock_acquire(&slave->catch_lock);
+	slave->cstate &= ~CS_BUSY;
+	spinlock_release(&slave->catch_lock);

-		close(fd);
-	} while (record && DCB_BELOW_LOW_WATER(slave->dcb));
+	close(fd);
+	poll_fake_write_event(slave->dcb);
 	if (record)
 	{
 		slave->stats.n_flows++;
@ -864,25 +783,6 @@ if (hdr.event_size > DEF_HIGH_WATER) slave->stats.n_above++;
 				"blr_slave_catchup slave is up to date %s, %u\n",
 					slave->binlogfile, slave->binlog_pos)));
 	}
-	spinlock_acquire(&slave->catch_lock);
-#if 0
-if (slave->pthread != pthread_self())
-{
-			LOGIF(LE, (skygw_log_write(LOGFILE_ERROR, "Multple threads in catchup for same slave: %x and %x\n", slave->pthread, pthread_self())));
-abort();
-}
-#endif
-	slave->pthread = 0;
-#if 0
-if (DCB_BELOW_LOW_WATER(slave->dcb) && slave->binlog_pos != router->binlog_position) abort();
-#endif
-	slave->cstate &= ~CS_READING;
-	spinlock_release(&slave->catch_lock);
-if (DCB_BELOW_LOW_WATER(slave->dcb) && slave->binlog_pos != router->binlog_position)
-{
-			LOGIF(LE, (skygw_log_write(LOGFILE_ERROR, "Expected to be above low water\n")));
-goto doitagain;
-}
 	return rval;
 }

@ -908,7 +808,7 @@ ROUTER_INSTANCE		*router = slave->router;
 				slave->binlog_pos != router->binlog_position)
 		{
 			slave->stats.n_dcb++;
-			blr_slave_catchup(router, slave);
+			blr_slave_catchup(router, slave, true);
 		}
 	}

@ -917,7 +817,7 @@ ROUTER_INSTANCE		*router = slave->router;
 		if (slave->state == BLRS_DUMPING)
 		{
 			slave->stats.n_cb++;
-			blr_slave_catchup(router, slave);
+			blr_slave_catchup(router, slave, true);
 		}
 		else
 		{