Files
MaxScale/server/modules/routing/binlog/blr_slave.c
Mark Riddoch fd1154b944 Change of distribution mechanism to use fake events exclusively and avoid
complex locking mechanism and blocking the master DCB eent processign thread

Addition of shared BLFILE mechanism to reduce open/close overhead
2014-10-03 14:33:28 +01:00

890 lines
26 KiB
C

/*
* This file is distributed as part of MaxScale. It is free
* software: you can redistribute it and/or modify it under the terms of the
* GNU General Public License as published by the Free Software Foundation,
* version 2.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Copyright SkySQL Ab 2014
*/
/**
* @file blr_slave.c - contains code for the router to slave communication
*
* The binlog router is designed to be used in replication environments to
* increase the replication fanout of a master server. It provides a transparant
* mechanism to read the binlog entries for multiple slaves while requiring
* only a single connection to the actual master to support the slaves.
*
* The current prototype implement is designed to support MySQL 5.6 and has
* a number of limitations. This prototype is merely a proof of concept and
* should not be considered production ready.
*
* @verbatim
* Revision History
*
* Date Who Description
* 14/04/2014 Mark Riddoch Initial implementation
*
* @endverbatim
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <service.h>
#include <server.h>
#include <router.h>
#include <atomic.h>
#include <spinlock.h>
#include <blr.h>
#include <dcb.h>
#include <spinlock.h>
#include <skygw_types.h>
#include <skygw_utils.h>
#include <log_manager.h>
static uint32_t extract_field(uint8_t *src, int bits);
static void encode_value(unsigned char *data, unsigned int value, int len);
static int blr_slave_query(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue);
static int blr_slave_replay(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *master);
static void blr_slave_send_error(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, char *msg);
static int blr_slave_send_timestamp(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave);
static int blr_slave_register(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue);
static int blr_slave_binlog_dump(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue);
int blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large);
uint8_t *blr_build_header(GWBUF *pkt, REP_HEADER *hdr);
int blr_slave_callback(DCB *dcb, DCB_REASON reason, void *data);
extern int lm_enabled_logfiles_bitmask;
/**
* Process a request packet from the slave server.
*
* The router can handle a limited subset of requests from the slave, these
* include a subset of general SQL queries, a slave registeration command and
* the binlog dump command.
*
* The strategy for responding to these commands is to use caches responses
* for the the same commands that have previously been made to the real master
* if this is possible, if it is not then the router itself will synthesize a
* response.
*
* @param router The router instance this defines the master for this replication chain
* @param slave The slave specific data
* @param queue The incoming request packet
*/
int
blr_slave_request(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue)
{
if (slave->state < 0 || slave->state > BLRS_MAXSTATE)
{
LOGIF(LE, (skygw_log_write(
LOGFILE_ERROR, "Invalid slave state machine state (%d) for binlog router.\n",
slave->state)));
gwbuf_consume(queue, gwbuf_length(queue));
return 0;
}
slave->stats.n_requests++;
switch (MYSQL_COMMAND(queue))
{
case COM_QUERY:
return blr_slave_query(router, slave, queue);
break;
case COM_REGISTER_SLAVE:
return blr_slave_register(router, slave, queue);
break;
case COM_BINLOG_DUMP:
return blr_slave_binlog_dump(router, slave, queue);
break;
case COM_QUIT:
LOGIF(LD, (skygw_log_write(LOGFILE_DEBUG,
"COM_QUIT received from slave with server_id %d\n",
slave->serverid)));
break;
default:
LOGIF(LE, (skygw_log_write(
LOGFILE_ERROR,
"Unexpected MySQL Command (%d) received from slave\n",
MYSQL_COMMAND(queue))));
break;
}
return 0;
}
/**
* Handle a query from the slave. This is expected to be one of the "standard"
* queries we expect as part of the registraton process. Most of these can
* be dealt with by replying the stored responses we got from the master
* when MaxScale registered as a slave. The exception to the rule is the
* request to obtain the current timestamp value of the server.
*
* Five select statements are currently supported:
* SELECT UNIX_TIMESTAMP();
* SELECT @master_binlog_checksum
* SELECT @@GLOBAL.GTID_MODE
* SELECT VERSION()
* SELECT 1
*
* Two show commands are supported:
* SHOW VARIABLES LIKE 'SERVER_ID'
* SHOW VARIABLES LIKE 'SERVER_UUID'
*
* Five set commands are supported:
* SET @master_binlog_checksum = @@global.binlog_checksum
* SET @master_heartbeat_period=...
* SET @slave_slave_uuid=...
* SET NAMES latin1
* SET NAMES utf8
*
* @param router The router instance this defines the master for this replication chain
* @param slave The slave specific data
* @param queue The incoming request packet
* @return Non-zero if data has been sent
*/
static int
blr_slave_query(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue)
{
char *qtext, *query_text;
char *sep = " ,=";
char *word, *brkb;
int query_len;
qtext = GWBUF_DATA(queue);
query_len = extract_field((uint8_t *)qtext, 24) - 1;
qtext += 5; // Skip header and first byte of the payload
query_text = strndup(qtext, query_len);
LOGIF(LT, (skygw_log_write(
LOGFILE_TRACE, "Execute statement from the slave '%s'\n", query_text)));
/*
* Implement a very rudimental "parsing" of the query text by extarcting the
* words from the statement and matchng them against the subset of queries we
* are expecting from the slave. We already have responses to these commands,
* except for the select of UNIX_TIMESTAMP(), that we have saved from MaxScale's
* own interaction with the real master. We simply replay these saved responses
* to the slave.
*/
word = strtok_r(query_text, sep, &brkb);
if (strcasecmp(word, "SELECT") == 0)
{
word = strtok_r(NULL, sep, &brkb);
if (strcasecmp(word, "UNIX_TIMESTAMP()") == 0)
{
free(query_text);
return blr_slave_send_timestamp(router, slave);
}
else if (strcasecmp(word, "@master_binlog_checksum") == 0)
{
free(query_text);
return blr_slave_replay(router, slave, router->saved_master.chksum2);
}
else if (strcasecmp(word, "@@GLOBAL.GTID_MODE") == 0)
{
free(query_text);
return blr_slave_replay(router, slave, router->saved_master.gtid_mode);
}
else if (strcasecmp(word, "1") == 0)
{
free(query_text);
return blr_slave_replay(router, slave, router->saved_master.select1);
}
else if (strcasecmp(word, "VERSION()") == 0)
{
free(query_text);
return blr_slave_replay(router, slave, router->saved_master.selectver);
}
}
else if (strcasecmp(word, "SHOW") == 0)
{
word = strtok_r(NULL, sep, &brkb);
if (strcasecmp(word, "VARIABLES") == 0)
{
word = strtok_r(NULL, sep, &brkb);
if (strcasecmp(word, "LIKE") == 0)
{
word = strtok_r(NULL, sep, &brkb);
if (strcasecmp(word, "'SERVER_ID'") == 0)
{
free(query_text);
return blr_slave_replay(router, slave, router->saved_master.server_id);
}
else if (strcasecmp(word, "'SERVER_UUID'") == 0)
{
free(query_text);
return blr_slave_replay(router, slave, router->saved_master.uuid);
}
}
}
}
else if (strcasecmp(query_text, "SET") == 0)
{
word = strtok_r(NULL, sep, &brkb);
if (strcasecmp(word, "@master_heartbeat_period") == 0)
{
free(query_text);
return blr_slave_replay(router, slave, router->saved_master.heartbeat);
}
else if (strcasecmp(word, "@master_binlog_checksum") == 0)
{
word = strtok_r(NULL, sep, &brkb);
if (strcasecmp(word, "'none'") == 0)
slave->nocrc = 1;
else
slave->nocrc = 0;
free(query_text);
return blr_slave_replay(router, slave, router->saved_master.chksum1);
}
else if (strcasecmp(word, "@slave_uuid") == 0)
{
free(query_text);
return blr_slave_replay(router, slave, router->saved_master.setslaveuuid);
}
else if (strcasecmp(word, "NAMES") == 0)
{
word = strtok_r(NULL, sep, &brkb);
if (strcasecmp(word, "latin1") == 0)
{
free(query_text);
return blr_slave_replay(router, slave, router->saved_master.setnames);
}
else if (strcasecmp(word, "utf8") == 0)
{
free(query_text);
return blr_slave_replay(router, slave, router->saved_master.utf8);
}
}
}
free(query_text);
query_text = strndup(qtext, query_len);
LOGIF(LE, (skygw_log_write(
LOGFILE_ERROR, "Unexpected query from slave server %s\n", query_text)));
free(query_text);
blr_slave_send_error(router, slave, "Unexpected SQL query received from slave.");
return 0;
}
/**
* Send a reply to a command we have received from the slave. The reply itself
* is merely a copy of a previous message we received from the master when we
* registered as a slave. Hence we just replay this saved reply.
*
* @param router The binlog router instance
* @param slave The slave server to which we are sending the response
* @param master The saved master response
* @return Non-zero if data was sent
*/
static int
blr_slave_replay(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *master)
{
GWBUF *clone;
if (!master)
return 0;
if ((clone = gwbuf_clone(master)) != NULL)
{
return slave->dcb->func.write(slave->dcb, clone);
}
else
{
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
"Failed to clone server response to send to slave.\n")));
return 0;
}
}
/**
* Construct an error response
*
* @param router The router instance
* @param slave The slave server instance
* @param msg The error message to send
*/
static void
blr_slave_send_error(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, char *msg)
{
GWBUF *pkt;
unsigned char *data;
int len;
if ((pkt = gwbuf_alloc(strlen(msg) + 13)) == NULL)
return;
data = GWBUF_DATA(pkt);
len = strlen(msg) + 1;
encode_value(&data[0], len, 24); // Payload length
data[3] = 0; // Sequence id
// Payload
data[4] = 0xff; // Error indicator
data[5] = 0; // Error Code
data[6] = 0; // Error Code
strncpy((char *)&data[7], "#00000", 6);
memcpy(&data[13], msg, strlen(msg)); // Error Message
slave->dcb->func.write(slave->dcb, pkt);
}
/*
* Some standard packets that have been captured from a network trace of server
* interactions. These packets are the schema definition sent in response to
* a SELECT UNIX_TIMESTAMP() statement and the EOF packet that marks the end
* of transmission of the result set.
*/
static uint8_t timestamp_def[] = {
0x01, 0x00, 0x00, 0x01, 0x01, 0x26, 0x00, 0x00, 0x02, 0x03, 0x64, 0x65, 0x66, 0x00, 0x00, 0x00,
0x10, 0x55, 0x4e, 0x49, 0x58, 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x53, 0x54, 0x41, 0x4d, 0x50, 0x28,
0x29, 0x00, 0x0c, 0x3f, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x08, 0x81, 0x00, 0x00, 0x00, 0x00, 0x05,
0x00, 0x00, 0x03, 0xfe, 0x00, 0x00, 0x02, 0x00
};
static uint8_t timestamp_eof[] = { 0x05, 0x00, 0x00, 0x05, 0xfe, 0x00, 0x00, 0x02, 0x00 };
/**
* Send a response to a "SELECT UNIX_TIMESTAMP()" request. This differs from the other
* requests since we do not save a copy of the original interaction with the master
* and simply replay it. We want to always send the current time. We have stored a typcial
* response, which gives us the schema information normally returned. This is sent to the
* client and then we add a dynamic part that will insert the current timestamp data.
* Finally we send a preprepaed EOF packet to end the response stream.
*
* @param router The binlog router instance
* @param slave The slave server to which we are sending the response
* @return Non-zero if data was sent
*/
static int
blr_slave_send_timestamp(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave)
{
GWBUF *pkt;
char timestamp[20];
uint8_t *ptr;
int len, ts_len;
sprintf(timestamp, "%ld", time(0));
ts_len = strlen(timestamp);
len = sizeof(timestamp_def) + sizeof(timestamp_eof) + 5 + ts_len;
if ((pkt = gwbuf_alloc(len)) == NULL)
return 0;
ptr = GWBUF_DATA(pkt);
memcpy(ptr, timestamp_def, sizeof(timestamp_def)); // Fixed preamble
ptr += sizeof(timestamp_def);
encode_value(ptr, ts_len + 1, 24); // Add length of data packet
ptr += 3;
*ptr++ = 0x04; // Sequence number in response
*ptr++ = ts_len; // Length of result string
strncpy((char *)ptr, timestamp, ts_len); // Result string
ptr += ts_len;
memcpy(ptr, timestamp_eof, sizeof(timestamp_eof)); // EOF packet to terminate result
return slave->dcb->func.write(slave->dcb, pkt);
}
/**
* Process a slave replication registration message.
*
* We store the various bits of information the slave gives us and generate
* a reply message.
*
* @param router The router instance
* @param slave The slave server
* @param queue The BINLOG_DUMP packet
* @return Non-zero if data was sent
*/
static int
blr_slave_register(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue)
{
GWBUF *resp;
uint8_t *ptr;
int len, slen;
ptr = GWBUF_DATA(queue);
len = extract_field(ptr, 24);
ptr += 4; // Skip length and sequence number
if (*ptr++ != COM_REGISTER_SLAVE)
return 0;
slave->serverid = extract_field(ptr, 32);
ptr += 4;
slen = *ptr++;
if (slen != 0)
{
slave->hostname = strndup((char *)ptr, slen);
ptr += slen;
}
else
slave->hostname = NULL;
slen = *ptr++;
if (slen != 0)
{
ptr += slen;
slave->user = strndup((char *)ptr, slen);
}
else
slave->user = NULL;
slen = *ptr++;
if (slen != 0)
{
slave->passwd = strndup((char *)ptr, slen);
ptr += slen;
}
else
slave->passwd = NULL;
slave->port = extract_field(ptr, 16);
ptr += 2;
slave->rank = extract_field(ptr, 32);
/*
* Now construct a response
*/
if ((resp = gwbuf_alloc(11)) == NULL)
return 0;
ptr = GWBUF_DATA(resp);
encode_value(ptr, 7, 24); // Payload length
ptr += 3;
*ptr++ = 1; // Sequence number
encode_value(ptr, 0, 24);
ptr += 3;
encode_value(ptr, slave->serverid, 32);
slave->state = BLRS_REGISTERED;
return slave->dcb->func.write(slave->dcb, resp);
}
/**
* Process a COM_BINLOG_DUMP message from the slave. This is the
* final step in the process of registration. The new master, MaxScale
* must send a response packet and generate a fake BINLOG_ROTATE event
* with the binlog file requested by the slave. And then send a
* FORMAT_DESCRIPTION_EVENT that has been saved from the real master.
*
* Once send MaxScale must continue to send binlog events to the slave.
*
* @param router The router instance
* @param slave The slave server
* @param queue The BINLOG_DUMP packet
* @return The number of bytes written to the slave
*/
static int
blr_slave_binlog_dump(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue)
{
GWBUF *resp;
uint8_t *ptr;
int len, flags, serverid, rval, binlognamelen;
REP_HEADER hdr;
uint32_t chksum;
ptr = GWBUF_DATA(queue);
len = extract_field(ptr, 24);
binlognamelen = len - 11;
ptr += 4; // Skip length and sequence number
if (*ptr++ != COM_BINLOG_DUMP)
{
LOGIF(LE, (skygw_log_write(
LOGFILE_ERROR,
"blr_slave_binlog_dump expected a COM_BINLOG_DUMP but received %d\n",
*(ptr-1))));
return 0;
}
slave->binlog_pos = extract_field(ptr, 32);
ptr += 4;
flags = extract_field(ptr, 16);
ptr += 2;
serverid = extract_field(ptr, 32);
ptr += 4;
strncpy(slave->binlogfile, (char *)ptr, binlognamelen);
slave->binlogfile[binlognamelen] = 0;
slave->state = BLRS_DUMPING;
slave->seqno = 1;
if (slave->nocrc)
len = 19 + 8 + binlognamelen;
else
len = 19 + 8 + 4 + binlognamelen;
// Build a fake rotate event
resp = gwbuf_alloc(len + 5);
hdr.payload_len = len + 1;
hdr.seqno = slave->seqno++;
hdr.ok = 0;
hdr.timestamp = 0L;
hdr.event_type = ROTATE_EVENT;
hdr.serverid = router->masterid;
hdr.event_size = len;
hdr.next_pos = 0;
hdr.flags = 0x20;
ptr = blr_build_header(resp, &hdr);
encode_value(ptr, slave->binlog_pos, 64);
ptr += 8;
memcpy(ptr, slave->binlogfile, BINLOG_FNAMELEN);
ptr += BINLOG_FNAMELEN;
if (!slave->nocrc)
{
/*
* Now add the CRC to the fake binlog rotate event.
*
* The algorithm is first to compute the checksum of an empty buffer
* and then the checksum of the event portion of the message, ie we do not
* include the length, sequence number and ok byte that makes up the first
* 5 bytes of the message. We also do not include the 4 byte checksum itself.
*/
chksum = crc32(0L, NULL, 0);
chksum = crc32(chksum, GWBUF_DATA(resp) + 5, hdr.event_size - 4);
encode_value(ptr, chksum, 32);
}
rval = slave->dcb->func.write(slave->dcb, resp);
/* Send the FORMAT_DESCRIPTION_EVENT */
if (router->saved_master.fde_event)
{
resp = gwbuf_alloc(router->saved_master.fde_len + 5);
ptr = GWBUF_DATA(resp);
encode_value(ptr, router->saved_master.fde_len + 1, 24); // Payload length
ptr += 3;
*ptr++ = slave->seqno++;
*ptr++ = 0; // OK
memcpy(ptr, router->saved_master.fde_event, router->saved_master.fde_len);
encode_value(ptr, time(0), 32); // Overwrite timestamp
/*
* Since we have changed the timestamp we must recalculate the CRC
*
* Position ptr to the start of the event header,
* calculate a new checksum
* and write it into the header
*/
ptr = GWBUF_DATA(resp) + 5 + router->saved_master.fde_len - 4;
chksum = crc32(0L, NULL, 0);
chksum = crc32(chksum, GWBUF_DATA(resp) + 5, router->saved_master.fde_len - 4);
encode_value(ptr, chksum, 32);
rval = slave->dcb->func.write(slave->dcb, resp);
}
slave->dcb->low_water = router->low_water;
slave->dcb->high_water = router->high_water;
// dcb_add_callback(slave->dcb, DCB_REASON_LOW_WATER, blr_slave_callback, slave);
dcb_add_callback(slave->dcb, DCB_REASON_DRAINED, blr_slave_callback, slave);
if (slave->binlog_pos != router->binlog_position ||
strcmp(slave->binlogfile, router->binlog_name) != 0)
{
spinlock_acquire(&slave->catch_lock);
slave->cstate &= ~CS_UPTODATE;
spinlock_release(&slave->catch_lock);
#if QUEUE_SLAVE
poll_fake_write_event(slave->dcb);
#else
rval = blr_slave_catchup(router, slave, true);
#endif
}
return rval;
}
/**
* Extract a numeric field from a packet of the specified number of bits,
* the number of bits must be a multiple of 8.
*
* @param src The raw packet source
* @param bits The number of bits to extract (multiple of 8)
* @return The extracted value
*/
static uint32_t
extract_field(uint8_t *src, int bits)
{
uint32_t rval = 0, shift = 0;
while (bits > 0)
{
rval |= (*src++) << shift;
shift += 8;
bits -= 8;
}
return rval;
}
/**
* Encode a value into a number of bits in a MySQL packet
*
* @param data Pointer to location in target packet
* @param value The value to encode into the buffer
* @param len Number of bits to encode value into
*/
static void
encode_value(unsigned char *data, unsigned int value, int len)
{
while (len > 0)
{
*data++ = value & 0xff;
value >>= 8;
len -= 8;
}
}
/**
* Populate a header structure for a replication message from a GWBUF structure.
*
* @param pkt The incoming packet in a GWBUF chain
* @param hdr The packet header to populate
* @return A pointer to the first byte following the event header
*/
uint8_t *
blr_build_header(GWBUF *pkt, REP_HEADER *hdr)
{
uint8_t *ptr;
ptr = GWBUF_DATA(pkt);
encode_value(ptr, hdr->payload_len, 24);
ptr += 3;
*ptr++ = hdr->seqno;
*ptr++ = hdr->ok;
encode_value(ptr, hdr->timestamp, 32);
ptr += 4;
*ptr++ = hdr->event_type;
encode_value(ptr, hdr->serverid, 32);
ptr += 4;
encode_value(ptr, hdr->event_size, 32);
ptr += 4;
encode_value(ptr, hdr->next_pos, 32);
ptr += 4;
encode_value(ptr, hdr->flags, 16);
ptr += 2;
return ptr;
}
/**
* We have a registered slave that is behind the current leading edge of the
* binlog. We must replay the log entries to bring this node up to speed.
*
* There may be a large number of records to send to the slave, the process
* is triggered by the slave COM_BINLOG_DUMP message and all the events must
* be sent without receiving any new event. This measn there is no trigger into
* MaxScale other than this initial message. However, if we simply send all the
* events we end up with an extremely long write queue on the DCB and risk
* running the server out of resources.
*
* The slave catchup routine will send a burst of replication events per single
* call. The paramter "long" control the number of events in the burst. The
* short burst is intended to be used when the master receive an event and
* needs to put the slave into catchup mode. This prevents the slave taking
* too much tiem away from the thread that is processing the master events.
*
* At the end of the burst a fake EPOLLOUT event is added to the poll event
* queue. This ensures that the slave callback for processing DCB write drain
* will be called and future catchup requests will be handled on another thread.
*
* @param router The binlog router
* @param slave The slave that is behind
* @param large Send a long or short burst of events
* @return The number of bytes written
*/
int
blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
{
GWBUF *head, *record;
REP_HEADER hdr;
int written, rval = 1, burst;
uint8_t *ptr;
struct timespec req;
extern unsigned long hkheartbeat;
unsigned long beat;
beat = hkheartbeat;
if (large)
burst = router->long_burst;
else
burst = router->short_burst;
spinlock_acquire(&slave->catch_lock);
#if QUEUE_SLAVE
if (slave->cstate & CS_BUSY)
return 0;
slave->cstate = CS_BUSY;
spinlock_release(&slave->catch_lock);
#else
while ((slave->cstate & (CS_HOLD|CS_BUSY)) == (CS_HOLD|CS_BUSY))
{
spinlock_release(&slave->catch_lock);
req.tv_sec = 0;
req.tv_nsec = 100;
nanosleep(&req, NULL);
spinlock_acquire(&slave->catch_lock);
}
if (slave->cstate & CS_BUSY)
{
spinlock_release(&slave->catch_lock);
if (hkheartbeat - beat > 5) LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
"Long wait in blr_salve_catchup %ld00ms with %s burst, return without write records.\n",
hkheartbeat - beat, large ? "long" : "short")));
return;
}
slave->cstate |= CS_BUSY;
spinlock_release(&slave->catch_lock);
if (hkheartbeat - beat > 5) LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
"Long wait in blr_slave_catchup %ld00ms with %s burst.\n",
hkheartbeat - beat, large ? "long" : "short")));
#endif
if (slave->file == NULL)
{
if ((slave->file = blr_open_binlog(router, slave->binlogfile)) == NULL)
{
LOGIF(LE, (skygw_log_write(
LOGFILE_ERROR,
"blr_slave_catchup failed to open binlog file %s\n",
slave->binlogfile)));
return 0;
}
}
slave->stats.n_bursts++;
while (burst-- &&
(record = blr_read_binlog(slave->file, slave->binlog_pos, &hdr)) != NULL)
{
#if QUEUE_SLAVE
#else
spinlock_acquire(&slave->catch_lock);
slave->cstate &= ~CS_HOLD;
spinlock_release(&slave->catch_lock);
#endif
head = gwbuf_alloc(5);
ptr = GWBUF_DATA(head);
encode_value(ptr, hdr.event_size + 1, 24);
ptr += 3;
*ptr++ = slave->seqno++;
*ptr++ = 0; // OK
head = gwbuf_append(head, record);
if (hdr.event_type == ROTATE_EVENT)
{
blr_close_binlog(router, slave->file);
blr_slave_rotate(slave, GWBUF_DATA(record));
if ((slave->file = blr_open_binlog(router, slave->binlogfile)) == NULL)
{
LOGIF(LE, (skygw_log_write(
LOGFILE_ERROR,
"blr_slave_catchup failed to open binlog file %s\n",
slave->binlogfile)));
break;
}
}
written = slave->dcb->func.write(slave->dcb, head);
if (written && hdr.event_type != ROTATE_EVENT)
{
slave->binlog_pos = hdr.next_pos;
}
rval = written;
slave->stats.n_events++;
#if QUEUE_SLAVE
#else
spinlock_acquire(&slave->catch_lock);
slave->cstate |= CS_HOLD;
spinlock_release(&slave->catch_lock);
#endif
}
if (record == NULL)
slave->stats.n_failed_read++;
spinlock_acquire(&slave->catch_lock);
slave->cstate &= ~CS_BUSY;
spinlock_release(&slave->catch_lock);
if (record)
{
slave->stats.n_flows++;
spinlock_acquire(&slave->catch_lock);
slave->cstate |= CS_EXPECTCB;
spinlock_release(&slave->catch_lock);
poll_fake_write_event(slave->dcb);
}
else
{
int state_change = 0;
spinlock_acquire(&slave->catch_lock);
if ((slave->cstate & CS_UPTODATE) == 0)
{
slave->stats.n_upd++;
slave->cstate |= CS_UPTODATE;
state_change = 1;
}
spinlock_release(&slave->catch_lock);
if (state_change)
LOGIF(LM, (skygw_log_write(LOGFILE_MESSAGE,
"blr_slave_catchup slave is up to date %s, %u\n",
slave->binlogfile, slave->binlog_pos)));
}
return rval;
}
/**
* The DCB callback used by the slave to obtain DCB_REASON_LOW_WATER callbacks
* when the server sends all the the queue data for a DCB. This is the mechanism
* that is used to implement the flow control mechanism for the sending of
* large quantities of binlog records during the catchup process.
*
* @param dcb The DCB of the slave connection
* @param reason The reason the callback was called
* @param data The user data, in this case the server structure
*/
int
blr_slave_callback(DCB *dcb, DCB_REASON reason, void *data)
{
ROUTER_SLAVE *slave = (ROUTER_SLAVE *)data;
ROUTER_INSTANCE *router = slave->router;
if (reason == DCB_REASON_DRAINED)
{
if (slave->state == BLRS_DUMPING &&
(slave->binlog_pos != router->binlog_position
|| strcmp(slave->binlogfile, router->binlog_name)))
{
spinlock_acquire(&slave->catch_lock);
slave->cstate &= ~CS_UPTODATE;
spinlock_release(&slave->catch_lock);
slave->stats.n_dcb++;
blr_slave_catchup(router, slave, true);
}
}
if (reason == DCB_REASON_LOW_WATER)
{
if (slave->state == BLRS_DUMPING)
{
slave->stats.n_cb++;
blr_slave_catchup(router, slave, true);
}
else
{
slave->stats.n_cbna++;
}
}
return 0;
}
/**
* Rotate the slave to the new binlog file
*
* @param slave The slave instance
* @param ptr The rotate event (minus header and OK byte)
*/
void
blr_slave_rotate(ROUTER_SLAVE *slave, uint8_t *ptr)
{
int len = EXTRACT24(ptr + 9); // Extract the event length
len = len - (19 + 8 + 4); // Remove length of header, checksum and position
if (len > BINLOG_FNAMELEN)
len = BINLOG_FNAMELEN;
ptr += 19; // Skip header
slave->binlog_pos = extract_field(ptr, 32);
slave->binlog_pos += (extract_field(ptr+4, 32) << 32);
memcpy(slave->binlogfile, ptr + 8, len);
slave->binlogfile[len] = 0;
}