MXS-1545: handling of slave file EOF refactoring

MXS-1545: handling of slave file EOF refactoring.

Some slave/router state are now checked before any WARN/ERROR messages
about slave file EOF.

The missing “next_file” is always logged with warn.
This commit is contained in:
MassimilianoPinto 2017-12-01 14:02:02 +01:00
parent d4c0d74ab4
commit 574af7762d
4 changed files with 484 additions and 137 deletions

View File

@ -1063,7 +1063,7 @@ createInstance(SERVICE *service, char **options)
if (inst->mariadb10_master_gtid &&
inst->current_pos <= 4)
{
MARIADB_GTID_INFO last_gtid = {};
MARIADB_GTID_INFO last_gtid;
memset(&last_gtid, 0, sizeof(last_gtid));
/* Get last MariaDB GTID from repo */
@ -1224,6 +1224,7 @@ newSession(MXS_ROUTER *instance, MXS_SESSION *session)
slave->mariadb_gtid = NULL;
slave->gtid_maps = NULL;
memset(&slave->f_info, 0, sizeof (MARIADB_GTID_INFO));
slave->annotate_rows = false;
/**
* Add this session to the list of active sessions.

View File

@ -1714,8 +1714,8 @@ blr_file_next_exists(ROUTER_INSTANCE *router,
strncpy(next_file, result.file, BINLOG_FNAMELEN);
next_file[BINLOG_FNAMELEN] = '\0';
MXS_INFO("The next Binlog file from GTID maps repo is [%s]",
bigbuf);
MXS_DEBUG("The next Binlog file from GTID maps repo is [%s]",
bigbuf);
spinlock_acquire(&slave->catch_lock);

View File

@ -1299,11 +1299,11 @@ blr_handle_binlog_record(ROUTER_INSTANCE *router, GWBUF *pkt)
}
/**
* Check Event Type limit:
* If supported, gather statistics about
* the replication event types
* else stop replication from master
*/
* Check Event Type limit:
* If supported, gather statistics about
* the replication event types
* else stop replication from master
*/
int event_limit = router->mariadb10_compat ?
MAX_EVENT_TYPE_MARIADB10 : MAX_EVENT_TYPE;
@ -3619,4 +3619,4 @@ void blr_set_checksum(ROUTER_INSTANCE *inst, GWBUF *buf)
MXS_FREE(val);
}
}
}
}

View File

@ -109,6 +109,14 @@ typedef struct
uint64_t rowid; /* ROWID of router current file*/
} BINARY_LOG_DATA_RESULT;
/** Slave file read EOF handling */
typedef enum
{
SLAVE_EOF_ROTATE = 0,
SLAVE_EOF_WARNING,
SLAVE_EOF_ERROR
} slave_eof_action_t;
extern void poll_fake_write_event(DCB *dcb);
static char* get_next_token(char *str, const char* delim, char **saveptr);
extern int load_mysql_users(SERV_LISTENER *listener);
@ -341,7 +349,7 @@ static bool blr_handle_complex_select(ROUTER_INSTANCE *router,
extern bool blr_is_current_binlog(ROUTER_INSTANCE *router,
ROUTER_SLAVE *slave);
extern bool blr_compare_binlogs(const ROUTER_INSTANCE *router,
const MARIADB_GTID_INFO *slave,
const MARIADB_GTID_ELEMS *info,
const char *r_file,
const char *s_file);
static bool blr_purge_binary_logs(ROUTER_INSTANCE *router,
@ -376,6 +384,15 @@ static bool blr_apply_changes(ROUTER_INSTANCE *router,
CHANGE_MASTER_OPTIONS change_master,
char *new_logfile,
char *error);
static void blr_slave_info_save(const MARIADB_GTID_INFO *info,
MARIADB_GTID_INFO *save_info,
char *save_prefix);
static void blr_slave_log_next_file_action(const ROUTER_INSTANCE *router,
const ROUTER_SLAVE *slave,
const char *c_prefix,
const char *next_file,
slave_eof_action_t log_action);
/**
* Process a request packet from the slave server.
*
@ -439,7 +456,10 @@ blr_slave_request(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue)
/* Request now the binlog records */
rv = blr_slave_binlog_dump(router, slave, queue);
if (rv && router->send_slave_heartbeat && slave->heartbeat > 0)
/* Check whether to add the heartbeat check for this slave */
if (rv && slave->state == BLRS_DUMPING &&
router->send_slave_heartbeat &&
slave->heartbeat > 0)
{
char task_name[BLRM_TASK_NAME_LEN + 1] = "";
snprintf(task_name,
@ -2341,6 +2361,9 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
file = NULL;
#endif
// Prefix for BLR_BINLOG_STORAGE_TREE
char t_prefix[BINLOG_FILE_EXTRA_INFO] = "";
if (file == NULL)
{
rotating = router->rotating;
@ -2360,11 +2383,22 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
poll_fake_write_event(slave->dcb);
return rval;
}
MXS_ERROR("Slave %s:%i, server-id %d, binlog '%s': blr_slave_catchup "
"failed to open binlog file",
/* Fill the file prefix */
if (f_tree)
{
sprintf(t_prefix,
"%" PRIu32 "/%" PRIu32 "/",
f_tree->gtid_elms.domain_id,
f_tree->gtid_elms.server_id);
}
MXS_ERROR("Slave %s:%i, server-id %d, binlog '%s%s': blr_slave_catchup "
"failed to open binlog file.",
slave->dcb->remote,
dcb_get_port(slave->dcb),
slave->serverid,
t_prefix,
slave->binlogfile);
slave->cstate &= ~CS_BUSY;
@ -2395,7 +2429,9 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
#endif
int events_before = slave->stats.n_events;
/* Loop read binlog events from slave binlog file */
while (burst-- && burst_size > 0 &&
/* Read one binlog event */
(record = blr_read_binlog(router,
file,
slave->binlog_pos,
@ -2407,11 +2443,22 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
uint32_t binlog_pos;
uint32_t event_size;
/* Get up to date file prefix */
if (f_tree)
{
sprintf(t_prefix,
"%" PRIu32 "/%" PRIu32 "/",
slave->f_info.gtid_elms.domain_id,
slave->f_info.gtid_elms.server_id);
}
strcpy(binlog_name, slave->binlogfile);
binlog_pos = slave->binlog_pos;
/* Don't sent special events generated by MaxScale
* or ANNOTATE_ROWS events if not requested */
/**
* Don't sent special events generated by MaxScale
* or ANNOTATE_ROWS events if not requested
*/
if (hdr.event_type == MARIADB10_START_ENCRYPTION_EVENT ||
hdr.event_type == IGNORABLE_EVENT ||
(!slave->annotate_rows &&
@ -2452,7 +2499,8 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
slave->encryption_ctx = encryption_ctx;
MXS_INFO("Start Encryption event found while reading. "
"Binlog %s is encrypted. First event at %lu",
"Binlog '%s%s' is encrypted. First event at %lu",
t_prefix,
slave->binlogfile,
(unsigned long)hdr.next_pos);
}
@ -2460,18 +2508,20 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
else if (hdr.event_type == MARIADB_ANNOTATE_ROWS_EVENT)
{
MXS_INFO("Skipping ANNOTATE_ROWS event [%s] of size %lu while "
"reading binlog %s at %lu",
"reading binlog '%s%s' at %lu",
blr_get_event_description(router, hdr.event_type),
(unsigned long)hdr.event_size,
t_prefix,
slave->binlogfile,
(unsigned long)slave->binlog_pos);
}
else
{
MXS_INFO("Found ignorable event [%s] of size %lu while "
"reading binlog %s at %lu",
"reading binlog '%s%s' at %lu",
blr_get_event_description(router, hdr.event_type),
(unsigned long)hdr.event_size,
t_prefix,
slave->binlogfile,
(unsigned long)slave->binlog_pos);
}
@ -2487,6 +2537,7 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
break;
}
/* Handle ROTATE_EVENT */
if (hdr.event_type == ROTATE_EVENT)
{
unsigned long beat1 = hkheartbeat;
@ -2497,6 +2548,7 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
MXS_ERROR("blr_close_binlog took %lu maxscale beats",
hkheartbeat - beat1);
}
/* Set new file in slave->binlogfile */
blr_slave_rotate(router, slave, GWBUF_DATA(record));
/* reset the encryption context */
@ -2526,11 +2578,22 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
poll_fake_write_event(slave->dcb);
return rval;
}
MXS_ERROR("Slave %s:%i, server-id %d, binlog '%s': blr_slave_catchup "
/* Refresh file prefix */
if (f_tree)
{
sprintf(t_prefix,
"%" PRIu32 "/%" PRIu32 "/",
slave->f_info.gtid_elms.domain_id,
slave->f_info.gtid_elms.server_id);
}
MXS_ERROR("Slave %s:%i, server-id %d, binlog '%s%s': blr_slave_catchup "
"failed to open binlog file in rotate event",
slave->dcb->remote,
dcb_get_port(slave->dcb),
slave->serverid,
t_prefix,
slave->binlogfile);
slave->state = BLRS_ERRORED;
@ -2556,6 +2619,7 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
return 0;
}
#ifdef BLFILE_IN_SLAVE
file = slave->file;
#endif
@ -2566,6 +2630,7 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
}
}
/* Send the binlog event */
if (blr_send_event(BLR_THREAD_ROLE_SLAVE,
binlog_name,
binlog_pos,
@ -2582,12 +2647,13 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
}
else
{
MXS_WARNING("Slave %s:%i, server-id %d, binlog '%s, position %u: "
MXS_WARNING("Slave %s:%i, server-id %d, binlog '%s%s', position %u: "
"Slave-thread could not send event to slave, "
"closing connection.",
slave->dcb->remote,
dcb_get_port(slave->dcb),
slave->serverid,
t_prefix,
binlog_name,
binlog_pos);
#ifndef BLFILE_IN_SLAVE
@ -2623,28 +2689,39 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
ss_dassert(record == NULL);
/* Refresh file prefix */
if (f_tree)
{
sprintf(t_prefix,
"%" PRIu32 "/%" PRIu32 "/",
slave->f_info.gtid_elms.domain_id,
slave->f_info.gtid_elms.server_id);
}
if (hdr.ok != SLAVE_POS_READ_OK)
{
slave->stats.n_failed_read++;
if (hdr.ok == SLAVE_POS_BAD_FD)
{
MXS_ERROR("%s Slave %s:%i, server-id %d, binlog '%s', %s",
MXS_ERROR("%s Slave %s:%i, server-id %d, binlog '%s%s', %s",
router->service->name,
slave->dcb->remote,
dcb_get_port(slave->dcb),
slave->serverid,
t_prefix,
slave->binlogfile,
read_errmsg);
}
if (hdr.ok == SLAVE_POS_BEYOND_EOF)
{
MXS_ERROR("%s Slave %s:%i, server-id %d, binlog '%s', %s",
MXS_ERROR("%s Slave %s:%i, server-id %d, binlog '%s%s', %s",
router->service->name,
slave->dcb->remote,
dcb_get_port(slave->dcb),
slave->serverid,
t_prefix,
slave->binlogfile,
read_errmsg);
@ -2662,11 +2739,12 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
if (hdr.ok == SLAVE_POS_READ_ERR)
{
MXS_ERROR("%s Slave %s:%i, server-id %d, binlog '%s', %s",
MXS_ERROR("%s Slave %s:%i, server-id %d, binlog '%s%s', %s",
router->service->name,
slave->dcb->remote,
dcb_get_port(slave->dcb),
slave->serverid,
t_prefix,
slave->binlogfile,
read_errmsg);
@ -2696,12 +2774,13 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
if (hdr.ok == SLAVE_POS_READ_UNSAFE)
{
MXS_NOTICE("%s: Slave %s:%i, server-id %d, binlog '%s', read %d events, "
MXS_NOTICE("%s: Slave %s:%i, server-id %d, binlog '%s%s', read %d events, "
"current committed transaction event being sent: %lu, %s",
router->service->name,
slave->dcb->remote,
dcb_get_port(slave->dcb),
slave->serverid,
t_prefix,
slave->binlogfile,
slave->stats.n_events - events_before,
router->current_safe_event,
@ -2719,11 +2798,17 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
/**
* Check now slave position with read indicator = SLAVE_POS_READ_OK
*
* 1) Same name and pos as current router file: aka Up To Date
* Two cases handled:
* (1) The slave is Up To Date
* (2) The slave is at EOF of a file which is not the current router file
*
*/
if (slave->binlog_pos == router->binlog_position &&
blr_is_current_binlog(router, slave))
{
/**
* (1) Same name and pos as current router file: aka Up To Date
*/
spinlock_acquire(&router->binlog_lock);
spinlock_acquire(&slave->catch_lock);
@ -2738,7 +2823,7 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
spinlock_release(&slave->catch_lock);
spinlock_release(&router->binlog_lock);
/* force slave to read events via catchup routine */
/* Force slave to read events via catchup routine */
poll_fake_write_event(slave->dcb);
}
else
@ -2758,128 +2843,246 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
}
else
{
char next_file[BINLOG_FNAMELEN + 1] = "";
/* 2) Checking End Of File of the slave binlog file */
/**
* (2) Checking End Of File of the slave binlog file
* and current router file
*/
if (slave->binlog_pos >= blr_file_size(file) &&
router->rotating == 0 &&
(!blr_is_current_binlog(router, slave)))
!blr_is_current_binlog(router, slave))
{
/**
* If next file to read doesn't exist, retry the check up to
* MISSING_FILE_READ_RETRIES times before giving up.
* This is end of current slave file
* which is not the current router binlog file
*/
char next_file[BINLOG_FNAMELEN + 1] = "";
MARIADB_GTID_INFO current_info;
char c_prefix[BINLOG_FILE_EXTRA_INFO] = "";
bool have_heartbeat = router->send_slave_heartbeat &&
(slave->heartbeat > 0);
/**
* Save current MARIADB_GTID_INFO detail because
* calling blr_file_next_exists() overwrites that
*/
if (f_tree)
{
spinlock_acquire(&slave->catch_lock);
blr_slave_info_save(&slave->f_info, &current_info, c_prefix);
spinlock_release(&slave->catch_lock);
}
/**
* Check now whether the next file exists and it's readable
*
* If not, handle some cases
* if found issue a fake_rotate event
*/
if (!blr_file_next_exists(router, slave, next_file))
{
spinlock_acquire(&slave->catch_lock);
if (slave->stats.n_failed_read < MISSING_FILE_READ_RETRIES)
/**
* The next binlog file to read doesn't exist
* or it's not set.
*/
if (router->mariadb10_master_gtid &&
router->master_state == BLRM_SLAVE_STOPPED &&
!router->binlog_name[0])
{
slave->cstate |= CS_EXPECTCB;
slave->cstate &= ~CS_BUSY;
/**
* (1) Don't care about empty router->binlogname in
* BLRM_SLAVE_STOPPED state when GTID
* registration is on:
* set CS_WAIT_DATA and return.
*/
spinlock_acquire(&slave->catch_lock);
if (f_tree)
{
/**
* We need to deal with current slave file:
* restore first the GTID info into slave->f_info
*/
memcpy(&slave->f_info,
&current_info,
sizeof(MARIADB_GTID_INFO));
}
/**
* We force cachtup state to CS_WAIT_DATA now:
*
* The slave can be called by any new master
* event received (no matter which is the binlog file)
* or by an heartbeat event.
*/
slave->cstate = CS_WAIT_DATA;
spinlock_release(&slave->catch_lock);
/* Force slave to read via catchup routine */
poll_fake_write_event(slave->dcb);
#ifndef BLFILE_IN_SLAVE
/* Close file */
blr_close_binlog(router, file);
#endif
return rval;
}
slave->state = BLRS_ERRORED;
/**
* (2) The next file is not available/existent, actions:
*
* If router state is BLRM_BINLOGDUMP
* - abort slave connection if MISSING_FILE_READ_RETRIES is hit
* or
* - just log a warning message
*
* Note: in any other router state we don't log messages
*/
if (router->master_state == BLRM_BINLOGDUMP)
{
spinlock_acquire(&slave->catch_lock);
/* Router state is BLRM_BINLOGDUMP (aka replicating) */
if (slave->stats.n_failed_read < MISSING_FILE_READ_RETRIES)
{
slave->stats.n_failed_read++;
spinlock_release(&slave->catch_lock);
spinlock_release(&slave->catch_lock);
MXS_ERROR("%s: Slave [%s]:%d, server-id %d reached "
"end of file for '%s' and next file to read '%s' "
"doesn't exist. Force replication abort after %d retries.",
router->service->name,
slave->dcb->remote,
dcb_get_port(slave->dcb),
slave->serverid,
slave->binlogfile,
next_file,
MISSING_FILE_READ_RETRIES);
/* Log warning for missing file */
blr_slave_log_next_file_action(router,
slave,
c_prefix,
next_file,
SLAVE_EOF_WARNING);
}
else
{
/**
* Force error and disconnect
* when exceeding error counter limit
*/
slave->state = BLRS_ERRORED;
/* Send error that stops slave replication */
blr_send_custom_error(slave->dcb,
slave->seqno++,
0,
"next binlog file to read doesn't exist",
"HY000",
BINLOG_FATAL_ERROR_READING);
spinlock_release(&slave->catch_lock);
/* Log error for missing file */
blr_slave_log_next_file_action(router,
slave,
c_prefix,
next_file,
SLAVE_EOF_ERROR);
/* Send error that stops slave replication */
blr_send_custom_error(slave->dcb,
slave->seqno++,
0,
"next binlog file to read doesn't exist",
"HY000",
BINLOG_FATAL_ERROR_READING);
#ifndef BLFILE_IN_SLAVE
blr_close_binlog(router, file);
/* Close file */
blr_close_binlog(router, file);
#endif
dcb_close(slave->dcb);
/* Disconnect client */
dcb_close(slave->dcb);
return 0;
}
return 0;
}
} // No else branch: no further actions
/* We may have reached the end of file of a non-current
* binlog file.
*
* Note if the master is rotating there is a window during
* which the rotate event has been written to the old binlog
* but the new binlog file has not yet been created. Therefore
* we ignore these issues during the rotate processing.
*/
MXS_ERROR("%s: Slave [%s]:%d, server-id %d reached end of file for binlog file %s "
"at %lu which is not the file currently being downloaded. "
"Master binlog is %s, %lu. This may be caused by a "
"previous failure of the master.",
router->service->name,
slave->dcb->remote,
dcb_get_port(slave->dcb),
slave->serverid,
slave->binlogfile,
(unsigned long)slave->binlog_pos,
router->binlog_name,
router->binlog_position);
/* Reset encryption context */
MXS_FREE(slave->encryption_ctx);
slave->encryption_ctx = NULL;
/* Now pass the next_file to blr_slave_fake_rotate() */
#ifdef BLFILE_IN_SLAVE
if (blr_slave_fake_rotate(router,
slave,
&slave->file,
next_file))
#else
if (blr_slave_fake_rotate(router,
slave,
&file,
next_file))
#endif
{
spinlock_acquire(&slave->catch_lock);
slave->cstate |= CS_EXPECTCB;
spinlock_release(&slave->catch_lock);
/*
* Fake rotate written to client:
* no need to call poll_fake_write_event()
/**
* We need to deal with current slave file:
* restore first the GTID info into slave->f_info
*/
spinlock_acquire(&slave->catch_lock);
if (f_tree)
{
memcpy(&slave->f_info,
&current_info,
sizeof(MARIADB_GTID_INFO));
}
/**
* We force cachtup state to CS_WAIT_DATA now:
*
* The slave can be called by any new master
* event received (no matter which is the binlog file)
* or by an heartbeat event.
*/
slave->cstate = CS_WAIT_DATA;
spinlock_release(&slave->catch_lock);
}
else
{
slave->state = BLRS_ERRORED;
dcb_close(slave->dcb);
#ifndef BLFILE_IN_SLAVE
blr_close_binlog(router, file);
/* We may have reached the end of file of a non-current
* binlog file.
*
* Note if the master is rotating there is a window during
* which the rotate event has been written to the old binlog
* but the new binlog file has not yet been created. Therefore
* we ignore these issues during the rotate processing.
*
* We send a fake_rotate_event to 'next_file'
* Note:
* slave->f_info updated by previous call to
* blr_file_next_exists()
*/
blr_slave_log_next_file_action(router,
slave,
c_prefix,
next_file,
SLAVE_EOF_ROTATE);
/* Reset encryption context */
MXS_FREE(slave->encryption_ctx);
slave->encryption_ctx = NULL;
/* Now pass the next_file to blr_slave_fake_rotate() */
#ifdef BLFILE_IN_SLAVE
if (blr_slave_fake_rotate(router,
slave,
&slave->file,
next_file))
#else
if (blr_slave_fake_rotate(router,
slave,
&file,
next_file))
#endif
return 0;
{
spinlock_acquire(&slave->catch_lock);
slave->cstate |= CS_EXPECTCB;
spinlock_release(&slave->catch_lock);
/**
* Note:
* Fake rotate just written to client,
* no need to call poll_fake_write_event()
*/
}
else
{
/* Set ERROR */
slave->state = BLRS_ERRORED;
/* Disconnect client */
dcb_close(slave->dcb);
#ifndef BLFILE_IN_SLAVE
/* Close file */
blr_close_binlog(router, file);
#endif
return 0;
}
}
}
else
{
/**
* Nothing has been written to client right now
{ /**
* Still reading from current slave file but
* nothing has been written to client right now
* (perhaps some ignorable / skipped events)
* just retry to read again.
*/
spinlock_acquire(&slave->catch_lock);
slave->cstate |= CS_EXPECTCB;
spinlock_release(&slave->catch_lock);
poll_fake_write_event(slave->dcb);
}
}
@ -2887,9 +3090,11 @@ blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
#ifndef BLFILE_IN_SLAVE
if (file)
{
/* Close file */
blr_close_binlog(router, file);
}
#endif
return rval;
}
@ -3112,7 +3317,7 @@ blr_slave_read_fde(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave)
static uint32_t
blr_slave_send_fde(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *fde)
{
GWBUF *head;
GWBUF *event;
uint8_t *ptr;
uint32_t chksum;
uint32_t event_size;
@ -3123,12 +3328,14 @@ blr_slave_send_fde(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *fde)
return 0;
}
event_ptr = GWBUF_DATA(fde);
if ((head = gwbuf_alloc(MYSQL_HEADER_LEN + 1)) == NULL)
event_size = GWBUF_LENGTH(fde);
if ((event = gwbuf_alloc(MYSQL_HEADER_LEN + 1 + event_size)) == NULL)
{
return 0;
}
ptr = GWBUF_DATA(head);
ptr = GWBUF_DATA(event);
event_size = GWBUF_LENGTH(fde);
@ -3137,13 +3344,15 @@ blr_slave_send_fde(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *fde)
ptr += 3;
*ptr++ = slave->seqno++;
*ptr++ = 0; // OK/ERR byte
head = gwbuf_append(head, fde);
event_ptr = GWBUF_DATA(fde);
encode_value(event_ptr, time(0), 32); // Overwrite timestamp
event_ptr += 13; // 4 time + 1 type + 4 server_id + 4 event_size
// Copy FDE data
memcpy(ptr, GWBUF_DATA(fde), event_size);
encode_value(ptr, time(0), 32); // Overwrite timestamp
ptr += 13; // 4 time + 1 type + 4 server_id + 4 event_size
/* event_ptr points to position of the next event */
encode_value(event_ptr, 0, 32); // Set next position to 0
encode_value(ptr, 0, 32); // Set next position to 0
/*
* Since we have changed the timestamp we must recalculate the CRC
@ -3152,14 +3361,14 @@ blr_slave_send_fde(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *fde)
* calculate a new checksum
* and write it into the header
*/
ptr = GWBUF_DATA(fde) + event_size - BINLOG_EVENT_CRC_SIZE;
ptr = GWBUF_DATA(event) + MYSQL_HEADER_LEN + 1 + event_size - BINLOG_EVENT_CRC_SIZE;
chksum = crc32(0L, NULL, 0);
chksum = crc32(chksum,
GWBUF_DATA(fde),
GWBUF_DATA(event) + MYSQL_HEADER_LEN + 1,
event_size - BINLOG_EVENT_CRC_SIZE);
encode_value(ptr, chksum, 32);
return MXS_SESSION_ROUTE_REPLY(slave->dcb->session, head);
return MXS_SESSION_ROUTE_REPLY(slave->dcb->session, event);
}
@ -5903,9 +6112,11 @@ blr_send_slave_heartbeat(void *inst)
while (sptr)
{
/* skip servers with state = 0 */
if ( (sptr->state == BLRS_DUMPING) && (sptr->heartbeat > 0) &&
((t_now + 1 - sptr->lastReply) >= sptr->heartbeat) )
if ((sptr->state == BLRS_DUMPING) &&
(sptr->heartbeat > 0) &&
((t_now + 1 - sptr->lastReply) >= sptr->heartbeat))
{
MXS_NOTICE("Sending Heartbeat to slave server-id %d. "
"Heartbeat interval is %d, last event time is %lu",
@ -5919,9 +6130,6 @@ blr_send_slave_heartbeat(void *inst)
/* Set last time */
sptr->lastReply = t_now;
}
sptr->lastReply = t_now;
}
sptr = sptr->next;
@ -6813,7 +7021,6 @@ static bool blr_slave_gtid_request(ROUTER_INSTANCE *router,
strcpy(slave->binlogfile, router_curr_file);
slave->binlog_pos = 4;
// TODO: Add prefix
MXS_INFO("Slave %d is registering with empty GTID:"
" sending events from current binlog file %s%s,"
" pos %" PRIu32 "",
@ -6877,9 +7084,10 @@ static bool blr_slave_gtid_request(ROUTER_INSTANCE *router,
snprintf(errmsg,
BINLOG_ERROR_MSG_LEN,
"Requested MariaDB GTID '%s' by server %lu"
" has not been found",
" not found. GTID_STRICT_MODE=%s",
slave->mariadb_gtid,
(unsigned long)slave->serverid);
(unsigned long)slave->serverid,
slave->gtid_strict_mode ? "ON" : "OFF");
errmsg[BINLOG_ERROR_MSG_LEN] = '\0';
/* Check strict mode */
@ -6988,7 +7196,7 @@ static bool blr_slave_gtid_request(ROUTER_INSTANCE *router,
}
}
/* Set GTID details in f_info*/
/* Set GTID details in f_info */
memcpy(&slave->f_info, &f_gtid, sizeof(MARIADB_GTID_INFO));
}
}
@ -8201,14 +8409,15 @@ static void blr_slave_skip_empty_files(ROUTER_INSTANCE *router,
* Stop if the new file is the current binlog file.
*/
while (!blr_compare_binlogs(router,
f_tree,
&f_tree->gtid_elms,
router_curr_file,
binlog_file) &&
blr_slave_get_file_size(file_path) <= 4 &&
blr_file_next_exists(router, slave, next_file))
{
// Log skipped file
MXS_INFO("Slave %s:%i, skip reading empty file '%s' (4 bytes size).",
MXS_INFO("Slave %s:%i, skip reading empty file '%s' "
"(0 or 4 bytes size).",
slave->dcb->remote,
dcb_get_port(slave->dcb),
binlog_file);
@ -9471,3 +9680,140 @@ static bool blr_apply_changes(ROUTER_INSTANCE *router,
return ret;
}
/**
* Saves a MARIADB_GTID_INFO data for later usage
*
* @param info The MARIADB_GTID_INFO data to copy
* @param save_info The MARIADB_GTID_INFO allocated
* buffer to save data
* @param save_prefix The allocated buffer where
* to save file prefix
*/
static void blr_slave_info_save(const MARIADB_GTID_INFO *info,
MARIADB_GTID_INFO *save_info,
char *save_prefix)
{
/* Save current file details */
memcpy(save_info, info, sizeof(MARIADB_GTID_INFO));
/* Fill save file prefix */
sprintf(save_prefix,
"%" PRIu32 "/%" PRIu32 "/",
save_info->gtid_elms.domain_id,
save_info->gtid_elms.server_id);
}
/**
* Log message for slave file End Of File
*
* @param router The current router instance
* @param slave The connected slave
* @param c_prefix The file prefix of slave file
* @param next_file The next file to read or fake rotate to
* @param log_action The action type to log
*/
static void blr_slave_log_next_file_action(const ROUTER_INSTANCE *router,
const ROUTER_SLAVE *slave,
const char *c_prefix,
const char *next_file,
slave_eof_action_t log_action)
{
char m_prefix[BINLOG_FILE_EXTRA_INFO] = "";
char r_prefix[BINLOG_FILE_EXTRA_INFO] = "";
bool s_tree = router->storage_type == BLR_BINLOG_STORAGE_TREE;
bool have_heartbeat = router->send_slave_heartbeat &&
(slave->heartbeat > 0);
spinlock_acquire(&router->binlog_lock);
if (s_tree)
{
/* Get master file prefix */
sprintf(m_prefix,
"%" PRIu32 "/%" PRIu32 "/",
router->mariadb10_gtid_domain,
router->orig_masterid);
/* Get rotating slave file prefix */
sprintf(r_prefix,
"%" PRIu32 "/%" PRIu32 "/",
slave->f_info.gtid_elms.domain_id,
slave->f_info.gtid_elms.server_id);
}
spinlock_release(&router->binlog_lock);
switch(log_action)
{
case SLAVE_EOF_ROTATE:
/* This has to be always logged */
MXS_WARNING("%s: Slave [%s]:%d, server-id %d reached end of file for binlog file [%s%s] "
"at %lu which is not the file currently being downloaded or last file found. "
"This may be caused by a previous failure of the master. "
"Current master binlog is [%s%s] at %lu, replication state is [%s]. "
"Now rotating to new file [%s%s]",
router->service->name,
slave->dcb->remote,
dcb_get_port(slave->dcb),
slave->serverid,
c_prefix,
slave->binlogfile,
(unsigned long)slave->binlog_pos,
m_prefix,
router->binlog_name[0] ? router->binlog_name : "no_set_yet",
router->binlog_position,
blrm_states[router->master_state],
r_prefix,
next_file);
break;
case SLAVE_EOF_ERROR:
/* Log error */
MXS_ERROR("%s: Slave [%s]:%d, server-id %d reached "
"end of file for '%s%s' and next file to read%s%s%s%s "
"is not %s. Force replication abort after %d retries.",
router->service->name,
slave->dcb->remote,
dcb_get_port(slave->dcb),
slave->serverid,
c_prefix,
slave->binlogfile,
next_file[0] ? " '" : "",
next_file[0] ? r_prefix : "",
next_file,
next_file[0] ? "'" : "",
next_file[0] ? "accessible" : "existent",
MISSING_FILE_READ_RETRIES);
break;
case SLAVE_EOF_WARNING:
/* We don't have the next_file, just warning */
MXS_WARNING("%s: Slave [%s]:%d, server-id %d reached end "
"of file for binlog file [%s%s] "
"at %lu. This is the last downloaded or "
"the last file found. "
"Next file%s%s%s%s is not %s. "
"This may be caused by a previous failure of "
"the master server. Current master binlog is "
"[%s%s] at %lu and replication state is [%s]. "
"The slave server is now in '%s' state.",
router->service->name,
slave->dcb->remote,
dcb_get_port(slave->dcb),
slave->serverid,
c_prefix,
slave->binlogfile,
(unsigned long)slave->binlog_pos,
next_file[0] ? " '" : "",
next_file[0] ? r_prefix : "",
next_file,
next_file[0] ? "'" : "",
next_file[0] ? "accessible" : "existent",
m_prefix,
router->binlog_name[0] ? router->binlog_name : "no_set_yet",
router->binlog_position,
blrm_states[router->master_state],
have_heartbeat ? "wait_state" : "read_again");
break;
default:
break;
}
}