Resolve issue with incorrectly marking slave connection as errored

This commit is contained in:
Mark Riddoch
2014-10-21 14:31:05 +01:00
parent 554a054e18
commit 932fc5dc2c
5 changed files with 61 additions and 6 deletions

View File

@ -400,6 +400,7 @@ static char *blrs_states[] = { "Created", "Unregistered", "Registered",
extern void blr_start_master(ROUTER_INSTANCE *);
extern void blr_master_response(ROUTER_INSTANCE *, GWBUF *);
extern void blr_master_reconnect(ROUTER_INSTANCE *);
extern int blr_master_connected(ROUTER_INSTANCE *);
extern int blr_slave_request(ROUTER_INSTANCE *, ROUTER_SLAVE *, GWBUF *);
extern void blr_slave_rotate(ROUTER_SLAVE *slave, uint8_t *ptr);
@ -413,4 +414,5 @@ extern void blr_file_flush(ROUTER_INSTANCE *);
extern BLFILE *blr_open_binlog(ROUTER_INSTANCE *, char *);
extern GWBUF *blr_read_binlog(ROUTER_INSTANCE *, BLFILE *, unsigned int, REP_HEADER *);
extern void blr_close_binlog(ROUTER_INSTANCE *, BLFILE *);
extern unsigned long blr_file_size(BLFILE *);
#endif

View File

@ -809,10 +809,21 @@ static void
errorReply(ROUTER *instance, void *router_session, GWBUF *message, DCB *backend_dcb, error_action_t action, bool *succp)
{
    /*
     * Log the error reply together with any pending socket error on the
     * master connection, report failure through *succp and trigger a
     * reconnect to the master.
     *
     * router_session, backend_dcb and action are not used by this handler.
     */
    ROUTER_INSTANCE *router = (ROUTER_INSTANCE *)instance;
    int             error = 0;
    socklen_t       len = sizeof(error);
    char            msg[85] = "";   /* stays empty when there is no socket error */

    /*
     * getsockopt() returns 0 on success. Only translate the error number when
     * the call succeeded and SO_ERROR actually holds a pending error;
     * otherwise `error` carries no meaningful value.
     */
    if (router->master != NULL
        && getsockopt(router->master->fd, SOL_SOCKET, SO_ERROR, &error, &len) == 0
        && error != 0)
    {
        /*
         * At most 80 bytes are written, leaving room in msg for the
         * separating space appended below.
         * NOTE(review): this relies on the XSI strerror_r() that fills the
         * supplied buffer. If the file is built with _GNU_SOURCE the GNU
         * variant may return a static string and leave msg empty - confirm.
         */
        strerror_r(error, msg, 80);
        strcat(msg, " ");
    }

    /*
     * NOTE(review): `message` is a GWBUF pointer but is formatted with %s;
     * confirm whether the buffer payload should be extracted instead.
     */
    LOGIF(LE, (skygw_log_write_flush(
        LOGFILE_ERROR, "Erorr Reply '%s', %sattempting reconnect to master",
        message, msg)));

    *succp = false;
    blr_master_reconnect(router);
}

View File

@ -522,6 +522,13 @@ uint32_t rval = 0, shift = 0;
return rval;
}
/**
* Log the event header of binlog event
*
* @param file The log file into which to write the entry
* @param msg A message string to precede the header with
* @param ptr The event header raw data
*/
static void
blr_log_header(logfile_id_t file, char *msg, uint8_t *ptr)
{
@ -535,3 +542,19 @@ int i;
skygw_log_write_flush(file, "%s", buf);
}
/**
 * Obtain the current on-disk size of a binlog file.
 *
 * The size is taken from fstat() on the descriptor held in the binlog
 * file structure.
 *
 * @param file	The binlog file
 * @return The size reported by fstat(), or 0 if fstat() fails
 */
unsigned long
blr_file_size(BLFILE *file)
{
    struct stat info;

    if (fstat(file->fd, &info) != 0)
    {
        /* Could not stat the descriptor: report an empty file */
        return 0;
    }
    return info.st_size;
}

View File

@ -138,6 +138,7 @@ blr_restart_master(ROUTER_INSTANCE *router)
GWBUF *ptr;
dcb_close(router->master);
dcb_close(router->client);
dcb_free(router->master);
dcb_free(router->client);
@ -1069,3 +1070,16 @@ int i;
skygw_log_write_flush(file, "%s", buf);
}
/**
 * Check if the master connection is in place and we
 * are downloading binlogs
 *
 * @param router	The router instance
 * @return non-zero if the master state is BLRM_BINLOGDUMP, i.e. we are
 *         receiving binlog records, otherwise zero
 */
int
blr_master_connected(ROUTER_INSTANCE *router)
{
    if (router->master_state == BLRM_BINLOGDUMP)
    {
        return 1;
    }
    return 0;
}

View File

@ -810,7 +810,10 @@ uint8_t *ptr;
}
else
{
if (router->rotating != 0 && strcmp(router->binlog_name, slave->binlogfile) != 0)
if (slave->binlog_pos >= blr_file_size(slave->file)
&& router->rotating == 0
&& strcmp(router->binlog_name, slave->binlogfile) != 0
&& blr_master_connected(router))
{
/* We may have reached the end of file of a non-current
* binlog file.
@ -821,9 +824,11 @@ uint8_t *ptr;
* we ignore these issues during the rotate processing.
*/
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
"Slave reached end of file for binlong file %s "
"which is not the file currently being downloaded.",
slave->binlogfile)));
"Slave reached end of file for binlong file %s at %u "
"which is not the file currently being downloaded. "
"Master binlog is %s, %lu.",
slave->binlogfile, slave->binlog_pos,
router->binlog_name, router->binlog_position)));
slave->state = BLRS_ERRORED;
}
else