Added blr_check_heartbeat as task
Added blr_check_heartbeat as task
This commit is contained in:
@ -105,6 +105,8 @@ extern void blr_cache_read_master_data(ROUTER_INSTANCE *router);
|
|||||||
extern char *decryptPassword(char *crypt);
|
extern char *decryptPassword(char *crypt);
|
||||||
extern char *create_hex_sha1_sha1_passwd(char *passwd);
|
extern char *create_hex_sha1_sha1_passwd(char *passwd);
|
||||||
extern int blr_read_events_all_events(ROUTER_INSTANCE *router, int fix, int debug);
|
extern int blr_read_events_all_events(ROUTER_INSTANCE *router, int fix, int debug);
|
||||||
|
void blr_master_close(ROUTER_INSTANCE *);
|
||||||
|
int blr_check_heartbeat(ROUTER_INSTANCE *router);
|
||||||
|
|
||||||
/** The module object definition */
|
/** The module object definition */
|
||||||
static ROUTER_OBJECT MyObject = {
|
static ROUTER_OBJECT MyObject = {
|
||||||
@ -967,11 +969,11 @@ struct tm tm;
|
|||||||
router_inst->stats.n_delayedreconnects);
|
router_inst->stats.n_delayedreconnects);
|
||||||
dcb_printf(dcb, "\tCurrent binlog file: %s\n",
|
dcb_printf(dcb, "\tCurrent binlog file: %s\n",
|
||||||
router_inst->binlog_name);
|
router_inst->binlog_name);
|
||||||
dcb_printf(dcb, "\tCurrent binlog position: %u\n",
|
dcb_printf(dcb, "\tCurrent binlog position: %lu\n",
|
||||||
router_inst->current_pos);
|
router_inst->current_pos);
|
||||||
if (router_inst->trx_safe) {
|
if (router_inst->trx_safe) {
|
||||||
if (router_inst->pending_transaction) {
|
if (router_inst->pending_transaction) {
|
||||||
dcb_printf(dcb, "\tCurrent open transaction pos: %u\n",
|
dcb_printf(dcb, "\tCurrent open transaction pos: %lu\n",
|
||||||
router_inst->binlog_position);
|
router_inst->binlog_position);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1004,41 +1006,53 @@ struct tm tm;
|
|||||||
router_inst->stats.n_reads);
|
router_inst->stats.n_reads);
|
||||||
dcb_printf(dcb, "\tNumber of residual data packets: %u\n",
|
dcb_printf(dcb, "\tNumber of residual data packets: %u\n",
|
||||||
router_inst->stats.n_residuals);
|
router_inst->stats.n_residuals);
|
||||||
dcb_printf(dcb, "\tAverage events per packet %.1f\n",
|
dcb_printf(dcb, "\tAverage events per packet: %.1f\n",
|
||||||
router_inst->stats.n_reads != 0 ? ((double)router_inst->stats.n_binlogs / router_inst->stats.n_reads) : 0);
|
router_inst->stats.n_reads != 0 ? ((double)router_inst->stats.n_binlogs / router_inst->stats.n_reads) : 0);
|
||||||
dcb_printf(dcb, "\tLast event from master at: %s",
|
|
||||||
buf);
|
|
||||||
dcb_printf(dcb, "\t (%d seconds ago)\n",
|
|
||||||
time(0) - router_inst->stats.lastReply);
|
|
||||||
|
|
||||||
if (!router_inst->mariadb10_compat) {
|
spinlock_acquire(&router_inst->lock);
|
||||||
dcb_printf(dcb, "\tLast event from master: 0x%x, %s",
|
if (router_inst->stats.lastReply) {
|
||||||
router_inst->lastEventReceived,
|
if (buf[strlen(buf)-1] == '\n') {
|
||||||
(router_inst->lastEventReceived >= 0 &&
|
buf[strlen(buf)-1] = '\0';
|
||||||
router_inst->lastEventReceived <= MAX_EVENT_TYPE) ?
|
}
|
||||||
event_names[router_inst->lastEventReceived] : "unknown");
|
dcb_printf(dcb, "\tLast event from master at: %s (%d seconds ago)\n",
|
||||||
} else {
|
buf, time(0) - router_inst->stats.lastReply);
|
||||||
char *ptr = NULL;
|
|
||||||
if (router_inst->lastEventReceived >= 0 && router_inst->lastEventReceived <= MAX_EVENT_TYPE) {
|
if (!router_inst->mariadb10_compat) {
|
||||||
ptr = event_names[router_inst->lastEventReceived];
|
dcb_printf(dcb, "\tLast event from master: 0x%x, %s\n",
|
||||||
|
router_inst->lastEventReceived,
|
||||||
|
(router_inst->lastEventReceived >= 0 &&
|
||||||
|
router_inst->lastEventReceived <= MAX_EVENT_TYPE) ?
|
||||||
|
event_names[router_inst->lastEventReceived] : "unknown");
|
||||||
} else {
|
} else {
|
||||||
/* Check MariaDB 10 new events */
|
char *ptr = NULL;
|
||||||
if (router_inst->lastEventReceived >= MARIADB_NEW_EVENTS_BEGIN && router_inst->lastEventReceived <= MAX_EVENT_TYPE_MARIADB10) {
|
if (router_inst->lastEventReceived >= 0 && router_inst->lastEventReceived <= MAX_EVENT_TYPE) {
|
||||||
ptr = event_names_mariadb10[(router_inst->lastEventReceived - MARIADB_NEW_EVENTS_BEGIN)];
|
ptr = event_names[router_inst->lastEventReceived];
|
||||||
|
} else {
|
||||||
|
/* Check MariaDB 10 new events */
|
||||||
|
if (router_inst->lastEventReceived >= MARIADB_NEW_EVENTS_BEGIN && router_inst->lastEventReceived <= MAX_EVENT_TYPE_MARIADB10) {
|
||||||
|
ptr = event_names_mariadb10[(router_inst->lastEventReceived - MARIADB_NEW_EVENTS_BEGIN)];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dcb_printf(dcb, "\tLast event from master: 0x%x, %s\n",
|
||||||
|
router_inst->lastEventReceived, (ptr != NULL) ? ptr : "unknown");
|
||||||
}
|
}
|
||||||
|
|
||||||
dcb_printf(dcb, "\tLast event from master: 0x%x, %s",
|
if (router_inst->lastEventTimestamp) {
|
||||||
router_inst->lastEventReceived, (ptr != NULL) ? ptr : "unknown");
|
time_t last_event = (time_t)router_inst->lastEventTimestamp;
|
||||||
}
|
localtime_r(&last_event, &tm);
|
||||||
|
asctime_r(&tm, buf);
|
||||||
if (router_inst->lastEventTimestamp)
|
if (buf[strlen(buf)-1] == '\n') {
|
||||||
{
|
buf[strlen(buf)-1] = '\0';
|
||||||
localtime_r((const time_t*)&router_inst->lastEventTimestamp, &tm);
|
}
|
||||||
asctime_r(&tm, buf);
|
dcb_printf(dcb, "\tLast binlog event timestamp: %ld (%s)\n",
|
||||||
dcb_printf(dcb, "\tLast binlog event timestamp: %ld (%s)\n",
|
|
||||||
router_inst->lastEventTimestamp, buf);
|
router_inst->lastEventTimestamp, buf);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
dcb_printf(dcb, "\tNo events received from master yet\n");
|
||||||
}
|
}
|
||||||
|
spinlock_release(&router_inst->lock);
|
||||||
|
|
||||||
if (router_inst->active_logs)
|
if (router_inst->active_logs)
|
||||||
dcb_printf(dcb, "\tRouter processing binlog records\n");
|
dcb_printf(dcb, "\tRouter processing binlog records\n");
|
||||||
if (router_inst->reconnect_pending)
|
if (router_inst->reconnect_pending)
|
||||||
@ -1162,7 +1176,8 @@ struct tm tm;
|
|||||||
if (session->lastEventTimestamp
|
if (session->lastEventTimestamp
|
||||||
&& router_inst->lastEventTimestamp)
|
&& router_inst->lastEventTimestamp)
|
||||||
{
|
{
|
||||||
localtime_r((const time_t*)&session->lastEventTimestamp, &tm);
|
time_t session_last_event = (time_t)session->lastEventTimestamp;
|
||||||
|
localtime_r(&session_last_event, &tm);
|
||||||
asctime_r(&tm, buf);
|
asctime_r(&tm, buf);
|
||||||
dcb_printf(dcb, "\t\tLast binlog event timestamp %u, %s", session->lastEventTimestamp, buf);
|
dcb_printf(dcb, "\t\tLast binlog event timestamp %u, %s", session->lastEventTimestamp, buf);
|
||||||
dcb_printf(dcb, "\t\tSeconds behind master %u\n", router_inst->lastEventTimestamp - session->lastEventTimestamp);
|
dcb_printf(dcb, "\t\tSeconds behind master %u\n", router_inst->lastEventTimestamp - session->lastEventTimestamp);
|
||||||
@ -1306,15 +1321,19 @@ unsigned long mysql_errno;
|
|||||||
|
|
||||||
LOGIF(LE, (skygw_log_write_flush(
|
LOGIF(LE, (skygw_log_write_flush(
|
||||||
LOGFILE_ERROR, "%s: Master connection error %lu '%s' in state '%s', "
|
LOGFILE_ERROR, "%s: Master connection error %lu '%s' in state '%s', "
|
||||||
"%sattempting reconnect to master",
|
"%sattempting reconnect to master %s:%d",
|
||||||
router->service->name, mysql_errno, errmsg,
|
router->service->name, mysql_errno, errmsg,
|
||||||
blrm_states[router->master_state], msg)));
|
blrm_states[router->master_state], msg,
|
||||||
|
router->service->dbref->server->name,
|
||||||
|
router->service->dbref->server->port)));
|
||||||
} else {
|
} else {
|
||||||
LOGIF(LE, (skygw_log_write_flush(
|
LOGIF(LE, (skygw_log_write_flush(
|
||||||
LOGFILE_ERROR, "%s: Master connection error %lu '%s' in state '%s', "
|
LOGFILE_ERROR, "%s: Master connection error %lu '%s' in state '%s', "
|
||||||
"%sattempting reconnect to master",
|
"%sattempting reconnect to master %s:%d",
|
||||||
router->service->name, router->m_errno, router->m_errmsg,
|
router->service->name, router->m_errno, router->m_errmsg,
|
||||||
blrm_states[router->master_state], msg)));
|
blrm_states[router->master_state], msg,
|
||||||
|
router->service->dbref->server->name,
|
||||||
|
router->service->dbref->server->port)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (errmsg)
|
if (errmsg)
|
||||||
@ -1877,3 +1896,56 @@ static int blr_check_binlog(ROUTER_INSTANCE *router) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks last heartbeat or last received event against router->heartbeat time interval
|
||||||
|
*
|
||||||
|
* @param router Current router instance
|
||||||
|
* @return 0 if master connection must be closed and opened again, 1 otherwise
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
blr_check_heartbeat(ROUTER_INSTANCE *router) {
|
||||||
|
time_t t_now = time(0);
|
||||||
|
char *event_desc = NULL;
|
||||||
|
|
||||||
|
if (router->master_state != BLRM_BINLOGDUMP) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!router->mariadb10_compat) {
|
||||||
|
event_desc = (router->lastEventReceived >= 0 &&
|
||||||
|
router->lastEventReceived <= MAX_EVENT_TYPE) ?
|
||||||
|
event_names[router->lastEventReceived] : "unknown";
|
||||||
|
} else {
|
||||||
|
if (router->lastEventReceived >= 0 &&
|
||||||
|
router->lastEventReceived <= MAX_EVENT_TYPE) {
|
||||||
|
event_desc = event_names[router->lastEventReceived];
|
||||||
|
} else {
|
||||||
|
/* Check MariaDB 10 new events */
|
||||||
|
if (router->lastEventReceived >= MARIADB_NEW_EVENTS_BEGIN &&
|
||||||
|
router->lastEventReceived <= MAX_EVENT_TYPE_MARIADB10) {
|
||||||
|
event_desc = event_names_mariadb10[(router->lastEventReceived - MARIADB_NEW_EVENTS_BEGIN)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (router->master_state == BLRM_BINLOGDUMP && router->lastEventReceived > 0) {
|
||||||
|
if ((t_now - router->stats.lastReply) > (router->heartbeat + 1)) {
|
||||||
|
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
|
||||||
|
"ERROR: No event received from master %s:%d in heartbeat period (%d seconds), last event (%s %d) received %lu seconds ago. Assuming connection is dead and reconnecting.",
|
||||||
|
router->service->dbref->server->name,
|
||||||
|
router->service->dbref->server->port,
|
||||||
|
router->heartbeat,
|
||||||
|
event_desc,
|
||||||
|
router->lastEventReceived,
|
||||||
|
t_now - router->stats.lastReply)));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@ -40,6 +40,7 @@
|
|||||||
* when an error is encountered in BLRM_BINLOGDUMP state.
|
* when an error is encountered in BLRM_BINLOGDUMP state.
|
||||||
* Server error code and msg are reported via SHOW SLAVE STATUS
|
* Server error code and msg are reported via SHOW SLAVE STATUS
|
||||||
* 03/08/2015 Massimiliano Pinto Initial implementation of transaction safety
|
* 03/08/2015 Massimiliano Pinto Initial implementation of transaction safety
|
||||||
|
* 13/08/2015 Massimiliano Pinto Addition of heartbeat check
|
||||||
*
|
*
|
||||||
* @endverbatim
|
* @endverbatim
|
||||||
*/
|
*/
|
||||||
@ -89,6 +90,8 @@ static char *blr_extract_column(GWBUF *buf, int col);
|
|||||||
void blr_cache_response(ROUTER_INSTANCE *router, char *response, GWBUF *buf);
|
void blr_cache_response(ROUTER_INSTANCE *router, char *response, GWBUF *buf);
|
||||||
void poll_fake_write_event(DCB *dcb);
|
void poll_fake_write_event(DCB *dcb);
|
||||||
GWBUF *blr_read_events_from_pos(ROUTER_INSTANCE *router, unsigned long long pos, REP_HEADER *hdr);
|
GWBUF *blr_read_events_from_pos(ROUTER_INSTANCE *router, unsigned long long pos, REP_HEADER *hdr);
|
||||||
|
static void blr_check_last_master_event(void *inst);
|
||||||
|
extern int blr_check_heartbeat(ROUTER_INSTANCE *router);
|
||||||
static int keepalive = 1;
|
static int keepalive = 1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -638,9 +641,22 @@ char query[128];
|
|||||||
router->service->dbref->server->port)));
|
router->service->dbref->server->port)));
|
||||||
break;
|
break;
|
||||||
case BLRM_BINLOGDUMP:
|
case BLRM_BINLOGDUMP:
|
||||||
|
{
|
||||||
|
char *name;
|
||||||
|
|
||||||
// Main body, we have received a binlog record from the master
|
// Main body, we have received a binlog record from the master
|
||||||
blr_handle_binlog_record(router, buf);
|
blr_handle_binlog_record(router, buf);
|
||||||
|
|
||||||
|
// set heartbeat check task
|
||||||
|
if ((name = (char *)malloc(80)) != NULL)
|
||||||
|
{
|
||||||
|
sprintf(name, "%s heartbeat", router->service->name);
|
||||||
|
hktask_add(name, blr_check_last_master_event, router, router->heartbeat);
|
||||||
|
free(name);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (router->reconnect_pending)
|
if (router->reconnect_pending)
|
||||||
@ -978,6 +994,14 @@ int n_bufs = -1, pn_bufs = -1;
|
|||||||
free(router->m_errmsg);
|
free(router->m_errmsg);
|
||||||
router->m_errmsg = NULL;
|
router->m_errmsg = NULL;
|
||||||
|
|
||||||
|
router->stats.n_binlogs++;
|
||||||
|
router->lastEventReceived = hdr.event_type;
|
||||||
|
router->lastEventTimestamp = hdr.timestamp;
|
||||||
|
|
||||||
|
#ifdef SHOW_EVENTS
|
||||||
|
printf("blr: event type 0x%02x, flags 0x%04x, event size %d, event timestamp %lu\n", hdr.event_type, hdr.flags, hdr.event_size, hdr.timestamp);
|
||||||
|
#endif
|
||||||
|
|
||||||
spinlock_release(&router->lock);
|
spinlock_release(&router->lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1012,13 +1036,7 @@ int n_bufs = -1, pn_bufs = -1;
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
router->stats.n_binlogs++;
|
|
||||||
router->lastEventReceived = hdr.event_type;
|
|
||||||
router->lastEventTimestamp = hdr.timestamp;
|
|
||||||
|
|
||||||
#ifdef SHOW_EVENTS
|
|
||||||
printf("blr: event type 0x%02x, flags 0x%04x, event size %d", hdr.event_type, hdr.flags, hdr.event_size);
|
|
||||||
#endif
|
|
||||||
/**
|
/**
|
||||||
* Check for an open transaction, if the option is set
|
* Check for an open transaction, if the option is set
|
||||||
* Only complete transactions should be sent to sleves
|
* Only complete transactions should be sent to sleves
|
||||||
@ -1118,6 +1136,7 @@ int n_bufs = -1, pn_bufs = -1;
|
|||||||
router->binlog_name,
|
router->binlog_name,
|
||||||
router->current_pos)));
|
router->current_pos)));
|
||||||
router->stats.n_fakeevents++;
|
router->stats.n_fakeevents++;
|
||||||
|
|
||||||
if (hdr.event_type == FORMAT_DESCRIPTION_EVENT)
|
if (hdr.event_type == FORMAT_DESCRIPTION_EVENT)
|
||||||
{
|
{
|
||||||
uint8_t *new_fde;
|
uint8_t *new_fde;
|
||||||
@ -1162,7 +1181,9 @@ int n_bufs = -1, pn_bufs = -1;
|
|||||||
"Binlog %s @ %d.",
|
"Binlog %s @ %d.",
|
||||||
router->binlog_name,
|
router->binlog_name,
|
||||||
router->current_pos)));
|
router->current_pos)));
|
||||||
|
|
||||||
router->stats.n_heartbeats++;
|
router->stats.n_heartbeats++;
|
||||||
|
|
||||||
}
|
}
|
||||||
else if (hdr.flags != LOG_EVENT_ARTIFICIAL_F)
|
else if (hdr.flags != LOG_EVENT_ARTIFICIAL_F)
|
||||||
{
|
{
|
||||||
@ -1870,3 +1891,103 @@ int n;
|
|||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stop and start the master connection
|
||||||
|
*
|
||||||
|
* @param router The router instance
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
blr_stop_start_master(ROUTER_INSTANCE *router) {
|
||||||
|
|
||||||
|
if (router->master) {
|
||||||
|
if (router->master->fd != -1 && router->master->state == DCB_STATE_POLLING) {
|
||||||
|
blr_master_close(router);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
spinlock_acquire(&router->lock);
|
||||||
|
|
||||||
|
router->master_state = BLRM_SLAVE_STOPPED;
|
||||||
|
|
||||||
|
/* set last_safe_pos */
|
||||||
|
router->last_safe_pos = router->binlog_position;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set router->prevbinlog to router->binlog_name
|
||||||
|
* The FDE event with current filename may arrive after STOP SLAVE is received
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (strcmp(router->binlog_name, router->prevbinlog) != 0)
|
||||||
|
strncpy(router->prevbinlog, router->binlog_name, BINLOG_FNAMELEN);
|
||||||
|
|
||||||
|
if (router->client) {
|
||||||
|
if (router->client->fd != -1 && router->client->state == DCB_STATE_POLLING) {
|
||||||
|
dcb_close(router->client);
|
||||||
|
router->client = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Discard the queued residual data */
|
||||||
|
while (router->residual)
|
||||||
|
{
|
||||||
|
router->residual = gwbuf_consume(router->residual, GWBUF_LENGTH(router->residual));
|
||||||
|
}
|
||||||
|
router->residual = NULL;
|
||||||
|
|
||||||
|
/* Now it is safe to unleash other threads on this router instance */
|
||||||
|
router->reconnect_pending = 0;
|
||||||
|
router->active_logs = 0;
|
||||||
|
|
||||||
|
router->master_state = BLRM_UNCONNECTED;
|
||||||
|
spinlock_release(&router->lock);
|
||||||
|
|
||||||
|
blr_start_master(router);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The heartbeat check function called from the housekeeper.
|
||||||
|
* We can try a new master connection if current one is seen out of date
|
||||||
|
*
|
||||||
|
* @param router Current router instance
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
blr_check_last_master_event(void *inst) {
|
||||||
|
ROUTER_INSTANCE *router = (ROUTER_INSTANCE *)inst;
|
||||||
|
int master_check = 1;
|
||||||
|
int master_state = BLRM_UNCONNECTED;
|
||||||
|
char *name = NULL;
|
||||||
|
|
||||||
|
spinlock_acquire(&router->lock);
|
||||||
|
|
||||||
|
master_check = blr_check_heartbeat(router);
|
||||||
|
|
||||||
|
master_state = router->master_state;
|
||||||
|
|
||||||
|
spinlock_release(&router->lock);
|
||||||
|
|
||||||
|
if (!master_check) {
|
||||||
|
/*
|
||||||
|
* stop current master connection
|
||||||
|
* and try a new connection
|
||||||
|
*/
|
||||||
|
blr_stop_start_master(router);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( (!master_check) || (master_state != BLRM_BINLOGDUMP) ) {
|
||||||
|
/*
|
||||||
|
* Remove the task, it will be added again
|
||||||
|
* when master state is back to BLRM_BINLOGDUMP
|
||||||
|
* by blr_master_response()
|
||||||
|
*/
|
||||||
|
if ((name = (char *)malloc(80)) != NULL) {
|
||||||
|
sprintf(name, "%s heartbeat", router->service->name);
|
||||||
|
|
||||||
|
hktask_remove(name);
|
||||||
|
|
||||||
|
free(name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user