Added a variable for current event being processed after a transaction is committed.

This commit is contained in:
Markus Makela
2015-10-23 12:33:51 +03:00
parent a8f866d372
commit 40ffe21dd8
5 changed files with 99 additions and 38 deletions

View File

@ -509,6 +509,7 @@ char task_name[BLRM_TASK_NAME_LEN+1] = "";
inst->binlog_position = 0;
inst->current_pos = 0;
inst->current_safe_event = 0;
strcpy(inst->binlog_name, "");
strcpy(inst->prevbinlog, "");

View File

@ -199,6 +199,7 @@ unsigned char magic[] = BINLOG_MAGIC;
write(fd, magic, 4);
router->current_pos = 4; /* Initial position after the magic number */
router->binlog_position = 4; /* Initial position after the magic number */
router->current_safe_event = 4;
}
@ -852,6 +853,7 @@ double average_bytes = 0;
router->current_pos = 4;
router->binlog_position = 4;
router->current_safe_event = 4;
while (1){
@ -934,6 +936,7 @@ double average_bytes = 0;
if (pending_transaction) {
router->binlog_position = last_known_commit;
router->current_safe_event = last_known_commit;
router->current_pos = pos;
router->pending_transaction = 1;
pending_transaction = 0;
@ -948,6 +951,7 @@ double average_bytes = 0;
/* any error */
if (n != 0) {
router->binlog_position = last_known_commit;
router->current_safe_event = last_known_commit;
router->current_pos = pos;
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
@ -967,6 +971,7 @@ double average_bytes = 0;
return 1;
} else {
router->binlog_position = pos;
router->current_safe_event = pos;
router->current_pos = pos;
return 0;
@ -1009,6 +1014,7 @@ double average_bytes = 0;
if (event_error) {
router->binlog_position = last_known_commit;
router->current_safe_event = last_known_commit;
router->current_pos = pos;
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
@ -1039,6 +1045,7 @@ double average_bytes = 0;
hdr.event_size, pos)));
router->binlog_position = last_known_commit;
router->current_safe_event = last_known_commit;
router->current_pos = pos;
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
@ -1067,6 +1074,7 @@ double average_bytes = 0;
hdr.event_size, pos)));
router->binlog_position = last_known_commit;
router->current_safe_event = last_known_commit;
router->current_pos = pos;
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
@ -1123,6 +1131,7 @@ double average_bytes = 0;
gwbuf_free(result);
router->binlog_position = last_known_commit;
router->current_safe_event = last_known_commit;
router->current_pos = pos;
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
@ -1367,6 +1376,7 @@ double average_bytes = 0;
pos)));
router->binlog_position = last_known_commit;
router->current_safe_event = last_known_commit;
router->current_pos = pos;
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
@ -1396,6 +1406,7 @@ double average_bytes = 0;
pos)));
router->binlog_position = last_known_commit;
router->current_safe_event = last_known_commit;
router->current_pos = pos;
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
@ -1445,6 +1456,7 @@ double average_bytes = 0;
last_known_commit)));
router->binlog_position = last_known_commit;
router->current_safe_event = last_known_commit;
router->current_pos = pos;
router->pending_transaction = 1;
@ -1456,6 +1468,7 @@ double average_bytes = 0;
return 0;
} else {
router->binlog_position = pos;
router->current_safe_event = pos;
router->current_pos = pos;
return 0;

View File

@ -1064,6 +1064,7 @@ int n_bufs = -1, pn_bufs = -1;
spinlock_acquire(&router->binlog_lock);
router->binlog_position = router->current_pos;
router->current_safe_event = router->current_pos;
spinlock_release(&router->binlog_lock);
}
@ -1283,6 +1284,7 @@ int n_bufs = -1, pn_bufs = -1;
spinlock_acquire(&router->binlog_lock);
router->binlog_position = router->current_pos;
router->current_safe_event = router->current_pos;
spinlock_release(&router->binlog_lock);
@ -1325,6 +1327,13 @@ int n_bufs = -1, pn_bufs = -1;
spinlock_acquire(&router->binlog_lock);
/** The current safe position is only updated
* if it points to the event we just distributed. */
if(router->current_safe_event == pos)
{
router->current_safe_event = new_hdr.next_pos;
}
pos = new_hdr.next_pos;
spinlock_release(&router->binlog_lock);
@ -1596,6 +1605,10 @@ uint8_t *buf;
ROUTER_SLAVE *slave, *nextslave;
int action;
spinlock_acquire(&router->binlog_lock);
uint64_t current_safe_event = router->current_safe_event;
spinlock_release(&router->binlog_lock);
spinlock_acquire(&router->lock);
slave = router->slaves;
while (slave)
@ -1634,13 +1647,69 @@ int action;
slave->stats.n_actions[action-1]++;
spinlock_release(&slave->catch_lock);
bool sendevent = false;
bool forcecatchup = false;
bool alreadysent = false;
if (action == 1)
{
if (slave->binlog_pos <= router->last_written &&
if(router->trx_safe && slave->binlog_pos == current_safe_event &&
(strcmp(slave->binlogfile, router->binlog_name) == 0 ||
(hdr->event_type == ROTATE_EVENT &&
strcmp(slave->binlogfile, router->prevbinlog))))
{
/**
* Slave needs the current event being distributed
*/
sendevent = true;
}
else if (slave->binlog_pos == router->last_written &&
(strcmp(slave->binlogfile, router->binlog_name) == 0 ||
(hdr->event_type == ROTATE_EVENT &&
strcmp(slave->binlogfile, router->prevbinlog))))
{
/**
* Transaction safety is off or there are no pending transactions
*/
sendevent = true;
}
else if (slave->binlog_pos == hdr->next_pos
&& strcmp(slave->binlogfile, router->binlog_name) == 0)
{
/*
* Slave has already read record from file, no
* need to distrbute this event
*/
alreadysent = true;
}
else if ((slave->binlog_pos > hdr->next_pos - hdr->event_size)
&& strcmp(slave->binlogfile, router->binlog_name) == 0)
{
/*
* The slave is ahead of the master, this should never
* happen. Force the slave to catchup mode in order to
* try to resolve the issue.
*/
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
"Slave %d is ahead of expected position %s@%d. "
"Expected position %d",
slave->serverid, slave->binlogfile,
(unsigned long)slave->binlog_pos,
hdr->next_pos - hdr->event_size)));
forcecatchup = true;
}
else
{
/*
* The slave is not at the position it should be. Force it into
* catchup mode rather than send this event.
*/
forcecatchup = true;
}
if(sendevent)
{
/*
* The slave should be up to date, check that the binlog
* position matches the event we have to distribute or
@ -1685,49 +1754,20 @@ int action;
}
spinlock_release(&slave->catch_lock);
}
else if (slave->binlog_pos == hdr->next_pos
&& strcmp(slave->binlogfile, router->binlog_name) == 0)
{
/*
* Slave has already read record from file, no
* need to distrbute this event
*/
spinlock_acquire(&slave->catch_lock);
else if (alreadysent)
{
spinlock_acquire(&slave->catch_lock);
slave->cstate &= ~CS_BUSY;
spinlock_release(&slave->catch_lock);
}
else if ((slave->binlog_pos > hdr->next_pos - hdr->event_size)
&& strcmp(slave->binlogfile, router->binlog_name) == 0)
{
/*
* The slave is ahead of the master, this should never
* happen. Force the slave to catchup mode in order to
* try to resolve the issue.
*/
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
"Slave %d is ahead of expected position %s@%d. "
"Expected position %d",
slave->serverid, slave->binlogfile,
(unsigned long)slave->binlog_pos,
hdr->next_pos - hdr->event_size)));
spinlock_acquire(&slave->catch_lock);
}
else if (forcecatchup)
{
spinlock_acquire(&slave->catch_lock);
slave->cstate &= ~(CS_UPTODATE|CS_BUSY);
slave->cstate |= CS_EXPECTCB;
spinlock_release(&slave->catch_lock);
poll_fake_write_event(slave->dcb);
}
else
{
/*
* The slave is not at the position it should be. Force it into
* catchup mode rather than send this event.
*/
spinlock_acquire(&slave->catch_lock);
slave->cstate &= ~(CS_UPTODATE|CS_BUSY);
slave->cstate |= CS_EXPECTCB;
spinlock_release(&slave->catch_lock);
poll_fake_write_event(slave->dcb);
}
}
}
else if (action == 3)
{

View File

@ -2925,6 +2925,7 @@ blr_start_slave(ROUTER_INSTANCE* router, ROUTER_SLAVE* slave)
router->master_state = BLRM_UNCONNECTED;
router->current_pos = 4;
router->binlog_position = 4;
router->current_safe_event = 4;
spinlock_release(&router->lock);
@ -3225,6 +3226,7 @@ int blr_handle_change_master(ROUTER_INSTANCE* router, char *command, char *error
router->current_pos = 4;
router->binlog_position = 4;
router->current_safe_event = 4;
LOGIF(LT, (skygw_log_write(LOGFILE_TRACE, "%s: New MASTER_LOG_FILE is [%s]",
router->service->name,
@ -3282,6 +3284,7 @@ int blr_handle_change_master(ROUTER_INSTANCE* router, char *command, char *error
if (router->master_state == BLRM_UNCONFIGURED) {
router->current_pos = 4;
router->binlog_position = 4;
router->current_safe_event = 4;
memset(router->binlog_name, '\0', sizeof(router->binlog_name));
strncpy(router->binlog_name, master_logfile, BINLOG_FNAMELEN);
@ -3550,6 +3553,7 @@ blr_master_set_empty_config(ROUTER_INSTANCE *router) {
router->current_pos = 4;
router->binlog_position = 4;
router->current_safe_event = 4;
strcpy(router->binlog_name, "");
}
@ -3565,6 +3569,7 @@ blr_master_apply_config(ROUTER_INSTANCE *router, MASTER_SERVER_CFG *prev_master)
server_update_port(router->service->dbref->server, prev_master->port);
router->current_pos = prev_master->pos;
router->binlog_position = prev_master->safe_pos;
router->current_safe_event = prev_master->safe_pos;
strcpy(router->binlog_name, prev_master->logfile);
if (router->user) {
free(router->user);