MXS-1588: Wait on all slaves during switchover
During switchover, MASTER_GTID_WAIT is now called on all slaves. This makes switchover complete more slowly than before, but it is safer if log_slave_updates is not enabled on the new master server. Also, read_only is disabled on the demoted server if waiting on the slaves or the promotion fails. This should effectively cancel the switchover for the old master.
This commit is contained in:
@ -3360,7 +3360,7 @@ bool failover_wait_relay_log(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_maste
|
|||||||
* @param err_out json object for error printing. Can be NULL.
|
* @param err_out json object for error printing. Can be NULL.
|
||||||
* @return True if successful
|
* @return True if successful
|
||||||
*/
|
*/
|
||||||
bool promote_new_master(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_master, json_t** err_out)
|
bool promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out)
|
||||||
{
|
{
|
||||||
bool success = false;
|
bool success = false;
|
||||||
MXS_NOTICE("Promoting server '%s' to master.", new_master->server->unique_name);
|
MXS_NOTICE("Promoting server '%s' to master.", new_master->server->unique_name);
|
||||||
@ -3478,7 +3478,7 @@ static bool do_failover(MYSQL_MONITOR* mon, json_t** err_out)
|
|||||||
// Step 2: Wait until relay log consumed.
|
// Step 2: Wait until relay log consumed.
|
||||||
if (failover_wait_relay_log(mon, new_master, err_out) &&
|
if (failover_wait_relay_log(mon, new_master, err_out) &&
|
||||||
// Step 3: Stop and reset slave, set read-only to 0.
|
// Step 3: Stop and reset slave, set read-only to 0.
|
||||||
promote_new_master(mon, new_master, err_out))
|
promote_new_master(new_master, err_out))
|
||||||
{
|
{
|
||||||
// Step 4: Redirect slaves.
|
// Step 4: Redirect slaves.
|
||||||
int redirects = redirect_slaves(mon, slaves, new_master);
|
int redirects = redirect_slaves(mon, slaves, new_master);
|
||||||
@ -3714,6 +3714,48 @@ static bool switchover_wait_slave_catchup(MXS_MONITORED_SERVER* slave, const Gti
|
|||||||
return gtid_reached;
|
return gtid_reached;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wait until slave replication catches up with the master gtid for all slaves in the vector.
|
||||||
|
*
|
||||||
|
* @param slave Slaves to wait on
|
||||||
|
* @param gtid Which gtid must be reached
|
||||||
|
* @param total_timeout Maximum wait time in seconds
|
||||||
|
* @param read_timeout The value of read_timeout for the connection
|
||||||
|
* @param err_out json object for error printing. Can be NULL.
|
||||||
|
* @return True, if target gtid was reached within allotted time for all servers
|
||||||
|
*/
|
||||||
|
static bool switchover_wait_slaves_catchup(const ServerVector& slaves, const Gtid& gtid,
|
||||||
|
int total_timeout, int read_timeout,
|
||||||
|
json_t** err_out)
|
||||||
|
{
|
||||||
|
bool success = true;
|
||||||
|
int seconds_remaining = total_timeout;
|
||||||
|
|
||||||
|
for (ServerVector::const_iterator iter = slaves.begin();
|
||||||
|
iter != slaves.end() && success;
|
||||||
|
iter++)
|
||||||
|
{
|
||||||
|
if (seconds_remaining < 0)
|
||||||
|
{
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
time_t begin = time(NULL);
|
||||||
|
MXS_MONITORED_SERVER* slave = *iter;
|
||||||
|
if (switchover_wait_slave_catchup(slave, gtid, seconds_remaining, read_timeout, err_out))
|
||||||
|
{
|
||||||
|
seconds_remaining -= difftime(time(NULL), begin);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return success;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Starts a new slave connection on a server. Should be used on a demoted master server.
|
* Starts a new slave connection on a server. Should be used on a demoted master server.
|
||||||
*
|
*
|
||||||
@ -3818,12 +3860,16 @@ static bool do_switchover(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* current_mast
|
|||||||
bool rval = false;
|
bool rval = false;
|
||||||
MySqlServerInfo* curr_master_info = get_server_info(mon, demotion_target);
|
MySqlServerInfo* curr_master_info = get_server_info(mon, demotion_target);
|
||||||
// Step 2: Set read-only to 1, flush logs.
|
// Step 2: Set read-only to 1, flush logs.
|
||||||
if (switchover_demote_master(mon, demotion_target, curr_master_info, err_out) &&
|
if (switchover_demote_master(mon, demotion_target, curr_master_info, err_out))
|
||||||
// Step 3: Wait for the selected slave to catch up with master.
|
{
|
||||||
switchover_wait_slave_catchup(promotion_target, curr_master_info->gtid_binlog_pos,
|
// Step 3a: Wait for the selected slave to catch up with master.
|
||||||
|
if (switchover_wait_slave_catchup(promotion_target, curr_master_info->gtid_binlog_pos,
|
||||||
|
mon->switchover_timeout, mon->monitor->read_timeout, err_out) &&
|
||||||
|
// Step 3b: Wait for other slaves to catch up with master.
|
||||||
|
switchover_wait_slaves_catchup(slaves, curr_master_info->gtid_binlog_pos,
|
||||||
mon->switchover_timeout, mon->monitor->read_timeout, err_out) &&
|
mon->switchover_timeout, mon->monitor->read_timeout, err_out) &&
|
||||||
// Step 4: Stop and reset slave, set read-only to 0.
|
// Step 4: Stop and reset slave, set read-only to 0.
|
||||||
promote_new_master(mon, promotion_target, err_out))
|
promote_new_master(promotion_target, err_out))
|
||||||
{
|
{
|
||||||
// Step 5: Redirect slaves.
|
// Step 5: Redirect slaves.
|
||||||
int redirects = redirect_slaves(mon, slaves, promotion_target);
|
int redirects = redirect_slaves(mon, slaves, promotion_target);
|
||||||
@ -3843,6 +3889,22 @@ static bool do_switchover(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* current_mast
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Step 3a, 3b or 4 failed, try to undo step 2.
|
||||||
|
const char QUERY_UNDO[] = "SET GLOBAL read_only=0;";
|
||||||
|
if (mxs_mysql_query(demotion_target->con, QUERY_UNDO) == 0)
|
||||||
|
{
|
||||||
|
PRINT_MXS_JSON_ERROR(err_out, "read_only disabled on server %s.",
|
||||||
|
demotion_target->server->unique_name);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PRINT_MXS_JSON_ERROR(err_out, "Could not disable read_only on server %s: '%s'.",
|
||||||
|
demotion_target->server->unique_name, mysql_error(demotion_target->con));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user