MXS-1588: Wait on all slaves during switchover
During switchover, MASTER_GTID_WAIT is now called on all slaves. This makes switchover complete more slowly than before, but it is safer if log_slave_updates is not enabled on the new master server. Also, read_only is disabled again on the demoted server if waiting on the slaves or the promotion fails. This should effectively cancel the switchover for the old master.
This commit is contained in:
parent
200657e2f6
commit
047c08f577
@ -3360,7 +3360,7 @@ bool failover_wait_relay_log(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_maste
|
||||
* @param err_out json object for error printing. Can be NULL.
|
||||
* @return True if successful
|
||||
*/
|
||||
bool promote_new_master(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_master, json_t** err_out)
|
||||
bool promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out)
|
||||
{
|
||||
bool success = false;
|
||||
MXS_NOTICE("Promoting server '%s' to master.", new_master->server->unique_name);
|
||||
@ -3478,7 +3478,7 @@ static bool do_failover(MYSQL_MONITOR* mon, json_t** err_out)
|
||||
// Step 2: Wait until relay log consumed.
|
||||
if (failover_wait_relay_log(mon, new_master, err_out) &&
|
||||
// Step 3: Stop and reset slave, set read-only to 0.
|
||||
promote_new_master(mon, new_master, err_out))
|
||||
promote_new_master(new_master, err_out))
|
||||
{
|
||||
// Step 4: Redirect slaves.
|
||||
int redirects = redirect_slaves(mon, slaves, new_master);
|
||||
@ -3714,6 +3714,48 @@ static bool switchover_wait_slave_catchup(MXS_MONITORED_SERVER* slave, const Gti
|
||||
return gtid_reached;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait until slave replication catches up with the master gtid for all slaves in the vector.
|
||||
*
|
||||
* @param slave Slaves to wait on
|
||||
* @param gtid Which gtid must be reached
|
||||
* @param total_timeout Maximum wait time in seconds
|
||||
* @param read_timeout The value of read_timeout for the connection
|
||||
* @param err_out json object for error printing. Can be NULL.
|
||||
* @return True, if target gtid was reached within allotted time for all servers
|
||||
*/
|
||||
static bool switchover_wait_slaves_catchup(const ServerVector& slaves, const Gtid& gtid,
|
||||
int total_timeout, int read_timeout,
|
||||
json_t** err_out)
|
||||
{
|
||||
bool success = true;
|
||||
int seconds_remaining = total_timeout;
|
||||
|
||||
for (ServerVector::const_iterator iter = slaves.begin();
|
||||
iter != slaves.end() && success;
|
||||
iter++)
|
||||
{
|
||||
if (seconds_remaining < 0)
|
||||
{
|
||||
success = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
time_t begin = time(NULL);
|
||||
MXS_MONITORED_SERVER* slave = *iter;
|
||||
if (switchover_wait_slave_catchup(slave, gtid, seconds_remaining, read_timeout, err_out))
|
||||
{
|
||||
seconds_remaining -= difftime(time(NULL), begin);
|
||||
}
|
||||
else
|
||||
{
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts a new slave connection on a server. Should be used on a demoted master server.
|
||||
*
|
||||
@ -3818,28 +3860,48 @@ static bool do_switchover(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* current_mast
|
||||
bool rval = false;
|
||||
MySqlServerInfo* curr_master_info = get_server_info(mon, demotion_target);
|
||||
// Step 2: Set read-only to 1, flush logs.
|
||||
if (switchover_demote_master(mon, demotion_target, curr_master_info, err_out) &&
|
||||
// Step 3: Wait for the selected slave to catch up with master.
|
||||
switchover_wait_slave_catchup(promotion_target, curr_master_info->gtid_binlog_pos,
|
||||
mon->switchover_timeout, mon->monitor->read_timeout, err_out) &&
|
||||
// Step 4: Stop and reset slave, set read-only to 0.
|
||||
promote_new_master(mon, promotion_target, err_out))
|
||||
if (switchover_demote_master(mon, demotion_target, curr_master_info, err_out))
|
||||
{
|
||||
// Step 5: Redirect slaves.
|
||||
int redirects = redirect_slaves(mon, slaves, promotion_target);
|
||||
// Step 6: Set the old master to replicate from the new.
|
||||
bool start_ok = switchover_start_slave(mon, demotion_target, promotion_target);
|
||||
rval = slaves.empty() ? start_ok : start_ok || redirects > 0;
|
||||
if (rval == false)
|
||||
// Step 3a: Wait for the selected slave to catch up with master.
|
||||
if (switchover_wait_slave_catchup(promotion_target, curr_master_info->gtid_binlog_pos,
|
||||
mon->switchover_timeout, mon->monitor->read_timeout, err_out) &&
|
||||
// Step 3b: Wait for other slaves to catch up with master.
|
||||
switchover_wait_slaves_catchup(slaves, curr_master_info->gtid_binlog_pos,
|
||||
mon->switchover_timeout, mon->monitor->read_timeout, err_out) &&
|
||||
// Step 4: Stop and reset slave, set read-only to 0.
|
||||
promote_new_master(promotion_target, err_out))
|
||||
{
|
||||
// This is a special case. Individual server errors have already been printed to the log.
|
||||
// For JSON, gather the errors again.
|
||||
const char MSG[] = "Could not redirect any slaves to the new master.";
|
||||
MXS_ERROR(MSG);
|
||||
if (err_out)
|
||||
// Step 5: Redirect slaves.
|
||||
int redirects = redirect_slaves(mon, slaves, promotion_target);
|
||||
// Step 6: Set the old master to replicate from the new.
|
||||
bool start_ok = switchover_start_slave(mon, demotion_target, promotion_target);
|
||||
rval = slaves.empty() ? start_ok : start_ok || redirects > 0;
|
||||
if (rval == false)
|
||||
{
|
||||
string combined_error = get_connection_errors(slaves, demotion_target);
|
||||
*err_out = mxs_json_error_append(*err_out, "%s Errors: %s.", MSG, combined_error.c_str());
|
||||
// This is a special case. Individual server errors have already been printed to the log.
|
||||
// For JSON, gather the errors again.
|
||||
const char MSG[] = "Could not redirect any slaves to the new master.";
|
||||
MXS_ERROR(MSG);
|
||||
if (err_out)
|
||||
{
|
||||
string combined_error = get_connection_errors(slaves, demotion_target);
|
||||
*err_out = mxs_json_error_append(*err_out, "%s Errors: %s.", MSG, combined_error.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Step 3a, 3b or 4 failed, try to undo step 2.
|
||||
const char QUERY_UNDO[] = "SET GLOBAL read_only=0;";
|
||||
if (mxs_mysql_query(demotion_target->con, QUERY_UNDO) == 0)
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(err_out, "read_only disabled on server %s.",
|
||||
demotion_target->server->unique_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(err_out, "Could not disable read_only on server %s: '%s'.",
|
||||
demotion_target->server->unique_name, mysql_error(demotion_target->con));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user