MXS-1845 Switchover cleanup
Several small changes: the binlog is flushed at the end of old master demotion, only the new master is required to catch up to the old master, and the same replication check method is used as in failover.
parent 49e85d9a28
commit 30eb21914f
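To make the reworked flow easier to follow, here is a self-contained C++ sketch of the switchover sequence the diff below implements: demote the old master (with FLUSH LOGS now at the end of demotion), let only the promotion target catch up, promote it, then redirect the rest and run the same replication check as failover. All names and types in the sketch are hypothetical stand-ins, not MaxScale's actual API.

#include <iostream>

// Hypothetical stand-in for a monitored server; not MaxScale's MariaDBServer.
struct Server { const char* name; };

// Each step is a stub that only reports what the real monitor code would do.
static bool demote(Server& old_master)
{
    // read_only on, events off, demotion sql file, FLUSH LOGS, refresh gtid:s.
    std::cout << "demote " << old_master.name << '\n';
    return true;
}

static bool catchup_to_master(Server& new_master, Server& old_master)
{
    // Only the promotion target waits for the old master's gtid_binlog_pos.
    std::cout << new_master.name << " catches up to " << old_master.name << '\n';
    return true;
}

static bool promote(Server& new_master)
{
    // Remove slave connections, set read_only off, etc.
    std::cout << "promote " << new_master.name << '\n';
    return true;
}

static void redirect_and_check(Server& new_master, Server& old_master)
{
    // Redirect the remaining slaves plus the old master, then verify
    // replication with the same check that failover uses.
    std::cout << "redirect slaves to " << new_master.name
              << " and start replication on " << old_master.name << '\n';
}

int main()
{
    Server old_master{"server1"};
    Server new_master{"server2"};
    if (demote(old_master) && catchup_to_master(new_master, old_master) && promote(new_master))
    {
        redirect_and_check(new_master, old_master);
    }
    return 0;
}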
@@ -733,29 +733,31 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
ServerArray redirectable_slaves = get_redirectables(promotion_target, demotion_target);

bool rval = false;
// Step 2: Set read-only to on, flush logs, update master gtid:s
// Step 2: Set read-only to on, flush logs, update gtid:s.
if (demotion_target->demote(op))
{
m_cluster_modified = true;
bool catchup_and_promote_success = false;
maxbase::StopWatch timer;
// Step 3: Wait for the slaves (including promotion target) to catch up with master.
ServerArray catchup_slaves = redirectable_slaves;
catchup_slaves.push_back(promotion_target);
if (switchover_wait_slaves_catchup(catchup_slaves,
demotion_target->m_gtid_binlog_pos,
op.time_remaining.secs(),
error_out))
StopWatch timer;
// Step 3: Wait for the promotion target to catch up with the demotion target. Disregard the other
// slaves of the promotion target to avoid needless waiting.
// The gtid:s of the demotion target were updated at the end of demotion.
if (promotion_target->catchup_to_master(op))
{
auto step3_duration = timer.lap();
MXS_DEBUG("Switchover: slave catchup took %.1f seconds.", step3_duration.secs());
op.time_remaining -= step3_duration;

// Step 4: On new master STOP and RESET SLAVE, set read-only to off.
MXS_INFO("Switchover: Catchup took %.1f seconds.", timer.lap().secs());
// Step 4: On new master: remove slave connections, set read-only to OFF etc.
if (promotion_target->promote(op))
{
// Point of no return. Even if following steps fail, do not try to undo.
// Switchover considered at least partially successful.
catchup_and_promote_success = true;
m_next_master = promotion_target;
rval = true;
if (op.demotion_target_is_master)
{
// Force a master swap on next tick.
m_next_master = promotion_target;
}

timer.restart();
// Step 5: Redirect slaves and start replication on old master.
ServerArray redirected_slaves;
@@ -764,35 +766,20 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
{
redirected_slaves.push_back(demotion_target);
}
op.time_remaining -= timer.lap();

int redirects = redirect_slaves_ex(op, redirectable_slaves, &redirected_slaves);

bool success = redirectable_slaves.empty() ? start_ok : start_ok || redirects > 0;
if (success)
{
op.time_remaining -= timer.lap();
// Step 6: Finally, add an event to the new master to advance gtid and wait for the slaves
// to receive it. If using external replication, skip this step. Come up with an
// alternative later.
if (m_external_master_port != PORT_UNKNOWN)
{
MXS_WARNING("Replicating from external master, skipping final check.");
rval = true;
}
else if (wait_cluster_stabilization(promotion_target,
redirected_slaves,
op.time_remaining.secs()))
{
rval = true;
auto step6_duration = timer.lap();
op.time_remaining -= step6_duration;
MXS_DEBUG("Switchover: slave replication confirmation took %.1f seconds with "
"%.1f seconds to spare.",
step6_duration.secs(), op.time_remaining.secs());
}
}
else
{
print_redirect_errors(demotion_target, redirectable_slaves, error_out);
timer.restart();
// Step 6: Finally, check that slaves are replicating.
wait_cluster_stabilization_ex(op, redirected_slaves);
auto step6_duration = timer.lap();
MXS_INFO("Switchover: slave replication confirmation took %.1f seconds with "
"%.1f seconds to spare.",
step6_duration.secs(), op.time_remaining.secs());
}
}
}
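For context on the new step 6, a minimal sketch of what "check that slaves are replicating" amounts to: every redirected slave should have running IO and SQL threads pointed at the new master. The SlaveStatus struct and its field names are assumptions made for illustration; the real wait_cluster_stabilization_ex is not part of this diff.

#include <string>
#include <vector>

// Hypothetical slave status fields; MaxScale's real structures differ.
struct SlaveStatus
{
    std::string master_host;
    int master_port = 0;
    bool io_running = false;
    bool sql_running = false;
};

// Returns true if every redirected slave replicates from the new master.
// A sketch of the idea behind a stabilization check, not the monitor's code.
static bool slaves_stabilized(const std::vector<SlaveStatus>& slaves,
                              const std::string& master_host, int master_port)
{
    for (const auto& s : slaves)
    {
        if (s.master_host != master_host || s.master_port != master_port
            || !s.io_running || !s.sql_running)
        {
            return false;
        }
    }
    return true;
}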
@@ -994,46 +981,6 @@ bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, jso
return !any_error();
}

/**
* Wait until slave replication catches up with the master gtid for all slaves in the vector.
*
* @param slave Slaves to wait on
* @param gtid Which gtid must be reached
* @param total_timeout Maximum wait time in seconds
* @param err_out json object for error printing. Can be NULL.
* @return True, if target gtid was reached within allotted time for all servers
*/
bool MariaDBMonitor::switchover_wait_slaves_catchup(const ServerArray& slaves,
const GtidList& gtid,
int total_timeout,
json_t** err_out)
{
bool success = true;
int seconds_remaining = total_timeout;

for (auto iter = slaves.begin(); iter != slaves.end() && success; iter++)
{
if (seconds_remaining <= 0)
{
success = false;
}
else
{
time_t begin = time(NULL);
MariaDBServer* slave_server = *iter;
if (slave_server->wait_until_gtid(gtid, seconds_remaining, err_out))
{
seconds_remaining -= difftime(time(NULL), begin);
}
else
{
success = false;
}
}
}
return success;
}

/**
* Send an event to new master and wait for slaves to get the event.
*
@@ -234,10 +234,6 @@ private:
Log log_mode, json_t** error_out);
bool switchover_perform(ClusterOperation& operation);
bool switchover_demote_master(MariaDBServer* current_master, json_t** err_out);
bool switchover_wait_slaves_catchup(const ServerArray& slaves,
const GtidList& gtid,
int total_timeout,
json_t** err_out);
bool switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master);
bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
void handle_low_disk_space_master();
@@ -502,20 +502,24 @@ void MariaDBServer::warn_replication_settings() const
}
}

bool MariaDBServer::wait_until_gtid(const GtidList& target, int timeout, json_t** err_out)
bool MariaDBServer::catchup_to_master(ClusterOperation& op)
{
bool gtid_reached = false;
bool error = false;
/* Prefer to use gtid_binlog_pos, as that is more reliable. But if log_slave_updates is not on,
* use gtid_current_pos. */
const bool use_binlog_pos = m_rpl_settings.log_bin && m_rpl_settings.log_slave_updates;
bool time_is_up = false; // Check at least once.
bool gtid_reached = false;
bool error = false;
const GtidList& target = op.demotion_target->m_gtid_binlog_pos;
json_t** error_out = op.error_out;

int seconds_remaining = 1; // Cheat a bit here to allow at least one iteration.
int sleep_ms = 200; // How long to sleep on next iteration. Incremented slowly.
time_t start_time = time(NULL);
while (seconds_remaining > 0 && !gtid_reached && !error)
Duration sleep_time(0.2); // How long to sleep before next iteration. Incremented slowly.
StopWatch timer;

while (!time_is_up && !gtid_reached && !error)
{
if (update_gtids())
string error_msg;
if (update_gtids(&error_msg))
{
const GtidList& compare_to = use_binlog_pos ? m_gtid_binlog_pos : m_gtid_current_pos;
if (target.events_ahead(compare_to, GtidList::MISSING_DOMAIN_IGNORE) == 0)
@@ -524,31 +528,33 @@ bool MariaDBServer::wait_until_gtid(const GtidList& target, int timeout, json_t*
}
else
{
// Query was successful but target gtid not yet reached. Check elapsed time.
seconds_remaining = timeout - difftime(time(NULL), start_time);
if (seconds_remaining > 0)
// Query was successful but target gtid not yet reached. Check how much time left.
op.time_remaining -= timer.lap();
if (op.time_remaining.secs() > 0)
{
// Sleep for a moment, then try again.
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms));
sleep_ms += 100; // Sleep a bit more next iteration.
Duration this_sleep = MXS_MIN(sleep_time, op.time_remaining);
std::this_thread::sleep_for(this_sleep);
sleep_time += Duration(0.1); // Sleep a bit more next iteration.
}
else
{
time_is_up = true;
}
}
}
else
{
error = true;
PRINT_MXS_JSON_ERROR(error_out,
"Failed to update gtid on %s while waiting for catchup: %s",
name(), error_msg.c_str());
}
}

if (error)
if (!error && !gtid_reached)
{
PRINT_MXS_JSON_ERROR(err_out,
"Failed to update gtid on server '%s' while waiting for catchup.",
name());
}
else if (!gtid_reached)
{
PRINT_MXS_JSON_ERROR(err_out, "Slave catchup timed out on slave '%s'.", name());
PRINT_MXS_JSON_ERROR(error_out, "Slave catchup timed out on slave '%s'.", name());
}
return gtid_reached;
}
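The loop above boils down to a simple polling pattern: refresh the gtid positions, compare against the target, and sleep with a slowly growing interval that is capped by the remaining time. Below is a standalone sketch of that pattern, with the gtid queries passed in as hypothetical callbacks rather than MaxScale's real methods.

#include <algorithm>
#include <chrono>
#include <functional>
#include <thread>

using Clock = std::chrono::steady_clock;
using Duration = std::chrono::duration<double>;

// A minimal sketch of the timed catchup loop: 'update_gtids' and 'gtid_reached'
// are caller-supplied stand-ins for the real gtid queries.
bool wait_for_catchup(const std::function<bool()>& update_gtids,
                      const std::function<bool()>& gtid_reached,
                      Duration time_remaining)
{
    Duration sleep_time(0.2);   // Grows a little every iteration.
    auto last = Clock::now();
    bool time_is_up = false;

    while (!time_is_up)
    {
        if (!update_gtids())
        {
            return false;       // Query failed, give up.
        }
        if (gtid_reached())
        {
            return true;        // Target gtid reached.
        }
        auto now = Clock::now();
        time_remaining -= now - last;
        last = now;
        if (time_remaining.count() > 0)
        {
            // Sleep for a moment (never longer than the time left), then retry.
            std::this_thread::sleep_for(std::min(sleep_time, time_remaining));
            sleep_time += Duration(0.1);
        }
        else
        {
            time_is_up = true;
        }
    }
    return false;               // Timed out.
}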
@@ -1568,13 +1574,19 @@ bool MariaDBServer::demote(ClusterOperation& op)

if (!stop_slave_error)
{
// Step 2: If this server is master, disable writes and scheduled events.
// Flush logs, update gtid:s, run demotion_sql_file.
// Step 2: If this server is master, disable writes and scheduled events, flush logs,
// update gtid:s, run demotion_sql_file.

// In theory, this part should be ran in the opposite order so it would "reverse"
// the promotion code. However, it's probably better to run the most
// likely part to fail, setting read_only=1, first to make undoing easier. Setting
// read_only may fail if another session has table locks or is doing long writes.
bool demotion_error = false;
if (op.demotion_target_is_master)
{
mxb_assert(is_master());
// Step 2a: Enabling read-only can take time if writes are on or table locks taken.
// TODO: use max_statement_time to be safe!
bool ro_enabled = set_read_only(ReadOnlySetting::ENABLE, op.time_remaining, error_out);
op.time_remaining -= timer.lap();
if (!ro_enabled)
@@ -1597,33 +1609,7 @@ bool MariaDBServer::demote(ClusterOperation& op)
}
}

if (!demotion_error)
{
// Step 2c: FLUSH LOGS to ensure that all events have been written to binlog,
// then update gtid:s.
string error_msg;
bool logs_flushed = execute_cmd_time_limit("FLUSH LOGS;", op.time_remaining, &error_msg);
op.time_remaining -= timer.lap();
if (logs_flushed)
{
if (!update_gtids(&error_msg))
{
demotion_error = true;
PRINT_MXS_JSON_ERROR(error_out,
"Failed to update gtid:s of %s during demotion: %s.",
name(), error_msg.c_str());
}
}
else
{
demotion_error = true;
PRINT_MXS_JSON_ERROR(error_out,
"Failed to flush binary logs of %s during demotion: %s.",
name(), error_msg.c_str());
}
}

// Step 2d: Run demotion_sql_file if no errors so far.
// Step 2c: Run demotion_sql_file if no errors so far.
if (!demotion_error && !op.demotion_sql_file.empty())
{
bool file_ran_ok = run_sql_from_file(op.demotion_sql_file, error_out);
@@ -1637,19 +1623,48 @@ bool MariaDBServer::demote(ClusterOperation& op)
}
}

if (demotion_error)
if (!demotion_error)
{
// Read_only was enabled but a later step failed. Disable read_only. Connection is
// likely broken so use a short time limit.
// TODO: add smarter undo
set_read_only(ReadOnlySetting::DISABLE, Duration((double)0), NULL);
// Step 2d: FLUSH LOGS to ensure that all events have been written to binlog.
string error_msg;
bool logs_flushed = execute_cmd_time_limit("FLUSH LOGS;", op.time_remaining, &error_msg);
op.time_remaining -= timer.lap();
if (!logs_flushed)
{
demotion_error = true;
PRINT_MXS_JSON_ERROR(error_out,
"Failed to flush binary logs of %s during demotion: %s.",
name(), error_msg.c_str());
}
}
}
}

if (!demotion_error)
{
success = true;
// Finally, update gtid:s.
string error_msg;
if (update_gtids(&error_msg))
{
success = true;
}
else
{
demotion_error = true;
PRINT_MXS_JSON_ERROR(error_out,
"Failed to update gtid:s of %s during demotion: %s.",
name(), error_msg.c_str());
}
}

if (demotion_error && op.demotion_target_is_master)
{
// Read_only was enabled (or tried to be enabled) but a later step failed.
// Disable read_only. Connection is likely broken so use a short time limit.
// Even this is insufficient, because the server may still be executing the old
// 'SET GLOBAL read_only=1' query.
// TODO: add smarter undo, KILL QUERY etc.
set_read_only(ReadOnlySetting::DISABLE, Duration((double)0), NULL);
}
}
return success;
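Pulling the demote() changes together, the new ordering is: enable read_only first (the step most likely to fail, so it is the easiest to undo), then disable events and run the demotion sql file, FLUSH LOGS last so every event reaches the binlog, and only then refresh the gtid:s that the promotion target will catch up to. A hedged sketch of that ordering, with every step as a caller-supplied stand-in rather than MaxScale's real API:

#include <functional>

// Hypothetical demotion steps; each returns true on success.
struct DemotionSteps
{
    std::function<bool()> enable_read_only;   // Most likely to fail, so run first.
    std::function<bool()> disable_events_and_run_sql_file;
    std::function<bool()> flush_logs;         // Moved to the end of demotion.
    std::function<bool()> update_gtids;       // Refresh gtid:s after the flush.
    std::function<void()> undo_read_only;     // Best-effort rollback.
};

bool demote_sketch(const DemotionSteps& steps, bool target_is_master)
{
    bool error = false;
    if (target_is_master)
    {
        error = !steps.enable_read_only();
    }
    if (!error)
    {
        error = !steps.disable_events_and_run_sql_file();
    }
    if (!error)
    {
        error = !steps.flush_logs();
    }
    // Gtid:s are refreshed only after the flush, so the position the promotion
    // target catches up to includes everything written to the binlog.
    if (!error)
    {
        error = !steps.update_gtids();
    }
    if (error && target_is_master)
    {
        steps.undo_read_only();
    }
    return !error;
}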
@@ -260,16 +260,17 @@ public:
void warn_replication_settings() const;

/**
* Wait until server catches up to the target gtid. Only considers gtid domains common to this server
* and the target gtid. The gtid compared is the gtid_binlog_pos if this server has both log_bin and
* log_slave_updates on, and gtid_current_pos otherwise.
* Wait until server catches up to demotion target. Only considers gtid domains common
* to this server and the target. The gtid compared to on the demotion target is 'gtid_binlog_pos'.
* It is not updated during this method.
*
* @param target Which gtid must be reached
* @param timeout Maximum wait time in seconds
* @param err_out json object for error printing. Can be NULL.
* @return True, if target gtid was reached within allotted time
* The gtid used for comparison on this server is 'gtid_binlog_pos' if this server has both 'log_bin'
* and 'log_slave_updates' on, and 'gtid_current_pos' otherwise. This server is updated during the
* method.
*
* @return True, if target server gtid was reached within allotted time
*/
bool wait_until_gtid(const GtidList& target, int timeout, json_t** err_out);
bool catchup_to_master(ClusterOperation& op);

/**
* Find slave connection to the target server. If the IO thread is trying to connect