Continue separation of ClusterOperation elements

This commit is contained in:
Esa Korhonen
2018-10-18 19:21:22 +03:00
parent 90e6ff078a
commit 8877e7180b
4 changed files with 50 additions and 85 deletions

View File

@ -796,7 +796,7 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
// Switchover considered at least partially successful. // Switchover considered at least partially successful.
catchup_and_promote_success = true; catchup_and_promote_success = true;
rval = true; rval = true;
if (op.demotion_target_is_master) if (op.promotion->to_from_master)
{ {
// Force a master swap on next tick. // Force a master swap on next tick.
m_next_master = promotion_target; m_next_master = promotion_target;
@ -804,7 +804,7 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
// Step 4: Start replication on old master and redirect slaves. // Step 4: Start replication on old master and redirect slaves.
ServerArray redirected_to_promo_target; ServerArray redirected_to_promo_target;
if (demotion_target->copy_slave_conns(op, op.promotion_target_conns, promotion_target)) if (demotion_target->copy_slave_conns(op, op.demotion->conns_to_copy, promotion_target))
{ {
redirected_to_promo_target.push_back(demotion_target); redirected_to_promo_target.push_back(demotion_target);
} }
@ -825,7 +825,7 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
auto step6_duration = timer.lap(); auto step6_duration = timer.lap();
MXS_INFO("Switchover: slave replication confirmation took %.1f seconds with " MXS_INFO("Switchover: slave replication confirmation took %.1f seconds with "
"%.1f seconds to spare.", "%.1f seconds to spare.",
step6_duration.secs(), op.time_remaining.secs()); step6_duration.secs(), op.general.time_remaining.secs());
} }
} }
} }
@ -875,7 +875,7 @@ bool MariaDBMonitor::failover_perform(ClusterOperation& op)
// at least partially successful. // at least partially successful.
rval = true; rval = true;
m_cluster_modified = true; m_cluster_modified = true;
if (op.demotion_target_is_master) if (op.promotion->to_from_master)
{ {
// Force a master swap on next tick. // Force a master swap on next tick.
m_next_master = promotion_target; m_next_master = promotion_target;
@ -893,7 +893,7 @@ bool MariaDBMonitor::failover_perform(ClusterOperation& op)
wait_cluster_stabilization(op, redirected_slaves, promotion_target); wait_cluster_stabilization(op, redirected_slaves, promotion_target);
MXS_INFO("Failover: slave replication confirmation took %.1f seconds with " MXS_INFO("Failover: slave replication confirmation took %.1f seconds with "
"%.1f seconds to spare.", "%.1f seconds to spare.",
timer.lap().secs(), op.time_remaining.secs()); timer.lap().secs(), op.general.time_remaining.secs());
} }
} }
return rval; return rval;
@ -978,10 +978,10 @@ void MariaDBMonitor::wait_cluster_stabilization(ClusterOperation& op, const Serv
} }
} }
op.time_remaining -= timer.lap(); op.general.time_remaining -= timer.lap();
if (!unconfirmed.empty()) if (!unconfirmed.empty())
{ {
if (op.time_remaining.secs() > 0) if (op.general.time_remaining.secs() > 0)
{ {
double standard_sleep = 0.5; // In seconds. double standard_sleep = 0.5; // In seconds.
// If we have unconfirmed slaves and have time remaining, sleep a bit and try again. // If we have unconfirmed slaves and have time remaining, sleep a bit and try again.
@ -989,8 +989,8 @@ void MariaDBMonitor::wait_cluster_stabilization(ClusterOperation& op, const Serv
* all operations for failover/switchover are complete. The sleep is only required to * all operations for failover/switchover are complete. The sleep is only required to
* get correct messages to the user. Think about removing it, or shortening the maximum * get correct messages to the user. Think about removing it, or shortening the maximum
* time of this function. */ * time of this function. */
Duration sleep_time = (op.time_remaining.secs() > standard_sleep) ? Duration sleep_time = (op.general.time_remaining.secs() > standard_sleep) ?
Duration(standard_sleep) : op.time_remaining; Duration(standard_sleep) : op.general.time_remaining;
std::this_thread::sleep_for(sleep_time); std::this_thread::sleep_for(sleep_time);
} }
else else
@ -1021,7 +1021,7 @@ void MariaDBMonitor::wait_cluster_stabilization(ClusterOperation& op, const Serv
MXS_WARNING(MSG, fails, new_master->name(), repl_fails.size(), query_fails.size(), MXS_WARNING(MSG, fails, new_master->name(), repl_fails.size(), query_fails.size(),
unconfirmed.size(), new_master->name()); unconfirmed.size(), new_master->name());
} }
op.time_remaining -= timer.lap(); op.general.time_remaining -= timer.lap();
} }
/** /**
@ -1355,9 +1355,6 @@ unique_ptr<ClusterOperation> MariaDBMonitor::failover_prepare(Log log_mode, json
demotion_target->m_slave_status); demotion_target->m_slave_status);
rval.reset(new ClusterOperation(OperationType::FAILOVER, NULL, promotion_op, rval.reset(new ClusterOperation(OperationType::FAILOVER, NULL, promotion_op,
promotion_target, demotion_target, promotion_target, demotion_target,
promotion_target->m_slave_status, demotion_target->m_slave_status,
demotion_target == m_master, m_handle_event_scheduler,
m_promote_sql_file, m_demote_sql_file,
m_replication_user, m_replication_password, m_replication_user, m_replication_password,
error_out, time_limit)); error_out, time_limit));
} }
@ -1657,9 +1654,6 @@ unique_ptr<ClusterOperation> MariaDBMonitor::switchover_prepare(SERVER* promotio
demotion_target->m_slave_status); demotion_target->m_slave_status);
rval.reset(new ClusterOperation(op_type, demotion_op, promotion_op, rval.reset(new ClusterOperation(op_type, demotion_op, promotion_op,
promotion_target, demotion_target, promotion_target, demotion_target,
promotion_target->m_slave_status, demotion_target->m_slave_status,
demotion_target == m_master, m_handle_event_scheduler,
m_promote_sql_file, m_demote_sql_file,
m_replication_user, m_replication_password, m_replication_user, m_replication_password,
error_out, time_limit)); error_out, time_limit));
} }

View File

@ -507,7 +507,7 @@ bool MariaDBServer::catchup_to_master(ClusterOperation& op)
bool gtid_reached = false; bool gtid_reached = false;
bool error = false; bool error = false;
const GtidList& target = op.demotion_target->m_gtid_binlog_pos; const GtidList& target = op.demotion_target->m_gtid_binlog_pos;
json_t** error_out = op.error_out; json_t** error_out = op.general.error_out;
Duration sleep_time(0.2); // How long to sleep before next iteration. Incremented slowly. Duration sleep_time(0.2); // How long to sleep before next iteration. Incremented slowly.
StopWatch timer; StopWatch timer;
@ -525,11 +525,11 @@ bool MariaDBServer::catchup_to_master(ClusterOperation& op)
else else
{ {
// Query was successful but target gtid not yet reached. Check how much time left. // Query was successful but target gtid not yet reached. Check how much time left.
op.time_remaining -= timer.lap(); op.general.time_remaining -= timer.lap();
if (op.time_remaining.secs() > 0) if (op.general.time_remaining.secs() > 0)
{ {
// Sleep for a moment, then try again. // Sleep for a moment, then try again.
Duration this_sleep = MXS_MIN(sleep_time, op.time_remaining); Duration this_sleep = MXS_MIN(sleep_time, op.general.time_remaining);
std::this_thread::sleep_for(this_sleep); std::this_thread::sleep_for(this_sleep);
sleep_time += Duration(0.1); // Sleep a bit more next iteration. sleep_time += Duration(0.1); // Sleep a bit more next iteration.
} }
@ -1451,7 +1451,7 @@ bool MariaDBServer::reset_all_slave_conns(json_t** error_out)
bool MariaDBServer::promote(ClusterOperation& op) bool MariaDBServer::promote(ClusterOperation& op)
{ {
mxb_assert(op.type == OperationType::SWITCHOVER || op.type == OperationType::FAILOVER); mxb_assert(op.type == OperationType::SWITCHOVER || op.type == OperationType::FAILOVER);
json_t** const error_out = op.error_out; json_t** const error_out = op.general.error_out;
// Function should only be called for a master-slave pair. // Function should only be called for a master-slave pair.
auto master_conn = slave_connection_status(op.demotion_target); auto master_conn = slave_connection_status(op.demotion_target);
mxb_assert(master_conn); mxb_assert(master_conn);
@ -1484,22 +1484,22 @@ bool MariaDBServer::promote(ClusterOperation& op)
// Step 2: If demotion target is master, meaning this server will become the master, // Step 2: If demotion target is master, meaning this server will become the master,
// enable writing and scheduled events. Also, run promotion_sql_file. // enable writing and scheduled events. Also, run promotion_sql_file.
bool promotion_error = false; bool promotion_error = false;
if (op.demotion_target_is_master) if (op.demotion->to_from_master)
{ {
// Disabling read-only should be quick. // Disabling read-only should be quick.
bool ro_disabled = set_read_only(ReadOnlySetting::DISABLE, op.time_remaining, error_out); bool ro_disabled = set_read_only(ReadOnlySetting::DISABLE, op.general.time_remaining, error_out);
op.time_remaining -= timer.restart(); op.general.time_remaining -= timer.restart();
if (!ro_disabled) if (!ro_disabled)
{ {
promotion_error = true; promotion_error = true;
} }
else else
{ {
if (op.handle_events) if (op.promotion->handle_events)
{ {
// TODO: Add query replying to enable_events // TODO: Add query replying to enable_events
bool events_enabled = enable_events(error_out); bool events_enabled = enable_events(error_out);
op.time_remaining -= timer.restart(); op.general.time_remaining -= timer.restart();
if (!events_enabled) if (!events_enabled)
{ {
promotion_error = true; promotion_error = true;
@ -1508,16 +1508,17 @@ bool MariaDBServer::promote(ClusterOperation& op)
} }
// Run promotion_sql_file if no errors so far. // Run promotion_sql_file if no errors so far.
if (!promotion_error && !op.promotion_sql_file.empty()) const string& sql_file = op.promotion->sql_file;
if (!promotion_error && !sql_file.empty())
{ {
bool file_ran_ok = run_sql_from_file(op.promotion_sql_file, error_out); bool file_ran_ok = run_sql_from_file(sql_file, error_out);
op.time_remaining -= timer.restart(); op.general.time_remaining -= timer.restart();
if (!file_ran_ok) if (!file_ran_ok)
{ {
promotion_error = true; promotion_error = true;
PRINT_MXS_JSON_ERROR(error_out, PRINT_MXS_JSON_ERROR(error_out,
"Execution of file '%s' failed during promotion of server '%s'.", "Execution of file '%s' failed during promotion of server '%s'.",
op.promotion_sql_file.c_str(), name()); sql_file.c_str(), name());
} }
} }
} }
@ -1529,7 +1530,7 @@ bool MariaDBServer::promote(ClusterOperation& op)
{ {
if (op.type == OperationType::SWITCHOVER) if (op.type == OperationType::SWITCHOVER)
{ {
if (copy_slave_conns(op, op.demotion_target_conns, op.demotion_target)) if (copy_slave_conns(op, op.promotion->conns_to_copy, op.demotion_target))
{ {
success = true; success = true;
} }
@ -1542,7 +1543,7 @@ bool MariaDBServer::promote(ClusterOperation& op)
} }
else if (op.type == OperationType::FAILOVER) else if (op.type == OperationType::FAILOVER)
{ {
if (merge_slave_conns(op, op.demotion_target_conns)) if (merge_slave_conns(op, op.promotion->conns_to_copy))
{ {
success = true; success = true;
} }
@ -1576,7 +1577,7 @@ bool MariaDBServer::demote(ServerOperation& demo_op, GeneralOpData& general)
// likely part to fail, setting read_only=1, first to make undoing easier. Setting // likely part to fail, setting read_only=1, first to make undoing easier. Setting
// read_only may fail if another session has table locks or is doing long writes. // read_only may fail if another session has table locks or is doing long writes.
bool demotion_error = false; bool demotion_error = false;
if (demo_op.was_is_master) if (demo_op.to_from_master)
{ {
mxb_assert(is_master()); mxb_assert(is_master());
StopWatch timer; StopWatch timer;
@ -1605,16 +1606,17 @@ bool MariaDBServer::demote(ServerOperation& demo_op, GeneralOpData& general)
} }
// Step 2c: Run demotion_sql_file if no errors so far. // Step 2c: Run demotion_sql_file if no errors so far.
if (!demotion_error && !demo_op.sql_file.empty()) const string& sql_file = demo_op.sql_file;
if (!demotion_error && !sql_file.empty())
{ {
bool file_ran_ok = run_sql_from_file(demo_op.sql_file, error_out); bool file_ran_ok = run_sql_from_file(sql_file, error_out);
general.time_remaining -= timer.lap(); general.time_remaining -= timer.lap();
if (!file_ran_ok) if (!file_ran_ok)
{ {
demotion_error = true; demotion_error = true;
PRINT_MXS_JSON_ERROR(error_out, PRINT_MXS_JSON_ERROR(error_out,
"Execution of file '%s' failed during demotion of server %s.", "Execution of file '%s' failed during demotion of server %s.",
demo_op.sql_file.c_str(), name()); sql_file.c_str(), name());
} }
} }
@ -1653,7 +1655,7 @@ bool MariaDBServer::demote(ServerOperation& demo_op, GeneralOpData& general)
} }
} }
if (demotion_error && demo_op.was_is_master) if (demotion_error && demo_op.to_from_master)
{ {
// Read_only was enabled (or tried to be enabled) but a later step failed. // Read_only was enabled (or tried to be enabled) but a later step failed.
// Disable read_only. Connection is likely broken so use a short time limit. // Disable read_only. Connection is likely broken so use a short time limit.
@ -2018,13 +2020,13 @@ bool MariaDBServer::create_start_slave(ClusterOperation& op, const SlaveStatus&
SlaveStatus new_conn = slave_conn; SlaveStatus new_conn = slave_conn;
new_conn.owning_server = name(); new_conn.owning_server = name();
string change_master = generate_change_master_cmd(op, new_conn); string change_master = generate_change_master_cmd(op, new_conn);
bool conn_created = execute_cmd_time_limit(change_master, op.time_remaining, &error_msg); bool conn_created = execute_cmd_time_limit(change_master, op.general.time_remaining, &error_msg);
op.time_remaining -= timer.restart(); op.general.time_remaining -= timer.restart();
if (conn_created) if (conn_created)
{ {
string start_slave = string_printf("START SLAVE '%s';", new_conn.name.c_str()); string start_slave = string_printf("START SLAVE '%s';", new_conn.name.c_str());
bool slave_started = execute_cmd_time_limit(start_slave, op.time_remaining, &error_msg); bool slave_started = execute_cmd_time_limit(start_slave, op.general.time_remaining, &error_msg);
op.time_remaining -= timer.restart(); op.general.time_remaining -= timer.restart();
if (slave_started) if (slave_started)
{ {
success = true; success = true;
@ -2059,14 +2061,14 @@ string MariaDBServer::generate_change_master_cmd(ClusterOperation& op, const Sla
slave_conn.name.c_str(), slave_conn.name.c_str(),
slave_conn.master_host.c_str(), slave_conn.master_port); slave_conn.master_host.c_str(), slave_conn.master_port);
change_cmd += "MASTER_USE_GTID = current_pos, "; change_cmd += "MASTER_USE_GTID = current_pos, ";
change_cmd += string_printf("MASTER_USER = '%s', ", op.replication_user.c_str()); change_cmd += string_printf("MASTER_USER = '%s', ", op.general.replication_user.c_str());
const char MASTER_PW[] = "MASTER_PASSWORD = '%s';"; const char MASTER_PW[] = "MASTER_PASSWORD = '%s';";
#if defined (SS_DEBUG) #if defined (SS_DEBUG)
string change_cmd_nopw = change_cmd; string change_cmd_nopw = change_cmd;
change_cmd_nopw += string_printf(MASTER_PW, "******"); change_cmd_nopw += string_printf(MASTER_PW, "******");
MXS_DEBUG("Change master command is '%s'.", change_cmd_nopw.c_str()); MXS_DEBUG("Change master command is '%s'.", change_cmd_nopw.c_str());
#endif #endif
change_cmd += string_printf(MASTER_PW, op.replication_password.c_str()); change_cmd += string_printf(MASTER_PW, op.general.replication_password.c_str());
return change_cmd; return change_cmd;
} }
@ -2079,8 +2081,9 @@ bool MariaDBServer::redirect_existing_slave_conn(ClusterOperation& op, const Mar
bool success = false; bool success = false;
// First, just stop the slave connection. // First, just stop the slave connection.
bool stopped = stop_slave_conn(old_conn->name, StopMode::STOP_ONLY, op.time_remaining, op.error_out); bool stopped = stop_slave_conn(old_conn->name, StopMode::STOP_ONLY, op.general.time_remaining,
op.time_remaining -= timer.restart(); op.general.error_out);
op.general.time_remaining -= timer.restart();
if (stopped) if (stopped)
{ {
SlaveStatus modified_conn = *old_conn; SlaveStatus modified_conn = *old_conn;
@ -2089,20 +2092,20 @@ bool MariaDBServer::redirect_existing_slave_conn(ClusterOperation& op, const Mar
modified_conn.master_port = target_server->port; modified_conn.master_port = target_server->port;
string change_master = generate_change_master_cmd(op, modified_conn); string change_master = generate_change_master_cmd(op, modified_conn);
string error_msg; string error_msg;
bool changed = execute_cmd_time_limit(change_master, op.time_remaining, &error_msg); bool changed = execute_cmd_time_limit(change_master, op.general.time_remaining, &error_msg);
op.time_remaining -= timer.restart(); op.general.time_remaining -= timer.restart();
if (changed) if (changed)
{ {
string start = string_printf("START SLAVE '%s';", old_conn->name.c_str()); string start = string_printf("START SLAVE '%s';", old_conn->name.c_str());
bool started = execute_cmd_time_limit(start, op.time_remaining, &error_msg); bool started = execute_cmd_time_limit(start, op.general.time_remaining, &error_msg);
op.time_remaining -= timer.restart(); op.general.time_remaining -= timer.restart();
if (started) if (started)
{ {
success = true; success = true;
} }
else else
{ {
PRINT_MXS_JSON_ERROR(op.error_out, PRINT_MXS_JSON_ERROR(op.general.error_out,
"%s could not be started: %s", "%s could not be started: %s",
modified_conn.to_short_string().c_str(), error_msg.c_str()); modified_conn.to_short_string().c_str(), error_msg.c_str());
} }
@ -2110,7 +2113,7 @@ bool MariaDBServer::redirect_existing_slave_conn(ClusterOperation& op, const Mar
else else
{ {
// TODO: This may currently print out passwords. // TODO: This may currently print out passwords.
PRINT_MXS_JSON_ERROR(op.error_out, PRINT_MXS_JSON_ERROR(op.general.error_out,
"%s could not be redirected to [%s]:%i: %s", "%s could not be redirected to [%s]:%i: %s",
old_conn->to_short_string().c_str(), old_conn->to_short_string().c_str(),
modified_conn.master_host.c_str(), modified_conn.master_port, modified_conn.master_host.c_str(), modified_conn.master_port,

View File

@ -157,10 +157,6 @@ bool SlaveStatus::should_be_copied(string* ignore_reason_out) const
ClusterOperation::ClusterOperation(OperationType type, ServerOperation* dem_op, ServerOperation* prom_op, ClusterOperation::ClusterOperation(OperationType type, ServerOperation* dem_op, ServerOperation* prom_op,
MariaDBServer* promotion_target, MariaDBServer* demotion_target, MariaDBServer* promotion_target, MariaDBServer* demotion_target,
const SlaveStatusArray& promo_target_conns,
const SlaveStatusArray& demo_target_conns,
bool demo_target_is_master, bool handle_events,
string& promotion_sql_file, string& demotion_sql_file,
string& replication_user, string& replication_password, string& replication_user, string& replication_password,
json_t** error, maxbase::Duration time_remaining) json_t** error, maxbase::Duration time_remaining)
: type(type) : type(type)
@ -169,16 +165,6 @@ ClusterOperation::ClusterOperation(OperationType type, ServerOperation* dem_op,
, general(type, replication_user, replication_password, error, time_remaining) , general(type, replication_user, replication_password, error, time_remaining)
, promotion_target(promotion_target) , promotion_target(promotion_target)
, demotion_target(demotion_target) , demotion_target(demotion_target)
, demotion_target_is_master(demo_target_is_master)
, handle_events(handle_events)
, promotion_sql_file(promotion_sql_file)
, demotion_sql_file(demotion_sql_file)
, replication_user(replication_user)
, replication_password(replication_password)
, error_out(error)
, time_remaining(time_remaining)
, demotion_target_conns(demo_target_conns)
, promotion_target_conns(promo_target_conns)
{ {
} }
@ -192,7 +178,7 @@ ServerOperation::ServerOperation(MariaDBServer* target, bool was_is_master,
bool handle_events, const std::string& sql_file, bool handle_events, const std::string& sql_file,
const SlaveStatusArray& conns_to_copy) const SlaveStatusArray& conns_to_copy)
: target(target) : target(target)
, was_is_master(was_is_master) , to_from_master(was_is_master)
, handle_events(handle_events) , handle_events(handle_events)
, sql_file(sql_file) , sql_file(sql_file)
, conns_to_copy(conns_to_copy) , conns_to_copy(conns_to_copy)

View File

@ -251,27 +251,9 @@ public:
MariaDBServer* const promotion_target; // Which server will be promoted MariaDBServer* const promotion_target; // Which server will be promoted
MariaDBServer* const demotion_target; // Which server will be demoted MariaDBServer* const demotion_target; // Which server will be demoted
const bool demotion_target_is_master; // Was the demotion target the master?
const bool handle_events; // Should scheduled server events be disabled/enabled?
const std::string promotion_sql_file; // SQL commands ran on a server promoted to master
const std::string demotion_sql_file; // SQL commands ran on a server demoted from master
const std::string replication_user; // User for CHANGE MASTER TO ...
const std::string replication_password; // Password for CHANGE MASTER TO ...
json_t** const error_out; // Json error output
maxbase::Duration time_remaining; // How much time remains to complete the operation
/* Slave connections of the demotion target. Saved here in case the data in the server object is
* modified before promoted server has copied the connections. */
SlaveStatusArray demotion_target_conns;
/* Similar copy for promotion target connections. */
SlaveStatusArray promotion_target_conns;
ClusterOperation(OperationType type, ServerOperation* dem_op, ServerOperation* prom_op, ClusterOperation(OperationType type, ServerOperation* dem_op, ServerOperation* prom_op,
MariaDBServer* promotion_target, MariaDBServer* demotion_target, MariaDBServer* promotion_target, MariaDBServer* demotion_target,
const SlaveStatusArray& promo_target_conns, const SlaveStatusArray& demo_target_conns,
bool demo_target_is_master, bool handle_events,
std::string& promotion_sql_file, std::string& demotion_sql_file,
std::string& replication_user, std::string& replication_password, std::string& replication_user, std::string& replication_password,
json_t** error, maxbase::Duration time_remaining); json_t** error, maxbase::Duration time_remaining);
~ClusterOperation(); ~ClusterOperation();
@ -282,7 +264,7 @@ class ServerOperation
{ {
public: public:
MariaDBServer* const target; // Target server MariaDBServer* const target; // Target server
const bool was_is_master; // Was the target a master / should it become one const bool to_from_master; // Was the target a master / should it become one
const bool handle_events; // Should scheduled server events be disabled/enabled? const bool handle_events; // Should scheduled server events be disabled/enabled?
const std::string sql_file; // Path to file with SQL commands to run during op const std::string sql_file; // Path to file with SQL commands to run during op
const SlaveStatusArray conns_to_copy; // Slave connections the target should copy/merge const SlaveStatusArray conns_to_copy; // Slave connections the target should copy/merge