Divide ClusterOperation into two types
The main class was getting unwieldy and too general. Dividing the fields makes it easier to add support for other operation types. This commit leaves most data duplicated; later commits clean up the affected code.
This commit is contained in:
parent
09c5e295d0
commit
90e6ff078a
@ -771,14 +771,14 @@ bool MariaDBMonitor::server_is_rejoin_suspect(MariaDBServer* rejoin_cand, json_t
|
||||
*/
|
||||
bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
|
||||
{
|
||||
MariaDBServer* const promotion_target = op.promotion_target;
|
||||
MariaDBServer* const demotion_target = op.demotion_target;
|
||||
json_t** const error_out = op.error_out;
|
||||
mxb_assert(promotion_target && demotion_target);
|
||||
mxb_assert(op.demotion && op.promotion);
|
||||
MariaDBServer* const promotion_target = op.promotion->target;
|
||||
MariaDBServer* const demotion_target = op.demotion->target;
|
||||
json_t** const error_out = op.general.error_out;
|
||||
|
||||
bool rval = false;
|
||||
// Step 1: Set read-only to on, flush logs, update gtid:s.
|
||||
if (demotion_target->demote(op))
|
||||
if (demotion_target->demote(*op.demotion, op.general))
|
||||
{
|
||||
m_cluster_modified = true;
|
||||
bool catchup_and_promote_success = false;
|
||||
@ -1349,7 +1349,11 @@ unique_ptr<ClusterOperation> MariaDBMonitor::failover_prepare(Log log_mode, json
|
||||
{
|
||||
// The Duration ctor taking a double interprets it as seconds.
|
||||
auto time_limit = maxbase::Duration((double)m_failover_timeout);
|
||||
rval.reset(new ClusterOperation(OperationType::FAILOVER,
|
||||
bool promoting_to_master = (demotion_target == m_master);
|
||||
ServerOperation* promotion_op = new ServerOperation(promotion_target, promoting_to_master,
|
||||
m_handle_event_scheduler, m_promote_sql_file,
|
||||
demotion_target->m_slave_status);
|
||||
rval.reset(new ClusterOperation(OperationType::FAILOVER, NULL, promotion_op,
|
||||
promotion_target, demotion_target,
|
||||
promotion_target->m_slave_status, demotion_target->m_slave_status,
|
||||
demotion_target == m_master, m_handle_event_scheduler,
|
||||
@ -1644,7 +1648,14 @@ unique_ptr<ClusterOperation> MariaDBMonitor::switchover_prepare(SERVER* promotio
|
||||
if (promotion_target && demotion_target && gtid_ok)
|
||||
{
|
||||
maxbase::Duration time_limit((double)m_switchover_timeout);
|
||||
rval.reset(new ClusterOperation(op_type,
|
||||
bool master_swap = demotion_target->is_master();
|
||||
ServerOperation* demotion_op = new ServerOperation(demotion_target, master_swap,
|
||||
m_handle_event_scheduler, m_demote_sql_file,
|
||||
promotion_target->m_slave_status);
|
||||
ServerOperation* promotion_op = new ServerOperation(promotion_target, master_swap,
|
||||
m_handle_event_scheduler, m_promote_sql_file,
|
||||
demotion_target->m_slave_status);
|
||||
rval.reset(new ClusterOperation(op_type, demotion_op, promotion_op,
|
||||
promotion_target, demotion_target,
|
||||
promotion_target->m_slave_status, demotion_target->m_slave_status,
|
||||
demotion_target == m_master, m_handle_event_scheduler,
|
||||
|
@ -1471,12 +1471,12 @@ bool MariaDBServer::promote(ClusterOperation& op)
|
||||
bool stopped = false;
|
||||
if (op.type == OperationType::SWITCHOVER)
|
||||
{
|
||||
stopped = remove_slave_conns(op, m_slave_status);
|
||||
stopped = remove_slave_conns(op.general, m_slave_status);
|
||||
}
|
||||
else if (op.type == OperationType::FAILOVER)
|
||||
{
|
||||
stopped = remove_slave_conns(op, {*master_conn});
|
||||
master_conn = NULL; // The connection pointed to may no longer exist.
|
||||
stopped = remove_slave_conns(op.general, {*master_conn});
|
||||
master_conn = NULL; // The connection pointed to may no longer exist.
|
||||
}
|
||||
|
||||
if (stopped)
|
||||
@ -1558,15 +1558,15 @@ bool MariaDBServer::promote(ClusterOperation& op)
|
||||
return success;
|
||||
}
|
||||
|
||||
bool MariaDBServer::demote(ClusterOperation& op)
|
||||
bool MariaDBServer::demote(ServerOperation& demo_op, GeneralOpData& general)
|
||||
{
|
||||
mxb_assert(op.type == OperationType::SWITCHOVER && op.demotion_target == this);
|
||||
json_t** error_out = op.error_out;
|
||||
mxb_assert(demo_op.target == this);
|
||||
json_t** const error_out = general.error_out;
|
||||
bool success = false;
|
||||
|
||||
// Step 1: Stop & reset slave connections. The promotion target will copy them. The connection
|
||||
// information has been backed up in the operation object.
|
||||
if (remove_slave_conns(op, m_slave_status))
|
||||
if (remove_slave_conns(general, m_slave_status))
|
||||
{
|
||||
// Step 2: If this server is master, disable writes and scheduled events, flush logs,
|
||||
// update gtid:s, run demotion_sql_file.
|
||||
@ -1576,27 +1576,27 @@ bool MariaDBServer::demote(ClusterOperation& op)
|
||||
// likely part to fail, setting read_only=1, first to make undoing easier. Setting
|
||||
// read_only may fail if another session has table locks or is doing long writes.
|
||||
bool demotion_error = false;
|
||||
if (op.demotion_target_is_master)
|
||||
if (demo_op.was_is_master)
|
||||
{
|
||||
mxb_assert(is_master());
|
||||
StopWatch timer;
|
||||
// Step 2a: Enabling read-only can take time if writes are on or table locks taken.
|
||||
// TODO: use max_statement_time to be safe!
|
||||
bool ro_enabled = set_read_only(ReadOnlySetting::ENABLE, op.time_remaining, error_out);
|
||||
op.time_remaining -= timer.lap();
|
||||
bool ro_enabled = set_read_only(ReadOnlySetting::ENABLE, general.time_remaining, error_out);
|
||||
general.time_remaining -= timer.lap();
|
||||
if (!ro_enabled)
|
||||
{
|
||||
demotion_error = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (op.handle_events)
|
||||
if (demo_op.handle_events)
|
||||
{
|
||||
// TODO: Add query replying to enable_events
|
||||
// Step 2b: Using BINLOG_OFF to avoid adding any gtid events,
|
||||
// which could break external replication.
|
||||
bool events_disabled = disable_events(BinlogMode::BINLOG_OFF, error_out);
|
||||
op.time_remaining -= timer.lap();
|
||||
general.time_remaining -= timer.lap();
|
||||
if (!events_disabled)
|
||||
{
|
||||
demotion_error = true;
|
||||
@ -1605,16 +1605,16 @@ bool MariaDBServer::demote(ClusterOperation& op)
|
||||
}
|
||||
|
||||
// Step 2c: Run demotion_sql_file if no errors so far.
|
||||
if (!demotion_error && !op.demotion_sql_file.empty())
|
||||
if (!demotion_error && !demo_op.sql_file.empty())
|
||||
{
|
||||
bool file_ran_ok = run_sql_from_file(op.demotion_sql_file, error_out);
|
||||
op.time_remaining -= timer.lap();
|
||||
bool file_ran_ok = run_sql_from_file(demo_op.sql_file, error_out);
|
||||
general.time_remaining -= timer.lap();
|
||||
if (!file_ran_ok)
|
||||
{
|
||||
demotion_error = true;
|
||||
PRINT_MXS_JSON_ERROR(error_out,
|
||||
"Execution of file '%s' failed during demotion of server %s.",
|
||||
op.demotion_sql_file.c_str(), name());
|
||||
demo_op.sql_file.c_str(), name());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1622,8 +1622,9 @@ bool MariaDBServer::demote(ClusterOperation& op)
|
||||
{
|
||||
// Step 2d: FLUSH LOGS to ensure that all events have been written to binlog.
|
||||
string error_msg;
|
||||
bool logs_flushed = execute_cmd_time_limit("FLUSH LOGS;", op.time_remaining, &error_msg);
|
||||
op.time_remaining -= timer.lap();
|
||||
bool logs_flushed = execute_cmd_time_limit("FLUSH LOGS;", general.time_remaining,
|
||||
&error_msg);
|
||||
general.time_remaining -= timer.lap();
|
||||
if (!logs_flushed)
|
||||
{
|
||||
demotion_error = true;
|
||||
@ -1652,7 +1653,7 @@ bool MariaDBServer::demote(ClusterOperation& op)
|
||||
}
|
||||
}
|
||||
|
||||
if (demotion_error && op.demotion_target_is_master)
|
||||
if (demotion_error && demo_op.was_is_master)
|
||||
{
|
||||
// Read_only was enabled (or tried to be enabled) but a later step failed.
|
||||
// Disable read_only. Connection is likely broken so use a short time limit.
|
||||
@ -1730,11 +1731,12 @@ bool MariaDBServer::stop_slave_conn(const std::string& conn_name, StopMode mode,
|
||||
*
|
||||
* @param op Operation descriptor
|
||||
* @param conns_to_remove Which connections should be removed
|
||||
* @return True if succesfull
|
||||
* @return True if successful
|
||||
*/
|
||||
bool MariaDBServer::remove_slave_conns(ClusterOperation& op, const SlaveStatusArray& conns_to_remove)
|
||||
bool MariaDBServer::remove_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_remove)
|
||||
{
|
||||
json_t** error_out = op.error_out;
|
||||
maxbase::Duration& time_remaining = op.time_remaining;
|
||||
StopWatch timer;
|
||||
// Take a backup of the soon to be removed connections so they can be compared properly after an update.
|
||||
SlaveStatusArray conns_to_remove_copy = conns_to_remove;
|
||||
@ -1742,11 +1744,11 @@ bool MariaDBServer::remove_slave_conns(ClusterOperation& op, const SlaveStatusAr
|
||||
bool stop_slave_error = false;
|
||||
for (size_t i = 0; !stop_slave_error && i < conns_to_remove.size(); i++)
|
||||
{
|
||||
if (!stop_slave_conn(conns_to_remove[i].name, StopMode::RESET_ALL, op.time_remaining, error_out))
|
||||
if (!stop_slave_conn(conns_to_remove[i].name, StopMode::RESET_ALL, time_remaining, error_out))
|
||||
{
|
||||
stop_slave_error = true;
|
||||
}
|
||||
op.time_remaining -= timer.lap();
|
||||
time_remaining -= timer.lap();
|
||||
}
|
||||
|
||||
bool success = false;
|
||||
@ -1797,7 +1799,7 @@ bool MariaDBServer::remove_slave_conns(ClusterOperation& op, const SlaveStatusAr
|
||||
name(), error_msg.c_str());
|
||||
}
|
||||
}
|
||||
op.time_remaining -= timer.lap();
|
||||
time_remaining -= timer.lap();
|
||||
return success;
|
||||
}
|
||||
|
||||
|
@ -365,7 +365,7 @@ public:
|
||||
* @param op Cluster operation descriptor
|
||||
* @return True if successful
|
||||
*/
|
||||
bool demote(ClusterOperation& op);
|
||||
bool demote(ServerOperation& op, GeneralOpData& general);
|
||||
|
||||
/**
|
||||
* Redirect the slave connection going to old master to replicate from new master.
|
||||
@ -535,7 +535,7 @@ private:
|
||||
bool stop_slave_conn(const std::string& conn_name, StopMode mode, maxbase::Duration time_limit,
|
||||
json_t** error_out);
|
||||
|
||||
bool remove_slave_conns(ClusterOperation& op, const SlaveStatusArray& conns_to_remove);
|
||||
bool remove_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_remove);
|
||||
bool execute_cmd_ex(const std::string& cmd, QueryRetryMode mode,
|
||||
std::string* errmsg_out = NULL, unsigned int* errno_out = NULL);
|
||||
|
||||
|
@ -155,7 +155,7 @@ bool SlaveStatus::should_be_copied(string* ignore_reason_out) const
|
||||
return accepted;
|
||||
}
|
||||
|
||||
ClusterOperation::ClusterOperation(OperationType type,
|
||||
ClusterOperation::ClusterOperation(OperationType type, ServerOperation* dem_op, ServerOperation* prom_op,
|
||||
MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
||||
const SlaveStatusArray& promo_target_conns,
|
||||
const SlaveStatusArray& demo_target_conns,
|
||||
@ -164,6 +164,9 @@ ClusterOperation::ClusterOperation(OperationType type,
|
||||
string& replication_user, string& replication_password,
|
||||
json_t** error, maxbase::Duration time_remaining)
|
||||
: type(type)
|
||||
, demotion(dem_op)
|
||||
, promotion(prom_op)
|
||||
, general(type, replication_user, replication_password, error, time_remaining)
|
||||
, promotion_target(promotion_target)
|
||||
, demotion_target(demotion_target)
|
||||
, demotion_target_is_master(demo_target_is_master)
|
||||
@ -179,6 +182,33 @@ ClusterOperation::ClusterOperation(OperationType type,
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Destructor. Deletes the demotion and promotion ServerOperation objects that were handed to the
 * constructor as raw pointers and stored in the 'demotion' and 'promotion' members.
 * Either pointer may be null (the failover path passes NULL as the demotion operation);
 * deleting a null pointer is a no-op.
 */
ClusterOperation::~ClusterOperation()
|
||||
{
|
||||
delete demotion;
|
||||
delete promotion;
|
||||
}
|
||||
|
||||
/**
 * Construct the descriptor for one server's part of a cluster operation. Every argument is
 * copied into the const member of the same name; the constructor body is empty.
 *
 * @param target Server this descriptor applies to
 * @param was_is_master Was the target a master / should it become one
 * @param handle_events Should scheduled server events be disabled/enabled
 * @param sql_file Path to file with SQL commands to run during the operation
 * @param conns_to_copy Slave connections the target should copy/merge (stored as a copy)
 */
ServerOperation::ServerOperation(MariaDBServer* target, bool was_is_master,
|
||||
bool handle_events, const std::string& sql_file,
|
||||
const SlaveStatusArray& conns_to_copy)
|
||||
: target(target)
|
||||
, was_is_master(was_is_master)
|
||||
, handle_events(handle_events)
|
||||
, sql_file(sql_file)
|
||||
, conns_to_copy(conns_to_copy)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Construct the operation data that is not tied to a single server. Each argument initializes
 * the member of the same name ('error' is stored as 'error_out'); the constructor body is empty.
 *
 * @param type Operation type
 * @param replication_user User for CHANGE MASTER TO ...
 * @param replication_password Password for CHANGE MASTER TO ...
 * @param error Json error output
 * @param time_remaining How much time remains to complete the operation
 */
GeneralOpData::GeneralOpData(OperationType type, const string& replication_user, const string& replication_password,
|
||||
json_t** error, maxbase::Duration time_remaining)
|
||||
: type(type)
|
||||
, replication_user(replication_user)
|
||||
, replication_password(replication_password)
|
||||
, error_out(error)
|
||||
, time_remaining(time_remaining)
|
||||
{
|
||||
}
|
||||
|
||||
GtidList GtidList::from_string(const string& gtid_string)
|
||||
{
|
||||
mxb_assert(gtid_string.size());
|
||||
|
@ -217,6 +217,21 @@ enum class OperationType
|
||||
FAILOVER
|
||||
};
|
||||
|
||||
class ServerOperation;
|
||||
/**
 * Operation data which is not specific to a single server: operation type, replication
 * credentials, error output and the remaining time budget. All members except 'time_remaining'
 * are const; 'time_remaining' is decremented by the server functions (e.g. demote(),
 * remove_slave_conns()) as the individual steps consume time.
 */
class GeneralOpData
|
||||
{
|
||||
public:
|
||||
// Operation type (an OperationType value such as FAILOVER)
const OperationType type;
|
||||
const std::string replication_user; // User for CHANGE MASTER TO ...
|
||||
const std::string replication_password; // Password for CHANGE MASTER TO ...
|
||||
json_t** const error_out; // Json error output
|
||||
maxbase::Duration time_remaining; // How much time remains to complete the operation
|
||||
|
||||
// Initializes every member from the argument of the same name ('error' -> 'error_out').
GeneralOpData(OperationType type,
|
||||
const std::string& replication_user, const std::string& replication_password,
|
||||
json_t** error, maxbase::Duration time_remaining);
|
||||
};
|
||||
|
||||
/**
|
||||
* Class which encapsulates many settings and status descriptors for a failover/switchover.
|
||||
* Is more convenient to pass around than the separate elements. Most fields are constants or constant
|
||||
@ -229,7 +244,11 @@ private:
|
||||
ClusterOperation& operator=(const ClusterOperation&) = delete;
|
||||
|
||||
public:
|
||||
const OperationType type; // Failover or switchover
|
||||
const OperationType type; // Failover or switchover
|
||||
ServerOperation* const demotion; // Required by MariaDBServer->demote()
|
||||
ServerOperation* const promotion; // Required by MariaDBServer->promote()
|
||||
GeneralOpData general; // General operation data
|
||||
|
||||
MariaDBServer* const promotion_target; // Which server will be promoted
|
||||
MariaDBServer* const demotion_target; // Which server will be demoted
|
||||
const bool demotion_target_is_master; // Was the demotion target the master?
|
||||
@ -248,13 +267,28 @@ public:
|
||||
/* Similar copy for promotion target connections. */
|
||||
SlaveStatusArray promotion_target_conns;
|
||||
|
||||
ClusterOperation(OperationType type,
|
||||
ClusterOperation(OperationType type, ServerOperation* dem_op, ServerOperation* prom_op,
|
||||
MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
||||
const SlaveStatusArray& promo_target_conns, const SlaveStatusArray& demo_target_conns,
|
||||
bool demo_target_is_master, bool handle_events,
|
||||
std::string& promotion_sql_file, std::string& demotion_sql_file,
|
||||
std::string& replication_user, std::string& replication_password,
|
||||
json_t** error, maxbase::Duration time_remaining);
|
||||
~ClusterOperation();
|
||||
};
|
||||
|
||||
// Operation data which concerns a single server. One object describes the part of a cluster
// operation performed on 'target' (e.g. MariaDBServer::demote() receives the demotion descriptor).
// All members are const and are set once by the constructor.
|
||||
class ServerOperation
|
||||
{
|
||||
public:
|
||||
MariaDBServer* const target; // Target server
|
||||
const bool was_is_master; // Was the target a master / should it become one
|
||||
const bool handle_events; // Should scheduled server events be disabled/enabled?
|
||||
const std::string sql_file; // Path to file with SQL commands to run during op
|
||||
const SlaveStatusArray conns_to_copy; // Slave connections the target should copy/merge
|
||||
|
||||
// Copies each argument into the member of the same name.
ServerOperation(MariaDBServer* target, bool was_is_master, bool handle_events,
|
||||
const std::string& sql_file, const SlaveStatusArray& conns_to_copy);
|
||||
};
|
||||
|
||||
/**
|
||||
|
Loading…
x
Reference in New Issue
Block a user