Divide ClusterOperation into two types
The main class was getting unwieldy and too general. Dividing the fields makes it easier to add support for other operation types. This commit leaves most data duplicated; later commits clean up the affected code.
This commit is contained in:
@ -771,14 +771,14 @@ bool MariaDBMonitor::server_is_rejoin_suspect(MariaDBServer* rejoin_cand, json_t
|
|||||||
*/
|
*/
|
||||||
bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
|
bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
|
||||||
{
|
{
|
||||||
MariaDBServer* const promotion_target = op.promotion_target;
|
mxb_assert(op.demotion && op.promotion);
|
||||||
MariaDBServer* const demotion_target = op.demotion_target;
|
MariaDBServer* const promotion_target = op.promotion->target;
|
||||||
json_t** const error_out = op.error_out;
|
MariaDBServer* const demotion_target = op.demotion->target;
|
||||||
mxb_assert(promotion_target && demotion_target);
|
json_t** const error_out = op.general.error_out;
|
||||||
|
|
||||||
bool rval = false;
|
bool rval = false;
|
||||||
// Step 1: Set read-only to on, flush logs, update gtid:s.
|
// Step 1: Set read-only to on, flush logs, update gtid:s.
|
||||||
if (demotion_target->demote(op))
|
if (demotion_target->demote(*op.demotion, op.general))
|
||||||
{
|
{
|
||||||
m_cluster_modified = true;
|
m_cluster_modified = true;
|
||||||
bool catchup_and_promote_success = false;
|
bool catchup_and_promote_success = false;
|
||||||
@ -1349,7 +1349,11 @@ unique_ptr<ClusterOperation> MariaDBMonitor::failover_prepare(Log log_mode, json
|
|||||||
{
|
{
|
||||||
// The Duration ctor taking a double interprets it as seconds.
|
// The Duration ctor taking a double interprets it as seconds.
|
||||||
auto time_limit = maxbase::Duration((double)m_failover_timeout);
|
auto time_limit = maxbase::Duration((double)m_failover_timeout);
|
||||||
rval.reset(new ClusterOperation(OperationType::FAILOVER,
|
bool promoting_to_master = (demotion_target == m_master);
|
||||||
|
ServerOperation* promotion_op = new ServerOperation(promotion_target, promoting_to_master,
|
||||||
|
m_handle_event_scheduler, m_promote_sql_file,
|
||||||
|
demotion_target->m_slave_status);
|
||||||
|
rval.reset(new ClusterOperation(OperationType::FAILOVER, NULL, promotion_op,
|
||||||
promotion_target, demotion_target,
|
promotion_target, demotion_target,
|
||||||
promotion_target->m_slave_status, demotion_target->m_slave_status,
|
promotion_target->m_slave_status, demotion_target->m_slave_status,
|
||||||
demotion_target == m_master, m_handle_event_scheduler,
|
demotion_target == m_master, m_handle_event_scheduler,
|
||||||
@ -1644,7 +1648,14 @@ unique_ptr<ClusterOperation> MariaDBMonitor::switchover_prepare(SERVER* promotio
|
|||||||
if (promotion_target && demotion_target && gtid_ok)
|
if (promotion_target && demotion_target && gtid_ok)
|
||||||
{
|
{
|
||||||
maxbase::Duration time_limit((double)m_switchover_timeout);
|
maxbase::Duration time_limit((double)m_switchover_timeout);
|
||||||
rval.reset(new ClusterOperation(op_type,
|
bool master_swap = demotion_target->is_master();
|
||||||
|
ServerOperation* demotion_op = new ServerOperation(demotion_target, master_swap,
|
||||||
|
m_handle_event_scheduler, m_demote_sql_file,
|
||||||
|
promotion_target->m_slave_status);
|
||||||
|
ServerOperation* promotion_op = new ServerOperation(promotion_target, master_swap,
|
||||||
|
m_handle_event_scheduler, m_promote_sql_file,
|
||||||
|
demotion_target->m_slave_status);
|
||||||
|
rval.reset(new ClusterOperation(op_type, demotion_op, promotion_op,
|
||||||
promotion_target, demotion_target,
|
promotion_target, demotion_target,
|
||||||
promotion_target->m_slave_status, demotion_target->m_slave_status,
|
promotion_target->m_slave_status, demotion_target->m_slave_status,
|
||||||
demotion_target == m_master, m_handle_event_scheduler,
|
demotion_target == m_master, m_handle_event_scheduler,
|
||||||
|
|||||||
@ -1471,12 +1471,12 @@ bool MariaDBServer::promote(ClusterOperation& op)
|
|||||||
bool stopped = false;
|
bool stopped = false;
|
||||||
if (op.type == OperationType::SWITCHOVER)
|
if (op.type == OperationType::SWITCHOVER)
|
||||||
{
|
{
|
||||||
stopped = remove_slave_conns(op, m_slave_status);
|
stopped = remove_slave_conns(op.general, m_slave_status);
|
||||||
}
|
}
|
||||||
else if (op.type == OperationType::FAILOVER)
|
else if (op.type == OperationType::FAILOVER)
|
||||||
{
|
{
|
||||||
stopped = remove_slave_conns(op, {*master_conn});
|
stopped = remove_slave_conns(op.general, {*master_conn});
|
||||||
master_conn = NULL; // The connection pointed to may no longer exist.
|
master_conn = NULL; // The connection pointed to may no longer exist.
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stopped)
|
if (stopped)
|
||||||
@ -1558,15 +1558,15 @@ bool MariaDBServer::promote(ClusterOperation& op)
|
|||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBServer::demote(ClusterOperation& op)
|
bool MariaDBServer::demote(ServerOperation& demo_op, GeneralOpData& general)
|
||||||
{
|
{
|
||||||
mxb_assert(op.type == OperationType::SWITCHOVER && op.demotion_target == this);
|
mxb_assert(demo_op.target == this);
|
||||||
json_t** error_out = op.error_out;
|
json_t** const error_out = general.error_out;
|
||||||
bool success = false;
|
bool success = false;
|
||||||
|
|
||||||
// Step 1: Stop & reset slave connections. The promotion target will copy them. The connection
|
// Step 1: Stop & reset slave connections. The promotion target will copy them. The connection
|
||||||
// information has been backed up in the operation object.
|
// information has been backed up in the operation object.
|
||||||
if (remove_slave_conns(op, m_slave_status))
|
if (remove_slave_conns(general, m_slave_status))
|
||||||
{
|
{
|
||||||
// Step 2: If this server is master, disable writes and scheduled events, flush logs,
|
// Step 2: If this server is master, disable writes and scheduled events, flush logs,
|
||||||
// update gtid:s, run demotion_sql_file.
|
// update gtid:s, run demotion_sql_file.
|
||||||
@ -1576,27 +1576,27 @@ bool MariaDBServer::demote(ClusterOperation& op)
|
|||||||
// likely part to fail, setting read_only=1, first to make undoing easier. Setting
|
// likely part to fail, setting read_only=1, first to make undoing easier. Setting
|
||||||
// read_only may fail if another session has table locks or is doing long writes.
|
// read_only may fail if another session has table locks or is doing long writes.
|
||||||
bool demotion_error = false;
|
bool demotion_error = false;
|
||||||
if (op.demotion_target_is_master)
|
if (demo_op.was_is_master)
|
||||||
{
|
{
|
||||||
mxb_assert(is_master());
|
mxb_assert(is_master());
|
||||||
StopWatch timer;
|
StopWatch timer;
|
||||||
// Step 2a: Enabling read-only can take time if writes are on or table locks taken.
|
// Step 2a: Enabling read-only can take time if writes are on or table locks taken.
|
||||||
// TODO: use max_statement_time to be safe!
|
// TODO: use max_statement_time to be safe!
|
||||||
bool ro_enabled = set_read_only(ReadOnlySetting::ENABLE, op.time_remaining, error_out);
|
bool ro_enabled = set_read_only(ReadOnlySetting::ENABLE, general.time_remaining, error_out);
|
||||||
op.time_remaining -= timer.lap();
|
general.time_remaining -= timer.lap();
|
||||||
if (!ro_enabled)
|
if (!ro_enabled)
|
||||||
{
|
{
|
||||||
demotion_error = true;
|
demotion_error = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (op.handle_events)
|
if (demo_op.handle_events)
|
||||||
{
|
{
|
||||||
// TODO: Add query replying to enable_events
|
// TODO: Add query replying to enable_events
|
||||||
// Step 2b: Using BINLOG_OFF to avoid adding any gtid events,
|
// Step 2b: Using BINLOG_OFF to avoid adding any gtid events,
|
||||||
// which could break external replication.
|
// which could break external replication.
|
||||||
bool events_disabled = disable_events(BinlogMode::BINLOG_OFF, error_out);
|
bool events_disabled = disable_events(BinlogMode::BINLOG_OFF, error_out);
|
||||||
op.time_remaining -= timer.lap();
|
general.time_remaining -= timer.lap();
|
||||||
if (!events_disabled)
|
if (!events_disabled)
|
||||||
{
|
{
|
||||||
demotion_error = true;
|
demotion_error = true;
|
||||||
@ -1605,16 +1605,16 @@ bool MariaDBServer::demote(ClusterOperation& op)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Step 2c: Run demotion_sql_file if no errors so far.
|
// Step 2c: Run demotion_sql_file if no errors so far.
|
||||||
if (!demotion_error && !op.demotion_sql_file.empty())
|
if (!demotion_error && !demo_op.sql_file.empty())
|
||||||
{
|
{
|
||||||
bool file_ran_ok = run_sql_from_file(op.demotion_sql_file, error_out);
|
bool file_ran_ok = run_sql_from_file(demo_op.sql_file, error_out);
|
||||||
op.time_remaining -= timer.lap();
|
general.time_remaining -= timer.lap();
|
||||||
if (!file_ran_ok)
|
if (!file_ran_ok)
|
||||||
{
|
{
|
||||||
demotion_error = true;
|
demotion_error = true;
|
||||||
PRINT_MXS_JSON_ERROR(error_out,
|
PRINT_MXS_JSON_ERROR(error_out,
|
||||||
"Execution of file '%s' failed during demotion of server %s.",
|
"Execution of file '%s' failed during demotion of server %s.",
|
||||||
op.demotion_sql_file.c_str(), name());
|
demo_op.sql_file.c_str(), name());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1622,8 +1622,9 @@ bool MariaDBServer::demote(ClusterOperation& op)
|
|||||||
{
|
{
|
||||||
// Step 2d: FLUSH LOGS to ensure that all events have been written to binlog.
|
// Step 2d: FLUSH LOGS to ensure that all events have been written to binlog.
|
||||||
string error_msg;
|
string error_msg;
|
||||||
bool logs_flushed = execute_cmd_time_limit("FLUSH LOGS;", op.time_remaining, &error_msg);
|
bool logs_flushed = execute_cmd_time_limit("FLUSH LOGS;", general.time_remaining,
|
||||||
op.time_remaining -= timer.lap();
|
&error_msg);
|
||||||
|
general.time_remaining -= timer.lap();
|
||||||
if (!logs_flushed)
|
if (!logs_flushed)
|
||||||
{
|
{
|
||||||
demotion_error = true;
|
demotion_error = true;
|
||||||
@ -1652,7 +1653,7 @@ bool MariaDBServer::demote(ClusterOperation& op)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (demotion_error && op.demotion_target_is_master)
|
if (demotion_error && demo_op.was_is_master)
|
||||||
{
|
{
|
||||||
// Read_only was enabled (or tried to be enabled) but a later step failed.
|
// Read_only was enabled (or tried to be enabled) but a later step failed.
|
||||||
// Disable read_only. Connection is likely broken so use a short time limit.
|
// Disable read_only. Connection is likely broken so use a short time limit.
|
||||||
@ -1730,11 +1731,12 @@ bool MariaDBServer::stop_slave_conn(const std::string& conn_name, StopMode mode,
|
|||||||
*
|
*
|
||||||
* @param op Operation descriptor
|
* @param op Operation descriptor
|
||||||
* @param conns_to_remove Which connections should be removed
|
* @param conns_to_remove Which connections should be removed
|
||||||
* @return True if succesfull
|
* @return True if successful
|
||||||
*/
|
*/
|
||||||
bool MariaDBServer::remove_slave_conns(ClusterOperation& op, const SlaveStatusArray& conns_to_remove)
|
bool MariaDBServer::remove_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_remove)
|
||||||
{
|
{
|
||||||
json_t** error_out = op.error_out;
|
json_t** error_out = op.error_out;
|
||||||
|
maxbase::Duration& time_remaining = op.time_remaining;
|
||||||
StopWatch timer;
|
StopWatch timer;
|
||||||
// Take a backup of the soon to be removed connections so they can be compared properly after an update.
|
// Take a backup of the soon to be removed connections so they can be compared properly after an update.
|
||||||
SlaveStatusArray conns_to_remove_copy = conns_to_remove;
|
SlaveStatusArray conns_to_remove_copy = conns_to_remove;
|
||||||
@ -1742,11 +1744,11 @@ bool MariaDBServer::remove_slave_conns(ClusterOperation& op, const SlaveStatusAr
|
|||||||
bool stop_slave_error = false;
|
bool stop_slave_error = false;
|
||||||
for (size_t i = 0; !stop_slave_error && i < conns_to_remove.size(); i++)
|
for (size_t i = 0; !stop_slave_error && i < conns_to_remove.size(); i++)
|
||||||
{
|
{
|
||||||
if (!stop_slave_conn(conns_to_remove[i].name, StopMode::RESET_ALL, op.time_remaining, error_out))
|
if (!stop_slave_conn(conns_to_remove[i].name, StopMode::RESET_ALL, time_remaining, error_out))
|
||||||
{
|
{
|
||||||
stop_slave_error = true;
|
stop_slave_error = true;
|
||||||
}
|
}
|
||||||
op.time_remaining -= timer.lap();
|
time_remaining -= timer.lap();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool success = false;
|
bool success = false;
|
||||||
@ -1797,7 +1799,7 @@ bool MariaDBServer::remove_slave_conns(ClusterOperation& op, const SlaveStatusAr
|
|||||||
name(), error_msg.c_str());
|
name(), error_msg.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
op.time_remaining -= timer.lap();
|
time_remaining -= timer.lap();
|
||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -365,7 +365,7 @@ public:
|
|||||||
* @param op Cluster operation descriptor
|
* @param op Cluster operation descriptor
|
||||||
* @return True if successful
|
* @return True if successful
|
||||||
*/
|
*/
|
||||||
bool demote(ClusterOperation& op);
|
bool demote(ServerOperation& op, GeneralOpData& general);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Redirect the slave connection going to old master to replicate from new master.
|
* Redirect the slave connection going to old master to replicate from new master.
|
||||||
@ -535,7 +535,7 @@ private:
|
|||||||
bool stop_slave_conn(const std::string& conn_name, StopMode mode, maxbase::Duration time_limit,
|
bool stop_slave_conn(const std::string& conn_name, StopMode mode, maxbase::Duration time_limit,
|
||||||
json_t** error_out);
|
json_t** error_out);
|
||||||
|
|
||||||
bool remove_slave_conns(ClusterOperation& op, const SlaveStatusArray& conns_to_remove);
|
bool remove_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_remove);
|
||||||
bool execute_cmd_ex(const std::string& cmd, QueryRetryMode mode,
|
bool execute_cmd_ex(const std::string& cmd, QueryRetryMode mode,
|
||||||
std::string* errmsg_out = NULL, unsigned int* errno_out = NULL);
|
std::string* errmsg_out = NULL, unsigned int* errno_out = NULL);
|
||||||
|
|
||||||
|
|||||||
@ -155,7 +155,7 @@ bool SlaveStatus::should_be_copied(string* ignore_reason_out) const
|
|||||||
return accepted;
|
return accepted;
|
||||||
}
|
}
|
||||||
|
|
||||||
ClusterOperation::ClusterOperation(OperationType type,
|
ClusterOperation::ClusterOperation(OperationType type, ServerOperation* dem_op, ServerOperation* prom_op,
|
||||||
MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
||||||
const SlaveStatusArray& promo_target_conns,
|
const SlaveStatusArray& promo_target_conns,
|
||||||
const SlaveStatusArray& demo_target_conns,
|
const SlaveStatusArray& demo_target_conns,
|
||||||
@ -164,6 +164,9 @@ ClusterOperation::ClusterOperation(OperationType type,
|
|||||||
string& replication_user, string& replication_password,
|
string& replication_user, string& replication_password,
|
||||||
json_t** error, maxbase::Duration time_remaining)
|
json_t** error, maxbase::Duration time_remaining)
|
||||||
: type(type)
|
: type(type)
|
||||||
|
, demotion(dem_op)
|
||||||
|
, promotion(prom_op)
|
||||||
|
, general(type, replication_user, replication_password, error, time_remaining)
|
||||||
, promotion_target(promotion_target)
|
, promotion_target(promotion_target)
|
||||||
, demotion_target(demotion_target)
|
, demotion_target(demotion_target)
|
||||||
, demotion_target_is_master(demo_target_is_master)
|
, demotion_target_is_master(demo_target_is_master)
|
||||||
@ -179,6 +182,33 @@ ClusterOperation::ClusterOperation(OperationType type,
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ClusterOperation::~ClusterOperation()
|
||||||
|
{
|
||||||
|
delete demotion;
|
||||||
|
delete promotion;
|
||||||
|
}
|
||||||
|
|
||||||
|
ServerOperation::ServerOperation(MariaDBServer* target, bool was_is_master,
|
||||||
|
bool handle_events, const std::string& sql_file,
|
||||||
|
const SlaveStatusArray& conns_to_copy)
|
||||||
|
: target(target)
|
||||||
|
, was_is_master(was_is_master)
|
||||||
|
, handle_events(handle_events)
|
||||||
|
, sql_file(sql_file)
|
||||||
|
, conns_to_copy(conns_to_copy)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
GeneralOpData::GeneralOpData(OperationType type, const string& replication_user, const string& replication_password,
|
||||||
|
json_t** error, maxbase::Duration time_remaining)
|
||||||
|
: type(type)
|
||||||
|
, replication_user(replication_user)
|
||||||
|
, replication_password(replication_password)
|
||||||
|
, error_out(error)
|
||||||
|
, time_remaining(time_remaining)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
GtidList GtidList::from_string(const string& gtid_string)
|
GtidList GtidList::from_string(const string& gtid_string)
|
||||||
{
|
{
|
||||||
mxb_assert(gtid_string.size());
|
mxb_assert(gtid_string.size());
|
||||||
|
|||||||
@ -217,6 +217,21 @@ enum class OperationType
|
|||||||
FAILOVER
|
FAILOVER
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class ServerOperation;
|
||||||
|
class GeneralOpData
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
const OperationType type;
|
||||||
|
const std::string replication_user; // User for CHANGE MASTER TO ...
|
||||||
|
const std::string replication_password; // Password for CHANGE MASTER TO ...
|
||||||
|
json_t** const error_out; // Json error output
|
||||||
|
maxbase::Duration time_remaining; // How much time remains to complete the operation
|
||||||
|
|
||||||
|
GeneralOpData(OperationType type,
|
||||||
|
const std::string& replication_user, const std::string& replication_password,
|
||||||
|
json_t** error, maxbase::Duration time_remaining);
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class which encapsulates many settings and status descriptors for a failover/switchover.
|
* Class which encapsulates many settings and status descriptors for a failover/switchover.
|
||||||
* Is more convenient to pass around than the separate elements. Most fields are constants or constant
|
* Is more convenient to pass around than the separate elements. Most fields are constants or constant
|
||||||
@ -229,7 +244,11 @@ private:
|
|||||||
ClusterOperation& operator=(const ClusterOperation&) = delete;
|
ClusterOperation& operator=(const ClusterOperation&) = delete;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
const OperationType type; // Failover or switchover
|
const OperationType type; // Failover or switchover
|
||||||
|
ServerOperation* const demotion; // Required by MariaDBServer->demote()
|
||||||
|
ServerOperation* const promotion; // Required by MariaDBServer->promote()
|
||||||
|
GeneralOpData general; // General operation data
|
||||||
|
|
||||||
MariaDBServer* const promotion_target; // Which server will be promoted
|
MariaDBServer* const promotion_target; // Which server will be promoted
|
||||||
MariaDBServer* const demotion_target; // Which server will be demoted
|
MariaDBServer* const demotion_target; // Which server will be demoted
|
||||||
const bool demotion_target_is_master; // Was the demotion target the master?
|
const bool demotion_target_is_master; // Was the demotion target the master?
|
||||||
@ -248,13 +267,28 @@ public:
|
|||||||
/* Similar copy for promotion target connections. */
|
/* Similar copy for promotion target connections. */
|
||||||
SlaveStatusArray promotion_target_conns;
|
SlaveStatusArray promotion_target_conns;
|
||||||
|
|
||||||
ClusterOperation(OperationType type,
|
ClusterOperation(OperationType type, ServerOperation* dem_op, ServerOperation* prom_op,
|
||||||
MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
||||||
const SlaveStatusArray& promo_target_conns, const SlaveStatusArray& demo_target_conns,
|
const SlaveStatusArray& promo_target_conns, const SlaveStatusArray& demo_target_conns,
|
||||||
bool demo_target_is_master, bool handle_events,
|
bool demo_target_is_master, bool handle_events,
|
||||||
std::string& promotion_sql_file, std::string& demotion_sql_file,
|
std::string& promotion_sql_file, std::string& demotion_sql_file,
|
||||||
std::string& replication_user, std::string& replication_password,
|
std::string& replication_user, std::string& replication_password,
|
||||||
json_t** error, maxbase::Duration time_remaining);
|
json_t** error, maxbase::Duration time_remaining);
|
||||||
|
~ClusterOperation();
|
||||||
|
};
|
||||||
|
|
||||||
|
// Operation data which concerns a single server
|
||||||
|
class ServerOperation
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
MariaDBServer* const target; // Target server
|
||||||
|
const bool was_is_master; // Was the target a master / should it become one
|
||||||
|
const bool handle_events; // Should scheduled server events be disabled/enabled?
|
||||||
|
const std::string sql_file; // Path to file with SQL commands to run during op
|
||||||
|
const SlaveStatusArray conns_to_copy; // Slave connections the target should copy/merge
|
||||||
|
|
||||||
|
ServerOperation(MariaDBServer* target, bool was_is_master, bool handle_events,
|
||||||
|
const std::string& sql_file, const SlaveStatusArray& conns_to_copy);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Reference in New Issue
Block a user