Cleanup slave connection copy & merge

The two cases are now separated. In switchover, the promotion and
demotion targets can swap connections between each other without worry.
In failover, the two connection lists must be merged semi-intelligently.

The slave connections of the two servers are now saved to the operation
descriptor object at the start of the operation. This allows slave status
updating during the operation.
This commit is contained in:
Esa Korhonen
2018-10-08 09:42:51 +03:00
parent 4d6f961695
commit c10fab977d
6 changed files with 249 additions and 144 deletions

View File

@ -490,38 +490,6 @@ bool MariaDBMonitor::start_external_replication(MariaDBServer* new_master, json_
return rval;
}
/**
* Starts a new slave connection on a server. Should be used on a demoted master server.
*
* @param old_master The server which will start replication
* @param new_master Replication target
* @return True if commands were accepted. This does not guarantee that replication proceeds
* successfully.
*/
bool MariaDBMonitor::switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master)
{
bool rval = false;
MYSQL* old_master_con = old_master->m_server_base->con;
SERVER* new_master_server = new_master->m_server_base->server;
string change_cmd = generate_change_master_cmd(new_master_server->address, new_master_server->port);
if (mxs_mysql_query(old_master_con, change_cmd.c_str()) == 0
&& mxs_mysql_query(old_master_con, "START SLAVE;") == 0)
{
MXS_NOTICE("Old master '%s' starting replication from '%s'.",
old_master->name(),
new_master->name());
rval = true;
}
else
{
MXS_ERROR("Old master '%s' could not start replication: '%s'.",
old_master->name(),
mysql_error(old_master_con));
}
return rval;
}
/**
* (Re)join given servers to the cluster. The servers in the array are assumed to be joinable.
* Usually the list is created by get_joinable_servers().
@ -758,20 +726,20 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
m_next_master = promotion_target;
}
timer.restart();
// Step 5: Redirect slaves and start replication on old master.
// Step 5: Start replication on old master and redirect slaves.
ServerArray redirected_slaves;
bool start_ok = switchover_start_slave(demotion_target, promotion_target);
if (start_ok)
if (demotion_target->copy_slave_conns(op, op.promotion_target_conns, promotion_target))
{
redirected_slaves.push_back(demotion_target);
}
op.time_remaining -= timer.lap();
else
{
MXS_WARNING("Could not copy slave connections from %s to %s.",
promotion_target->name(), demotion_target->name());
}
redirect_slaves_ex(op, redirectable_slaves, &redirected_slaves);
int redirects = redirect_slaves_ex(op, redirectable_slaves, &redirected_slaves);
bool success = redirectable_slaves.empty() ? start_ok : start_ok || redirects > 0;
if (success)
if (!redirected_slaves.empty())
{
timer.restart();
// Step 6: Finally, check that slaves are replicating.
@ -1311,6 +1279,7 @@ unique_ptr<ClusterOperation> MariaDBMonitor::failover_prepare(Log log_mode, json
auto time_limit = maxbase::Duration((double)m_failover_timeout);
rval.reset(new ClusterOperation(OperationType::FAILOVER,
promotion_target, demotion_target,
promotion_target->m_slave_status, demotion_target->m_slave_status,
demotion_target == m_master, m_handle_event_scheduler,
m_promote_sql_file, m_demote_sql_file,
m_replication_user, m_replication_password,
@ -1641,6 +1610,7 @@ unique_ptr<ClusterOperation> MariaDBMonitor::switchover_prepare(SERVER* promotio
maxbase::Duration time_limit((double)m_switchover_timeout);
rval.reset(new ClusterOperation(op_type,
promotion_target, demotion_target,
promotion_target->m_slave_status, demotion_target->m_slave_status,
demotion_target == m_master, m_handle_event_scheduler,
m_promote_sql_file, m_demote_sql_file,
m_replication_user, m_replication_password,

View File

@ -289,7 +289,6 @@ private:
ServerArray* redirected_slaves);
int redirect_slaves_ex(ClusterOperation& op, const ServerArray& slaves,
ServerArray* redirected_slaves);
bool switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master);
bool start_external_replication(MariaDBServer* new_master, json_t** err_out);
std::string generate_change_master_cmd(const std::string& master_host, int master_port);
void wait_cluster_stabilization(ClusterOperation& op, const ServerArray& slaves);

View File

@ -1484,6 +1484,42 @@ bool MariaDBServer::promote(ClusterOperation& op)
stop_slave_helper(master_conn);
}
if (!stop_slave_error)
{
// Check that the deleted connection(s) are really gone.
string error_msg;
if (do_show_slave_status(&error_msg))
{
if (op.type == OperationType::SWITCHOVER)
{
if (!m_slave_status.empty())
{
stop_slave_error = true;
PRINT_MXS_JSON_ERROR(error_out,
"%s has %lu slave connections although all connections should "
"have been removed.", name(), m_slave_status.size());
}
}
else
{
if (slave_connection_status(op.demotion_target) != NULL)
{
stop_slave_error = true;
PRINT_MXS_JSON_ERROR(error_out,
"%s has a slave connection to %s although it should "
"have been removed.", name(), op.demotion_target->name());
}
}
}
else
{
stop_slave_error = true;
PRINT_MXS_JSON_ERROR(error_out,
"Failed to update slave connections of %s: %s",
name(), error_msg.c_str());
}
}
if (!stop_slave_error)
{
// Step 2: If demotion target is master, meaning this server will become the master,
@ -1528,20 +1564,35 @@ bool MariaDBServer::promote(ClusterOperation& op)
}
}
// Step 3: Copy slave connections from demotion target. If demotion target was replicating from
// this server (circular topology), the connection should be ignored. Also, connections which
// already exist on this server (when failovering) need not be regenerated.
// Step 3: Copy or merge slave connections from demotion target. The logic used depends on the
// operation.
if (!promotion_error)
{
if (copy_master_slave_conns(op)) // The method updates 'time_remaining' itself.
if (op.type == OperationType::FAILOVER)
{
success = true;
if (merge_slave_conns(op, op.demotion_target_conns))
{
success = true;
}
else
{
PRINT_MXS_JSON_ERROR(error_out,
"Could not merge slave connections from %s to %s.",
op.demotion_target->name(), name());
}
}
else
{
PRINT_MXS_JSON_ERROR(error_out,
"Could not copy slave connections from '%s' to '%s'.",
op.demotion_target->name(), name());
if (copy_slave_conns(op, op.demotion_target_conns, op.demotion_target))
{
success = true;
}
else
{
PRINT_MXS_JSON_ERROR(error_out,
"Could not copy slave connections from %s to %s.",
op.demotion_target->name(), name());
}
}
}
}
@ -1555,8 +1606,8 @@ bool MariaDBServer::demote(ClusterOperation& op)
bool success = false;
StopWatch timer;
// Step 1: Stop & reset slave connections. The promotion target will copy them. The server object
// must not be updated before the connections have been copied.
// Step 1: Stop & reset slave connections. The promotion target will copy them. The connection
// information has been backed up in the operation object.
bool stop_slave_error = false;
for (size_t i = 0; !stop_slave_error && i < m_slave_status.size(); i++)
{
@ -1567,6 +1618,29 @@ bool MariaDBServer::demote(ClusterOperation& op)
op.time_remaining -= timer.lap();
}
if (!stop_slave_error)
{
// Check that slave connections are really gone.
string error_msg;
if (do_show_slave_status(&error_msg))
{
if (!m_slave_status.empty())
{
stop_slave_error = true;
PRINT_MXS_JSON_ERROR(error_out,
"%s has %lu slave connections although all connections should have been "
"removed.", name(), m_slave_status.size());
}
}
else
{
stop_slave_error = true;
PRINT_MXS_JSON_ERROR(error_out,
"Failed to update slave connections of %s: %s",
name(), error_msg.c_str());
}
}
if (!stop_slave_error)
{
// Step 2: If this server is master, disable writes and scheduled events, flush logs,
@ -1737,70 +1811,63 @@ bool MariaDBServer::set_read_only(ReadOnlySetting setting, maxbase::Duration tim
}
/**
* Copy slave connections from the demotion target to this server.
* Merge slave connections to this server (promotion target). This should only
* be used during failover promotion.
*
* @param op Operation descriptor
* @param conns_to_merge Connections which should be merged
* @return True on success
*/
bool MariaDBServer::copy_master_slave_conns(ClusterOperation& op)
bool MariaDBServer::merge_slave_conns(ClusterOperation& op, const SlaveStatusArray& conns_to_merge)
{
mxb_assert(this == op.promotion_target);
mxb_assert(op.promotion_target == this && op.type == OperationType::FAILOVER
&& slave_connection_status(op.demotion_target) == NULL);
/* When promoting a server during failover, the situation is more complicated than in switchover.
* Connections cannot be moved to the demotion target (= failed server) as it is off. This means
* that the promoting server must combine the roles of both itself and the failed server. Only the
* slave connection replicating from the failed server has been removed. This means that
* the promotion and demotion targets may have identical connections (connections going to
* the same server id or the same host:port). These connections should not be copied or modified.
* It's possible that the master had different settings for a duplicate slave connection,
* in this case the settings on the master are lost.
* TODO: think if the master's settings should take priority.
* Also, connection names may collide between the two servers, in this case try to generate
* a simple name for the new connection. */
// Helper function for checking if a slave connection should be ignored.
auto should_ignore_connection =
[this](const SlaveStatus& slave_conn, OperationType type, string* ignore_reason_out) -> bool {
bool conn_ignored = false;
auto conn_can_be_merged = [this](const SlaveStatus& slave_conn, string* ignore_reason_out) -> bool {
bool accepted = true;
auto master_id = slave_conn.master_server_id;
// The connection is only copied if it is running or at least has been seen running.
// Also, target should not be this server.
// The connection is only merged if it satisfies the copy-conditions. Merging has also
// additional requirements.
string ignore_reason;
if (!slave_conn.slave_sql_running)
if (!slave_conn.should_be_copied(&ignore_reason))
{
conn_ignored = true;
ignore_reason = "its slave sql thread is not running.";
}
else if (!slave_conn.seen_connected)
{
conn_ignored = true;
ignore_reason = "it has not been seen connected to master.";
}
else if (master_id <= 0)
{
conn_ignored = true;
ignore_reason = string_printf("its Master_Server_Id (%" PRIi64 ") is invalid .", master_id);
accepted = false;
}
else if (master_id == m_server_id)
{
// This is not really an error but indicates a complicated topology.
conn_ignored = true;
ignore_reason = "it points to the server being promoted (according to server id:s).";
// This is not an error but indicates a complicated topology. In any case, ignore this.
accepted = false;
ignore_reason = string_printf("it points to %s (according to server id:s).", name());
}
else if (type == OperationType::FAILOVER)
else
{
/* In the case of failover, not all connections have been removed from this server
* (promotion_target). The promotion and demotion targets may have identical connections
* (connections going to the same server id or the same host:port). These connections are
* ignored when copying slave connections. It's possible that the master had different
* settings for a duplicate slave connection, in this case the settings on the master are
* lost. */
/* This check should not be done in the case of switchover because while the connections
* have been removed on the real server (this), they are still present in the MariaDBServer
* object and could cause false detections. */
// Compare to connections already existing on this server.
for (const SlaveStatus& my_slave_conn : m_slave_status)
{
if (my_slave_conn.seen_connected
&& my_slave_conn.master_server_id == slave_conn.master_server_id)
if (my_slave_conn.seen_connected && my_slave_conn.master_server_id == master_id)
{
conn_ignored = true;
accepted = false;
const char format[] =
"its Master_Server_Id (%" PRIi64 ") matches an existing slave connection on %s.";
ignore_reason = string_printf(format, slave_conn.master_server_id, name());
ignore_reason = string_printf(format, master_id, name());
}
else if (my_slave_conn.master_host == slave_conn.master_host
&& my_slave_conn.master_port == slave_conn.master_port)
{
conn_ignored = true;
accepted = false;
ignore_reason = string_printf("its Master_Host (%s) and Master_Port (%i) match "
"an existing slave connection on %s.",
slave_conn.master_host.c_str(), slave_conn.master_port,
@ -1809,44 +1876,30 @@ bool MariaDBServer::copy_master_slave_conns(ClusterOperation& op)
}
}
if (conn_ignored)
if (!accepted)
{
*ignore_reason_out = ignore_reason;
}
return conn_ignored;
return accepted;
};
// Need to keep track of recently created connection names to avoid using an existing name.
std::set<string> created_connection_names;
// Helper function which checks that a connection name is unique and modifies it if not.
auto check_modify_conn_name = [this, &created_connection_names](SlaveStatus* slave_conn) -> bool {
// An inner lambda which checks name uniqueness.
auto name_is_used = [this, &created_connection_names](const string& name) -> bool {
if (created_connection_names.count(name) > 0)
{
return true;
}
else
{
for (const auto& slave_conn : m_slave_status)
{
if (slave_conn.exists && slave_conn.name == name)
{
return true;
}
}
}
return false;
};
// Need to keep track of connection names (both existing and new) to avoid using an existing name.
std::set<string> connection_names;
for (const auto& conn : m_slave_status)
{
connection_names.insert(conn.name);
}
// Helper function which checks that a connection name is unique and modifies it if not.
auto check_modify_conn_name = [this, &connection_names](SlaveStatus* slave_conn) -> bool {
bool name_is_unique = false;
if (name_is_used(slave_conn->name))
if (connection_names.count(slave_conn->name) > 0)
{
// If the name is used, generate a name using the host:port of the master, it should be
// unique.
// If the name is used, generate a name using the host:port of the master,
// it should be unique.
string second_try = string_printf("To [%s]:%i",
slave_conn->master_host.c_str(), slave_conn->master_port);
if (name_is_used(second_try))
if (connection_names.count(second_try) > 0)
{
// Even this one exists, something is really wrong. Give up.
MXS_ERROR("Could not generate a unique connection name for '%s': both '%s' and '%s' are "
@ -1868,13 +1921,12 @@ bool MariaDBServer::copy_master_slave_conns(ClusterOperation& op)
};
bool error = false;
const auto& conns_to_copy = op.demotion_target->m_slave_status;
for (size_t i = 0; !error && (i < conns_to_copy.size()); i++)
for (size_t i = 0; !error && (i < conns_to_merge.size()); i++)
{
// Need a copy of the array element here since it may be modified.
SlaveStatus slave_conn = conns_to_copy[i];
SlaveStatus slave_conn = conns_to_merge[i];
string ignore_reason;
if (should_ignore_connection(slave_conn, op.type, &ignore_reason))
if (conn_can_be_merged(slave_conn, &ignore_reason))
{
MXS_WARNING("%s was ignored when promoting %s because %s",
slave_conn.to_short_string(op.demotion_target->name()).c_str(), name(),
@ -1882,22 +1934,11 @@ bool MariaDBServer::copy_master_slave_conns(ClusterOperation& op)
}
else
{
/* If doing a switchover, all slave connections of this server should have been removed.
* It's safe to make new ones and connection names can be assumed unique since they come
* from an existing server. */
/* When doing failover, this server may have some of the connections of the demotion target.
* Those connections have already been detected by 'should_ignore_connection', but this
* server may still have connections with identical names. If so, modify the name. */
bool can_continue = true;
if (op.type == OperationType::FAILOVER)
{
can_continue = check_modify_conn_name(&slave_conn);
}
if (can_continue)
if (check_modify_conn_name(&slave_conn))
{
if (create_start_slave(op, slave_conn))
{
created_connection_names.insert(slave_conn.name);
connection_names.insert(slave_conn.name);
}
else
{
@ -1914,6 +1955,40 @@ bool MariaDBServer::copy_master_slave_conns(ClusterOperation& op)
return !error;
}
bool MariaDBServer::copy_slave_conns(ClusterOperation& op, const SlaveStatusArray& conns_to_copy,
const MariaDBServer* replacement)
{
mxb_assert(op.type == OperationType::SWITCHOVER && m_slave_status.empty());
bool start_slave_error = false;
for (size_t i = 0; i < conns_to_copy.size() && !start_slave_error; i++)
{
SlaveStatus slave_conn = conns_to_copy[i]; // slave_conn may be modified
string reason_not_copied;
if (slave_conn.should_be_copied(&reason_not_copied))
{
// Any slave connection that was going to this server itself are instead directed
// to the replacement server.
if (slave_conn.master_server_id == m_server_id)
{
slave_conn.master_host = replacement->m_server_base->server->address;
slave_conn.master_port = replacement->m_server_base->server->port;
}
if (!create_start_slave(op, slave_conn))
{
start_slave_error = true;
}
}
else
{
MXS_WARNING("%s was not copied to %s because %s",
slave_conn.to_short_string(replacement->name()).c_str(),
name(),
reason_not_copied.c_str());
}
}
return !start_slave_error;
}
/**
* Create a new slave connection on the server and start it.
*

View File

@ -72,7 +72,6 @@ struct NodeData
class MariaDBServer
{
public:
typedef std::vector<SlaveStatus> SlaveStatusArray;
enum class version
{
UNKNOWN, /* Totally unknown. Server has not been connected to yet. */
@ -376,6 +375,22 @@ public:
*/
bool redirect_existing_slave_conn(ClusterOperation& op);
/**
* Copy slave connections to this server. This is usually needed during switchover promotion and on
* the demoted server. It is assumed that all slave connections of this server have
* been stopped & removed so there will be no conflicts with existing connections.
* A special rule is applied to a connection which points to this server itself: that connection
* is directed towards the 'replacement'. This is required to properly handle connections between
* the promotion and demotion targets.
*
* @param op Operation descriptor
* @params conns_to_copy The connections to add to the server
* @params replacement Which server should rep
* @return True on success
*/
bool copy_slave_conns(ClusterOperation& op, const SlaveStatusArray& conns_to_copy,
const MariaDBServer* replacement);
/**
* Is binary log on? 'update_replication_settings' should be ran before this function to query the data.
*
@ -508,6 +523,7 @@ private:
bool update_slave_status(std::string* errmsg_out = NULL);
bool sstatus_array_topology_equal(const SlaveStatusArray& new_slave_status);
const SlaveStatus* sstatus_find_previous_row(const SlaveStatus& new_row, size_t guess);
SlaveStatus* slave_connection_status_mutable(const MariaDBServer* target);
void warn_event_scheduler();
bool events_foreach(ManipulatorFunc& func, json_t** error_out);
@ -523,9 +539,8 @@ private:
bool execute_cmd_time_limit(const std::string& cmd, maxbase::Duration time_limit,
std::string* errmsg_out);
bool set_read_only(ReadOnlySetting value, maxbase::Duration time_limit, json_t** error_out);
bool copy_master_slave_conns(ClusterOperation& op);
bool create_start_slave(ClusterOperation& op, const SlaveStatus& slave_conn);
SlaveStatus* slave_connection_status_mutable(const MariaDBServer* target);
std::string generate_change_master_cmd(ClusterOperation& op, const SlaveStatus& slave_conn);
bool set_read_only(ReadOnlySetting value, maxbase::Duration time_limit, json_t** error_out);
bool merge_slave_conns(ClusterOperation& op, const SlaveStatusArray& conns_to_merge);
bool create_start_slave(ClusterOperation& op, const SlaveStatus& slave_conn);
std::string generate_change_master_cmd(ClusterOperation& op, const SlaveStatus& slave_conn);
};

View File

@ -125,8 +125,40 @@ string SlaveStatus::slave_io_to_string(SlaveStatus::slave_io_running_t slave_io)
return rval;
}
bool SlaveStatus::should_be_copied(string* ignore_reason_out) const
{
bool accepted = true;
auto master_id = master_server_id;
// The connection is only copied if it is running or at least has been seen running.
// Also, target should not be this server.
string ignore_reason;
if (!slave_sql_running)
{
accepted = false;
ignore_reason = "its slave sql thread is not running.";
}
else if (!seen_connected)
{
accepted = false;
ignore_reason = "it has not been seen connected to master.";
}
else if (master_id <= 0)
{
accepted = false;
ignore_reason = string_printf("its Master_Server_Id (%" PRIi64 ") is invalid .", master_id);
}
if (!accepted)
{
*ignore_reason_out = ignore_reason;
}
return accepted;
}
ClusterOperation::ClusterOperation(OperationType type,
MariaDBServer* promotion_target, MariaDBServer* demotion_target,
const SlaveStatusArray& promo_target_conns,
const SlaveStatusArray& demo_target_conns,
bool demo_target_is_master, bool handle_events,
string& promotion_sql_file, string& demotion_sql_file,
string& replication_user, string& replication_password,
@ -142,7 +174,10 @@ ClusterOperation::ClusterOperation(OperationType type,
, replication_password(replication_password)
, error_out(error)
, time_remaining(time_remaining)
{}
, demotion_target_conns(demo_target_conns)
, promotion_target_conns(promo_target_conns)
{
}
GtidList GtidList::from_string(const string& gtid_string)
{

View File

@ -200,8 +200,11 @@ public:
std::string to_short_string(const std::string& owner) const;
static slave_io_running_t slave_io_from_string(const std::string& str);
static std::string slave_io_to_string(slave_io_running_t slave_io);
bool should_be_copied(std::string* ignore_reason_out) const;
};
typedef std::vector<SlaveStatus> SlaveStatusArray;
enum class OperationType
{
SWITCHOVER,
@ -232,8 +235,16 @@ public:
json_t** const error_out; // Json error output
maxbase::Duration time_remaining; // How much time remains to complete the operation
/* Slave connections of the demotion target. Saved here in case the data in the server object is
* modified before promoted server has copied the connections. */
SlaveStatusArray demotion_target_conns;
/* Similar copy for promotion target connections. */
SlaveStatusArray promotion_target_conns;
ClusterOperation(OperationType type,
MariaDBServer* promotion_target, MariaDBServer* demotion_target,
const SlaveStatusArray& promo_target_conns, const SlaveStatusArray& demo_target_conns,
bool demo_target_is_master, bool handle_events,
std::string& promotion_sql_file, std::string& demotion_sql_file,
std::string& replication_user, std::string& replication_password,