Don't redirect duplicate connections
The redirection method checks if a slave connection to the redirection target already exists. If so, the connection is not modified. Also, failover better detects duplicate connections during promotion.
This commit is contained in:
@ -417,45 +417,129 @@ int MariaDBMonitor::redirect_slaves(MariaDBServer* new_master, const ServerArray
|
||||
}
|
||||
|
||||
/**
|
||||
* Redirect slaves to replicate from the promotion target.
|
||||
* Redirect slave connections from the promotion target to replicate from the demotion target and vice versa.
|
||||
*
|
||||
* @param op Operation descriptor
|
||||
* @param slaves An array of slaves to redirect
|
||||
* @param old_master The connections to this server are redirected
|
||||
* @param new_master The new master for the redirected connections
|
||||
* @param redirected_slaves A vector where to insert successfully redirected slaves
|
||||
* @param redirected_to_promo Output for slaves successfully redirected to promotion target
|
||||
* @param redirected_to_demo Output for slaves successfully redirected to demotion target
|
||||
* @return The number of slaves successfully redirected
|
||||
*/
|
||||
int MariaDBMonitor::redirect_slaves_ex(ClusterOperation& op, const ServerArray& slaves,
|
||||
const MariaDBServer* old_master, const MariaDBServer* new_master,
|
||||
ServerArray* redirected_slaves)
|
||||
int MariaDBMonitor::redirect_slaves_ex(ClusterOperation& op,
|
||||
ServerArray* redirected_to_promo, ServerArray* redirected_to_demo)
|
||||
{
|
||||
mxb_assert(redirected_slaves != NULL);
|
||||
if (slaves.empty())
|
||||
mxb_assert(op.type == OperationType::SWITCHOVER || op.type == OperationType::FAILOVER);
|
||||
MariaDBServer* const promotion_target = op.promotion_target;
|
||||
MariaDBServer* const demotion_target = op.demotion_target;
|
||||
|
||||
// Slaves of demotion target are redirected to promotion target.
|
||||
// Try to redirect even disconnected slaves.
|
||||
ServerArray redirect_to_promo_target = get_redirectables(demotion_target, promotion_target);
|
||||
// Slaves of promotion target are redirected to demotion target in case of switchover.
|
||||
// This list contains elements only when promoting a relay in switchover.
|
||||
ServerArray redirect_to_demo_target;
|
||||
if (op.type == OperationType::SWITCHOVER)
|
||||
{
|
||||
redirect_to_demo_target = get_redirectables(promotion_target, demotion_target);
|
||||
}
|
||||
if (redirect_to_promo_target.empty() && redirect_to_demo_target.empty())
|
||||
{
|
||||
// This is ok, nothing to do.
|
||||
return 0;
|
||||
}
|
||||
|
||||
string slave_names = monitored_servers_to_string(slaves);
|
||||
MXS_NOTICE("Redirecting %s to replicate from %s instead of %s.",
|
||||
slave_names.c_str(), op.promotion_target->name(), op.demotion_target->name());
|
||||
/* In complicated topologies, this redirection can get tricky. It's possible that a slave is
|
||||
* replicating from both promotion and demotion targets and with different settings. This leads
|
||||
* to a somewhat similar situation as in promotion (connection copy/merge).
|
||||
*
|
||||
* Neither slave connection can be redirected since they would be conflicting. As a temporary
|
||||
* solution, such duplicate slave connections are for now avoided by not redirecting them. If this
|
||||
* becomes an issue (e.g. connection settings need to be properly preserved), add code which:
|
||||
* 1) In switchover, swaps the connections by first deleting or redirecting the other to a nonsensical
|
||||
* host to avoid host:port conflict.
|
||||
* 2) In failover, deletes the connection to promotion target and redirects the one to demotion target,
|
||||
* or does the same as in 1.
|
||||
*/
|
||||
|
||||
const char redir_fmt[] = "Redirecting %s to replicate from %s instead of %s.";
|
||||
string slave_names_to_promo = monitored_servers_to_string(redirect_to_promo_target);
|
||||
string slave_names_to_demo = monitored_servers_to_string(redirect_to_demo_target);
|
||||
mxb_assert(slave_names_to_demo.empty() || op.type == OperationType::SWITCHOVER);
|
||||
|
||||
// Print both name lists if both have items, otherwise just the one with items.
|
||||
if (!slave_names_to_promo.empty() && !slave_names_to_demo.empty())
|
||||
{
|
||||
MXS_NOTICE("Redirecting %s to replicate from %s instead of %s, and %s to replicate from "
|
||||
"%s instead of %s.",
|
||||
slave_names_to_promo.c_str(), promotion_target->name(), demotion_target->name(),
|
||||
slave_names_to_demo.c_str(), demotion_target->name(), promotion_target->name());
|
||||
}
|
||||
else if (!slave_names_to_promo.empty())
|
||||
{
|
||||
MXS_NOTICE(redir_fmt,
|
||||
slave_names_to_promo.c_str(), promotion_target->name(), demotion_target->name());
|
||||
}
|
||||
else if (!slave_names_to_demo.empty())
|
||||
{
|
||||
MXS_NOTICE(redir_fmt,
|
||||
slave_names_to_demo.c_str(), demotion_target->name(), promotion_target->name());
|
||||
}
|
||||
|
||||
int successes = 0;
|
||||
for (MariaDBServer* redirectable : slaves)
|
||||
int fails = 0;
|
||||
int conflicts = 0;
|
||||
auto redirection_helper =
|
||||
[this, &op, &conflicts, &successes, &fails](ServerArray& redirect_these,
|
||||
const MariaDBServer* from, const MariaDBServer* to,
|
||||
ServerArray* redirected) {
|
||||
for (MariaDBServer* redirectable : redirect_these)
|
||||
{
|
||||
if (redirectable->redirect_existing_slave_conn(op, old_master, new_master))
|
||||
mxb_assert(redirected != NULL);
|
||||
/* If the connection exists, even if disconnected, don't redirect.
|
||||
* Compare host:port, since that is how server detects duplicate connections.
|
||||
* Ignore for now the possibility of different host:ports having same server id:s
|
||||
* etc as such setups shouldn't try failover/switchover anyway. */
|
||||
auto existing_conn = redirectable->slave_connection_status_host_port(to);
|
||||
if (existing_conn)
|
||||
{
|
||||
successes++;
|
||||
redirected_slaves->push_back(redirectable);
|
||||
}
|
||||
}
|
||||
if (size_t(successes) == slaves.size())
|
||||
{
|
||||
MXS_NOTICE("All redirects successful.");
|
||||
// Already has a connection to redirect target.
|
||||
conflicts++;
|
||||
MXS_WARNING("%s already has a slave connection to %s, connection to %s was "
|
||||
"not redirected.",
|
||||
redirectable->name(), to->name(), from->name());
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_WARNING("%lu out of %lu redirects failed.", slaves.size() - successes, slaves.size());
|
||||
// No conflict, redirect as normal.
|
||||
if (redirectable->redirect_existing_slave_conn(op, from, to))
|
||||
{
|
||||
successes++;
|
||||
redirected->push_back(redirectable);
|
||||
}
|
||||
else
|
||||
{
|
||||
fails++;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
redirection_helper(redirect_to_promo_target, demotion_target, promotion_target, redirected_to_promo);
|
||||
redirection_helper(redirect_to_demo_target, promotion_target, demotion_target, redirected_to_demo);
|
||||
|
||||
if (fails == 0 && conflicts == 0)
|
||||
{
|
||||
MXS_NOTICE("All redirects successful.");
|
||||
}
|
||||
else if (fails == 0)
|
||||
{
|
||||
MXS_NOTICE("%i slave connections were redirected while %i connections were ignored.",
|
||||
successes, conflicts);
|
||||
}
|
||||
else
|
||||
{
|
||||
int total = fails + conflicts + successes;
|
||||
MXS_WARNING("%i redirects failed, %i slave connections ignored and %i redirects successful "
|
||||
"out of %i.", fails, conflicts, successes, total);
|
||||
}
|
||||
return successes;
|
||||
}
|
||||
@ -692,26 +776,20 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
|
||||
json_t** const error_out = op.error_out;
|
||||
mxb_assert(promotion_target && demotion_target);
|
||||
|
||||
// Step 1a: Save all slaves except promotion target to an array.
|
||||
// Try to redirect even disconnected slaves.
|
||||
ServerArray redirect_to_promo_target = get_redirectables(demotion_target, promotion_target);
|
||||
// Step 1b: The slaves of the promotion target must be redirected to the old master. This
|
||||
// list contains elements only when promoting a relay.
|
||||
ServerArray redirect_to_demo_target = get_redirectables(promotion_target, demotion_target);
|
||||
bool rval = false;
|
||||
// Step 2: Set read-only to on, flush logs, update gtid:s.
|
||||
// Step 1: Set read-only to on, flush logs, update gtid:s.
|
||||
if (demotion_target->demote(op))
|
||||
{
|
||||
m_cluster_modified = true;
|
||||
bool catchup_and_promote_success = false;
|
||||
StopWatch timer;
|
||||
// Step 3: Wait for the promotion target to catch up with the demotion target. Disregard the other
|
||||
// Step 2: Wait for the promotion target to catch up with the demotion target. Disregard the other
|
||||
// slaves of the promotion target to avoid needless waiting.
|
||||
// The gtid:s of the demotion target were updated at the end of demotion.
|
||||
if (promotion_target->catchup_to_master(op))
|
||||
{
|
||||
MXS_INFO("Switchover: Catchup took %.1f seconds.", timer.lap().secs());
|
||||
// Step 4: On new master: remove slave connections, set read-only to OFF etc.
|
||||
// Step 3: On new master: remove slave connections, set read-only to OFF etc.
|
||||
if (promotion_target->promote(op))
|
||||
{
|
||||
// Point of no return. Even if following steps fail, do not try to undo.
|
||||
@ -724,7 +802,7 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
|
||||
m_next_master = promotion_target;
|
||||
}
|
||||
|
||||
// Step 5: Start replication on old master and redirect slaves.
|
||||
// Step 4: Start replication on old master and redirect slaves.
|
||||
ServerArray redirected_to_promo_target;
|
||||
if (demotion_target->copy_slave_conns(op, op.promotion_target_conns, promotion_target))
|
||||
{
|
||||
@ -735,17 +813,13 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
|
||||
MXS_WARNING("Could not copy slave connections from %s to %s.",
|
||||
promotion_target->name(), demotion_target->name());
|
||||
}
|
||||
redirect_slaves_ex(op, redirect_to_promo_target, demotion_target, promotion_target,
|
||||
&redirected_to_promo_target);
|
||||
|
||||
ServerArray redirected_to_demo_target;
|
||||
redirect_slaves_ex(op, redirect_to_demo_target, promotion_target, demotion_target,
|
||||
&redirected_to_demo_target);
|
||||
redirect_slaves_ex(op, &redirected_to_promo_target, &redirected_to_demo_target);
|
||||
|
||||
if (!redirected_to_promo_target.empty() || !redirected_to_demo_target.empty())
|
||||
{
|
||||
timer.restart();
|
||||
// Step 6: Finally, check that slaves are replicating.
|
||||
// Step 5: Finally, check that slaves are replicating.
|
||||
wait_cluster_stabilization(op, redirected_to_promo_target, promotion_target);
|
||||
wait_cluster_stabilization(op, redirected_to_demo_target, demotion_target);
|
||||
auto step6_duration = timer.lap();
|
||||
@ -758,7 +832,7 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
|
||||
|
||||
if (!catchup_and_promote_success)
|
||||
{
|
||||
// Step 3 or 4 failed, try to undo step 2.
|
||||
// Step 2 or 3 failed, try to undo step 1.
|
||||
const char QUERY_UNDO[] = "SET GLOBAL read_only=0;";
|
||||
if (mxs_mysql_query(demotion_target->m_server_base->con, QUERY_UNDO) == 0)
|
||||
{
|
||||
@ -793,11 +867,8 @@ bool MariaDBMonitor::failover_perform(ClusterOperation& op)
|
||||
mxb_assert(op.promotion_target && op.demotion_target);
|
||||
MariaDBServer* const promotion_target = op.promotion_target;
|
||||
|
||||
// Step 1: Populate a vector with all slaves not the selected master.
|
||||
ServerArray redirectable_slaves = get_redirectables(op.demotion_target, promotion_target);
|
||||
|
||||
bool rval = false;
|
||||
// Step 2: Stop and reset slave, set read-only to OFF.
|
||||
// Step 1: Stop and reset slave, set read-only to OFF.
|
||||
if (promotion_target->promote(op))
|
||||
{
|
||||
// Point of no return. Even if following steps fail, do not try to undo. Failover considered
|
||||
@ -810,13 +881,13 @@ bool MariaDBMonitor::failover_perform(ClusterOperation& op)
|
||||
m_next_master = promotion_target;
|
||||
}
|
||||
|
||||
// Step 3: Redirect slaves.
|
||||
// Step 2: Redirect slaves.
|
||||
ServerArray redirected_slaves;
|
||||
redirect_slaves_ex(op, redirectable_slaves, op.demotion_target, promotion_target, &redirected_slaves);
|
||||
redirect_slaves_ex(op, &redirected_slaves, NULL);
|
||||
if (!redirected_slaves.empty())
|
||||
{
|
||||
StopWatch timer;
|
||||
/* Step 4: Finally, check that slaves are connected to the new master. Even if
|
||||
/* Step 3: Finally, check that slaves are connected to the new master. Even if
|
||||
* time is out at this point, wait_cluster_stabilization() will check the slaves
|
||||
* once so that latest status is printed. */
|
||||
wait_cluster_stabilization(op, redirected_slaves, promotion_target);
|
||||
|
@ -286,9 +286,8 @@ private:
|
||||
ServerArray get_redirectables(const MariaDBServer* old_master, const MariaDBServer* ignored_slave);
|
||||
int redirect_slaves(MariaDBServer* new_master, const ServerArray& slaves,
|
||||
ServerArray* redirected_slaves);
|
||||
int redirect_slaves_ex(ClusterOperation& op, const ServerArray& slaves,
|
||||
const MariaDBServer* old_master, const MariaDBServer* new_master,
|
||||
ServerArray* redirected_slaves);
|
||||
int redirect_slaves_ex(ClusterOperation& op,
|
||||
ServerArray* redirected_to_promo, ServerArray* redirected_to_demo);
|
||||
bool start_external_replication(MariaDBServer* new_master, json_t** err_out);
|
||||
std::string generate_change_master_cmd(const std::string& master_host, int master_port);
|
||||
void wait_cluster_stabilization(ClusterOperation& op, const ServerArray& slaves,
|
||||
|
@ -1838,6 +1838,8 @@ bool MariaDBServer::merge_slave_conns(ClusterOperation& op, const SlaveStatusArr
|
||||
auto conn_can_be_merged = [this](const SlaveStatus& slave_conn, string* ignore_reason_out) -> bool {
|
||||
bool accepted = true;
|
||||
auto master_id = slave_conn.master_server_id;
|
||||
string my_host = m_server_base->server->address;
|
||||
int my_port = m_server_base->server->port;
|
||||
// The connection is only merged if it satisfies the copy-conditions. Merging has also
|
||||
// additional requirements.
|
||||
string ignore_reason;
|
||||
@ -1851,6 +1853,11 @@ bool MariaDBServer::merge_slave_conns(ClusterOperation& op, const SlaveStatusArr
|
||||
accepted = false;
|
||||
ignore_reason = string_printf("it points to %s (according to server id:s).", name());
|
||||
}
|
||||
else if (slave_conn.master_host == my_host && slave_conn.master_port == my_port)
|
||||
{
|
||||
accepted = false;
|
||||
ignore_reason = string_printf("it points to %s (according to master host:port).", name());
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compare to connections already existing on this server.
|
||||
|
Reference in New Issue
Block a user