Redirect slaves of promotion target
This affects situations where the promoted server is a relay or multimaster group member.
This commit is contained in:
@ -421,10 +421,13 @@ int MariaDBMonitor::redirect_slaves(MariaDBServer* new_master, const ServerArray
|
|||||||
*
|
*
|
||||||
* @param op Operation descriptor
|
* @param op Operation descriptor
|
||||||
* @param slaves An array of slaves to redirect
|
* @param slaves An array of slaves to redirect
|
||||||
|
* @param old_master The connections to this server are redirected
|
||||||
|
* @param new_master The new master for the redirected connections
|
||||||
* @param redirected_slaves A vector where to insert successfully redirected slaves
|
* @param redirected_slaves A vector where to insert successfully redirected slaves
|
||||||
* @return The number of slaves successfully redirected
|
* @return The number of slaves successfully redirected
|
||||||
*/
|
*/
|
||||||
int MariaDBMonitor::redirect_slaves_ex(ClusterOperation& op, const ServerArray& slaves,
|
int MariaDBMonitor::redirect_slaves_ex(ClusterOperation& op, const ServerArray& slaves,
|
||||||
|
const MariaDBServer* old_master, const MariaDBServer* new_master,
|
||||||
ServerArray* redirected_slaves)
|
ServerArray* redirected_slaves)
|
||||||
{
|
{
|
||||||
mxb_assert(redirected_slaves != NULL);
|
mxb_assert(redirected_slaves != NULL);
|
||||||
@ -440,7 +443,7 @@ int MariaDBMonitor::redirect_slaves_ex(ClusterOperation& op, const ServerArray&
|
|||||||
int successes = 0;
|
int successes = 0;
|
||||||
for (MariaDBServer* redirectable : slaves)
|
for (MariaDBServer* redirectable : slaves)
|
||||||
{
|
{
|
||||||
if (redirectable->redirect_existing_slave_conn(op))
|
if (redirectable->redirect_existing_slave_conn(op, old_master, new_master))
|
||||||
{
|
{
|
||||||
successes++;
|
successes++;
|
||||||
redirected_slaves->push_back(redirectable);
|
redirected_slaves->push_back(redirectable);
|
||||||
@ -689,11 +692,12 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
|
|||||||
json_t** const error_out = op.error_out;
|
json_t** const error_out = op.error_out;
|
||||||
mxb_assert(promotion_target && demotion_target);
|
mxb_assert(promotion_target && demotion_target);
|
||||||
|
|
||||||
// Step 1: Save all slaves except promotion target to an array.
|
// Step 1a: Save all slaves except promotion target to an array.
|
||||||
// Try to redirect even disconnected slaves.
|
// Try to redirect even disconnected slaves.
|
||||||
// TODO: 'switchover_wait_slaves_catchup' needs to be smarter and not bother with such slaves.
|
ServerArray redirect_to_promo_target = get_redirectables(demotion_target, promotion_target);
|
||||||
ServerArray redirectable_slaves = get_redirectables(promotion_target, demotion_target);
|
// Step 1b: The slaves of the promotion target must be redirected to the old master. This
|
||||||
|
// list contains elements only when promoting a relay.
|
||||||
|
ServerArray redirect_to_demo_target = get_redirectables(promotion_target, demotion_target);
|
||||||
bool rval = false;
|
bool rval = false;
|
||||||
// Step 2: Set read-only to on, flush logs, update gtid:s.
|
// Step 2: Set read-only to on, flush logs, update gtid:s.
|
||||||
if (demotion_target->demote(op))
|
if (demotion_target->demote(op))
|
||||||
@ -721,23 +725,29 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Step 5: Start replication on old master and redirect slaves.
|
// Step 5: Start replication on old master and redirect slaves.
|
||||||
ServerArray redirected_slaves;
|
ServerArray redirected_to_promo_target;
|
||||||
if (demotion_target->copy_slave_conns(op, op.promotion_target_conns, promotion_target))
|
if (demotion_target->copy_slave_conns(op, op.promotion_target_conns, promotion_target))
|
||||||
{
|
{
|
||||||
redirected_slaves.push_back(demotion_target);
|
redirected_to_promo_target.push_back(demotion_target);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MXS_WARNING("Could not copy slave connections from %s to %s.",
|
MXS_WARNING("Could not copy slave connections from %s to %s.",
|
||||||
promotion_target->name(), demotion_target->name());
|
promotion_target->name(), demotion_target->name());
|
||||||
}
|
}
|
||||||
redirect_slaves_ex(op, redirectable_slaves, &redirected_slaves);
|
redirect_slaves_ex(op, redirect_to_promo_target, demotion_target, promotion_target,
|
||||||
|
&redirected_to_promo_target);
|
||||||
|
|
||||||
if (!redirected_slaves.empty())
|
ServerArray redirected_to_demo_target;
|
||||||
|
redirect_slaves_ex(op, redirect_to_demo_target, promotion_target, demotion_target,
|
||||||
|
&redirected_to_demo_target);
|
||||||
|
|
||||||
|
if (!redirected_to_promo_target.empty() || !redirected_to_demo_target.empty())
|
||||||
{
|
{
|
||||||
timer.restart();
|
timer.restart();
|
||||||
// Step 6: Finally, check that slaves are replicating.
|
// Step 6: Finally, check that slaves are replicating.
|
||||||
wait_cluster_stabilization(op, redirected_slaves);
|
wait_cluster_stabilization(op, redirected_to_promo_target, promotion_target);
|
||||||
|
wait_cluster_stabilization(op, redirected_to_demo_target, demotion_target);
|
||||||
auto step6_duration = timer.lap();
|
auto step6_duration = timer.lap();
|
||||||
MXS_INFO("Switchover: slave replication confirmation took %.1f seconds with "
|
MXS_INFO("Switchover: slave replication confirmation took %.1f seconds with "
|
||||||
"%.1f seconds to spare.",
|
"%.1f seconds to spare.",
|
||||||
@ -784,7 +794,7 @@ bool MariaDBMonitor::failover_perform(ClusterOperation& op)
|
|||||||
MariaDBServer* const promotion_target = op.promotion_target;
|
MariaDBServer* const promotion_target = op.promotion_target;
|
||||||
|
|
||||||
// Step 1: Populate a vector with all slaves not the selected master.
|
// Step 1: Populate a vector with all slaves not the selected master.
|
||||||
ServerArray redirectable_slaves = get_redirectables(promotion_target, op.demotion_target);
|
ServerArray redirectable_slaves = get_redirectables(op.demotion_target, promotion_target);
|
||||||
|
|
||||||
bool rval = false;
|
bool rval = false;
|
||||||
// Step 2: Stop and reset slave, set read-only to OFF.
|
// Step 2: Stop and reset slave, set read-only to OFF.
|
||||||
@ -802,14 +812,14 @@ bool MariaDBMonitor::failover_perform(ClusterOperation& op)
|
|||||||
|
|
||||||
// Step 3: Redirect slaves.
|
// Step 3: Redirect slaves.
|
||||||
ServerArray redirected_slaves;
|
ServerArray redirected_slaves;
|
||||||
redirect_slaves_ex(op, redirectable_slaves, &redirected_slaves);
|
redirect_slaves_ex(op, redirectable_slaves, op.demotion_target, promotion_target, &redirected_slaves);
|
||||||
if (!redirected_slaves.empty())
|
if (!redirected_slaves.empty())
|
||||||
{
|
{
|
||||||
StopWatch timer;
|
StopWatch timer;
|
||||||
/* Step 4: Finally, check that slaves are connected to the new master. Even if
|
/* Step 4: Finally, check that slaves are connected to the new master. Even if
|
||||||
* time is out at this point, wait_cluster_stabilization() will check the slaves
|
* time is out at this point, wait_cluster_stabilization() will check the slaves
|
||||||
* once so that latest status is printed. */
|
* once so that latest status is printed. */
|
||||||
wait_cluster_stabilization(op, redirected_slaves);
|
wait_cluster_stabilization(op, redirected_slaves, promotion_target);
|
||||||
MXS_INFO("Failover: slave replication confirmation took %.1f seconds with "
|
MXS_INFO("Failover: slave replication confirmation took %.1f seconds with "
|
||||||
"%.1f seconds to spare.",
|
"%.1f seconds to spare.",
|
||||||
timer.lap().secs(), op.time_remaining.secs());
|
timer.lap().secs(), op.time_remaining.secs());
|
||||||
@ -824,8 +834,10 @@ bool MariaDBMonitor::failover_perform(ClusterOperation& op)
|
|||||||
*
|
*
|
||||||
* @param op Operation descriptor
|
* @param op Operation descriptor
|
||||||
* @param redirected_slaves Slaves to check
|
* @param redirected_slaves Slaves to check
|
||||||
|
* @param new_master The target server of the slave connections
|
||||||
*/
|
*/
|
||||||
void MariaDBMonitor::wait_cluster_stabilization(ClusterOperation& op, const ServerArray& redirected_slaves)
|
void MariaDBMonitor::wait_cluster_stabilization(ClusterOperation& op, const ServerArray& redirected_slaves,
|
||||||
|
const MariaDBServer* new_master)
|
||||||
{
|
{
|
||||||
if (redirected_slaves.empty())
|
if (redirected_slaves.empty())
|
||||||
{
|
{
|
||||||
@ -834,7 +846,6 @@ void MariaDBMonitor::wait_cluster_stabilization(ClusterOperation& op, const Serv
|
|||||||
}
|
}
|
||||||
|
|
||||||
StopWatch timer;
|
StopWatch timer;
|
||||||
const MariaDBServer* new_master = op.promotion_target;
|
|
||||||
// Check all the servers in the list. Using a set because erasing from container.
|
// Check all the servers in the list. Using a set because erasing from container.
|
||||||
std::set<MariaDBServer*> unconfirmed(redirected_slaves.begin(), redirected_slaves.end());
|
std::set<MariaDBServer*> unconfirmed(redirected_slaves.begin(), redirected_slaves.end());
|
||||||
ServerArray successes;
|
ServerArray successes;
|
||||||
@ -1733,19 +1744,19 @@ bool MariaDBMonitor::check_gtid_replication(Log log_mode, const MariaDBServer* d
|
|||||||
/**
|
/**
|
||||||
* List slaves which should be redirected to the new master.
|
* List slaves which should be redirected to the new master.
|
||||||
*
|
*
|
||||||
* @param promotion_target The server which will be promoted
|
* @param old_master The server whose slaves are listed
|
||||||
* @param demotion_target The server which will be demoted
|
* @param ignored_slave A slave which should not be listed even if otherwise valid
|
||||||
* @return A list of slaves to redirect
|
* @return A list of slaves to redirect
|
||||||
*/
|
*/
|
||||||
ServerArray MariaDBMonitor::get_redirectables(const MariaDBServer* promotion_target,
|
ServerArray MariaDBMonitor::get_redirectables(const MariaDBServer* old_master,
|
||||||
const MariaDBServer* demotion_target)
|
const MariaDBServer* ignored_slave)
|
||||||
{
|
{
|
||||||
ServerArray redirectable_slaves;
|
ServerArray redirectable_slaves;
|
||||||
for (MariaDBServer* slave : demotion_target->m_node.children)
|
for (MariaDBServer* slave : old_master->m_node.children)
|
||||||
{
|
{
|
||||||
if (slave->is_usable() && slave != promotion_target)
|
if (slave->is_usable() && slave != ignored_slave)
|
||||||
{
|
{
|
||||||
auto sstatus = slave->slave_connection_status(demotion_target);
|
auto sstatus = slave->slave_connection_status(old_master);
|
||||||
if (sstatus && !sstatus->gtid_io_pos.empty())
|
if (sstatus && !sstatus->gtid_io_pos.empty())
|
||||||
{
|
{
|
||||||
redirectable_slaves.push_back(slave);
|
redirectable_slaves.push_back(slave);
|
||||||
|
@ -283,15 +283,16 @@ private:
|
|||||||
bool check_gtid_replication(Log log_mode, const MariaDBServer* demotion_target,
|
bool check_gtid_replication(Log log_mode, const MariaDBServer* demotion_target,
|
||||||
json_t** error_out);
|
json_t** error_out);
|
||||||
|
|
||||||
ServerArray get_redirectables(const MariaDBServer* promotion_target,
|
ServerArray get_redirectables(const MariaDBServer* old_master, const MariaDBServer* ignored_slave);
|
||||||
const MariaDBServer* demotion_target);
|
|
||||||
int redirect_slaves(MariaDBServer* new_master, const ServerArray& slaves,
|
int redirect_slaves(MariaDBServer* new_master, const ServerArray& slaves,
|
||||||
ServerArray* redirected_slaves);
|
ServerArray* redirected_slaves);
|
||||||
int redirect_slaves_ex(ClusterOperation& op, const ServerArray& slaves,
|
int redirect_slaves_ex(ClusterOperation& op, const ServerArray& slaves,
|
||||||
|
const MariaDBServer* old_master, const MariaDBServer* new_master,
|
||||||
ServerArray* redirected_slaves);
|
ServerArray* redirected_slaves);
|
||||||
bool start_external_replication(MariaDBServer* new_master, json_t** err_out);
|
bool start_external_replication(MariaDBServer* new_master, json_t** err_out);
|
||||||
std::string generate_change_master_cmd(const std::string& master_host, int master_port);
|
std::string generate_change_master_cmd(const std::string& master_host, int master_port);
|
||||||
void wait_cluster_stabilization(ClusterOperation& op, const ServerArray& slaves);
|
void wait_cluster_stabilization(ClusterOperation& op, const ServerArray& slaves,
|
||||||
|
const MariaDBServer* new_master);
|
||||||
void report_and_disable(const std::string& operation, const std::string& setting_name,
|
void report_and_disable(const std::string& operation, const std::string& setting_name,
|
||||||
bool* setting_var);
|
bool* setting_var);
|
||||||
|
|
||||||
|
@ -2041,15 +2041,14 @@ string MariaDBServer::generate_change_master_cmd(ClusterOperation& op, const Sla
|
|||||||
return change_cmd;
|
return change_cmd;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBServer::redirect_existing_slave_conn(ClusterOperation& op)
|
bool MariaDBServer::redirect_existing_slave_conn(ClusterOperation& op, const MariaDBServer* old_master,
|
||||||
|
const MariaDBServer* new_master)
|
||||||
{
|
{
|
||||||
StopWatch timer;
|
StopWatch timer;
|
||||||
const MariaDBServer* old_master = op.demotion_target;
|
|
||||||
const MariaDBServer* new_master = op.promotion_target;
|
|
||||||
|
|
||||||
auto old_conn = slave_connection_status(old_master);
|
auto old_conn = slave_connection_status(old_master);
|
||||||
mxb_assert(old_conn);
|
mxb_assert(old_conn);
|
||||||
bool success = false;
|
bool success = false;
|
||||||
|
|
||||||
// First, just stop the slave connection.
|
// First, just stop the slave connection.
|
||||||
bool stopped = stop_slave_conn(old_conn->name, StopMode::STOP_ONLY, op.time_remaining, op.error_out);
|
bool stopped = stop_slave_conn(old_conn->name, StopMode::STOP_ONLY, op.time_remaining, op.error_out);
|
||||||
op.time_remaining -= timer.restart();
|
op.time_remaining -= timer.restart();
|
||||||
|
@ -368,12 +368,15 @@ public:
|
|||||||
bool demote(ClusterOperation& op);
|
bool demote(ClusterOperation& op);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Redirect the slave connection going to demotion target to replicate from promotion target.
|
* Redirect the slave connection going to old master to replicate from new master.
|
||||||
*
|
*
|
||||||
* @param op Operation descriptor
|
* @param op Operation descriptor
|
||||||
|
* @param old_master The connection to this server is redirected
|
||||||
|
* @param new_master The new master for the redirected connection
|
||||||
* @return True on success
|
* @return True on success
|
||||||
*/
|
*/
|
||||||
bool redirect_existing_slave_conn(ClusterOperation& op);
|
bool redirect_existing_slave_conn(ClusterOperation& op, const MariaDBServer* old_master,
|
||||||
|
const MariaDBServer* new_master);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy slave connections to this server. This is usually needed during switchover promotion and on
|
* Copy slave connections to this server. This is usually needed during switchover promotion and on
|
||||||
|
Reference in New Issue
Block a user