MXS-1845 Add redirection code
Should work with multimaster replication.
This commit is contained in:
@ -22,6 +22,7 @@
|
||||
using std::string;
|
||||
using std::unique_ptr;
|
||||
using maxscale::string_printf;
|
||||
using maxbase::StopWatch;
|
||||
|
||||
static const char RE_ENABLE_FMT[] = "To re-enable automatic %s, manually set '%s' to 'true' "
|
||||
"for monitor '%s' via MaxAdmin or the REST API, or restart MaxScale.";
|
||||
@ -417,6 +418,46 @@ int MariaDBMonitor::redirect_slaves(MariaDBServer* new_master,
|
||||
return successes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Redirect slaves to replicate from the promotion target.
|
||||
*
|
||||
* @param op Operation descriptor
|
||||
* @param slaves An array of slaves to redirect
|
||||
* @param redirected_slaves A vector where to insert successfully redirected slaves
|
||||
* @return The number of slaves successfully redirected
|
||||
*/
|
||||
int MariaDBMonitor::redirect_slaves_ex(ClusterOperation& op, const ServerArray& slaves,
|
||||
ServerArray* redirected_slaves)
|
||||
{
|
||||
mxb_assert(redirected_slaves != NULL);
|
||||
if (slaves.empty())
|
||||
{
|
||||
// This is ok, nothing to do.
|
||||
return 0;
|
||||
}
|
||||
|
||||
string slave_names = monitored_servers_to_string(slaves);
|
||||
MXS_NOTICE("Redirecting %s to replicate from %s instead of %s.",
|
||||
slave_names.c_str(), op.promotion_target->name(), op.demotion_target->name());
|
||||
int successes = 0;
|
||||
for (MariaDBServer* redirectable : slaves)
|
||||
{
|
||||
if (redirectable->redirect_existing_slave_conn(op))
|
||||
{
|
||||
successes++;
|
||||
redirected_slaves->push_back(redirectable);
|
||||
}
|
||||
}
|
||||
if (size_t(successes) == slaves.size())
|
||||
{
|
||||
MXS_NOTICE("All redirects successful.");
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_WARNING("%lu out of %lu redirects failed.", slaves.size() - successes, slaves.size());
|
||||
}
|
||||
return successes;
|
||||
}
|
||||
/**
|
||||
* Set the new master to replicate from the cluster external master.
|
||||
*
|
||||
@ -723,13 +764,11 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
|
||||
{
|
||||
redirected_slaves.push_back(demotion_target);
|
||||
}
|
||||
int redirects = redirect_slaves(promotion_target, redirectable_slaves, &redirected_slaves);
|
||||
int redirects = redirect_slaves_ex(op, redirectable_slaves, &redirected_slaves);
|
||||
|
||||
bool success = redirectable_slaves.empty() ? start_ok : start_ok || redirects > 0;
|
||||
if (success)
|
||||
{
|
||||
op.time_remaining -= timer.restart();
|
||||
|
||||
// Step 6: Finally, add an event to the new master to advance gtid and wait for the slaves
|
||||
// to receive it. If using external replication, skip this step. Come up with an
|
||||
// alternative later.
|
||||
@ -793,7 +832,6 @@ bool MariaDBMonitor::failover_perform(ClusterOperation& op)
|
||||
{
|
||||
mxb_assert(op.promotion_target && op.demotion_target);
|
||||
MariaDBServer* const promotion_target = op.promotion_target;
|
||||
maxbase::StopWatch timer;
|
||||
|
||||
// Step 1: Populate a vector with all slaves not the selected master.
|
||||
ServerArray redirectable_slaves = get_redirectables(promotion_target, op.demotion_target);
|
||||
@ -802,17 +840,17 @@ bool MariaDBMonitor::failover_perform(ClusterOperation& op)
|
||||
// Step 2: Stop and reset slave, set read-only to 0.
|
||||
if (promotion_target->promote(op))
|
||||
{
|
||||
// Point of no return. Even if following steps fail, do not try to undo.
|
||||
m_next_master = promotion_target;
|
||||
m_cluster_modified = true;
|
||||
|
||||
// Step 3: Redirect slaves.
|
||||
ServerArray redirected_slaves;
|
||||
int redirects = redirect_slaves(promotion_target, redirectable_slaves, &redirected_slaves);
|
||||
int redirects = redirect_slaves_ex(op, redirectable_slaves, &redirected_slaves);
|
||||
bool success = redirectable_slaves.empty() ? true : redirects > 0;
|
||||
if (success)
|
||||
{
|
||||
op.time_remaining -= timer.restart();
|
||||
|
||||
StopWatch timer;
|
||||
// Step 4: Finally, add an event to the new master to advance gtid and wait for the slaves
|
||||
// to receive it. seconds_remaining can be 0 or less at this point. Even in such a case
|
||||
// wait_cluster_stabilization() may succeed if replication is fast enough. If using external
|
||||
@ -1679,9 +1717,9 @@ void MariaDBMonitor::check_cluster_operations_support()
|
||||
* @return The first connected slave or NULL if none found
|
||||
*/
|
||||
const MariaDBServer* MariaDBMonitor::slave_receiving_events(const MariaDBServer* demotion_target,
|
||||
maxbase::Duration* event_age_out)
|
||||
maxbase::Duration* event_age_out)
|
||||
{
|
||||
auto time_now = maxbase::Clock::now();
|
||||
auto time_now = maxbase::Clock::now();
|
||||
maxbase::Clock::time_point alive_after = time_now - std::chrono::seconds(m_master_failure_timeout);
|
||||
|
||||
const MariaDBServer* connected_slave = NULL;
|
||||
|
@ -273,6 +273,8 @@ private:
|
||||
int redirect_slaves(MariaDBServer* new_master,
|
||||
const ServerArray& slaves,
|
||||
ServerArray* redirected_slaves);
|
||||
int redirect_slaves_ex(ClusterOperation& op, const ServerArray& slaves,
|
||||
ServerArray* redirected_slaves);
|
||||
std::string generate_change_master_cmd(const std::string& master_host, int master_port);
|
||||
bool start_external_replication(MariaDBServer* new_master, json_t** err_out);
|
||||
bool wait_cluster_stabilization(MariaDBServer* new_master,
|
||||
|
@ -1824,7 +1824,7 @@ bool MariaDBServer::create_start_slave(ClusterOperation& op, const SlaveStatus&
|
||||
/**
|
||||
* Generate a CHANGE MASTER TO-query.
|
||||
*
|
||||
* @param op Operation descriptor
|
||||
* @param op Operation descriptor, required for username and password
|
||||
* @param slave_conn Existing slave connection to emulate
|
||||
* @return Generated query
|
||||
*/
|
||||
@ -1832,8 +1832,8 @@ string MariaDBServer::generate_change_master_cmd(ClusterOperation& op, const Sla
|
||||
{
|
||||
string change_cmd;
|
||||
change_cmd += string_printf("CHANGE MASTER '%s' TO MASTER_HOST = '%s', MASTER_PORT = %i, ",
|
||||
slave_conn.name.c_str(), slave_conn.master_host.c_str(),
|
||||
slave_conn.master_port);
|
||||
slave_conn.name.c_str(),
|
||||
slave_conn.master_host.c_str(), slave_conn.master_port);
|
||||
change_cmd += "MASTER_USE_GTID = current_pos, ";
|
||||
change_cmd += string_printf("MASTER_USER = '%s', ", op.replication_user.c_str());
|
||||
const char MASTER_PW[] = "MASTER_PASSWORD = '%s';";
|
||||
@ -1846,6 +1846,58 @@ string MariaDBServer::generate_change_master_cmd(ClusterOperation& op, const Sla
|
||||
return change_cmd;
|
||||
}
|
||||
|
||||
/**
 * Redirect the slave connection going to the demotion target to replicate from the promotion
 * target. The existing connection is stopped, pointed at the new master with a generated
 * CHANGE MASTER TO-command and then started again. The time spent on each step is deducted
 * from op.time_remaining.
 *
 * @param op Operation descriptor
 * @return True if the connection was stopped, redirected and restarted successfully
 */
bool MariaDBServer::redirect_existing_slave_conn(ClusterOperation& op)
{
    StopWatch timer;
    const MariaDBServer* old_master = op.demotion_target;
    const MariaDBServer* new_master = op.promotion_target;

    auto old_conn = slave_connection_status_mutable(old_master);
    mxb_assert(old_conn);
    if (!old_conn)
    {
        // NOTE(review): mxb_assert is presumably a debug-only check. Without this guard a missing
        // slave connection would be dereferenced below, so fail the redirect cleanly instead.
        PRINT_MXS_JSON_ERROR(op.error_out,
                             "%s has no slave connection to %s, cannot redirect it.",
                             name(), op.demotion_target->name());
        return false;
    }

    bool success = false;
    // First, just stop the slave connection.
    bool stopped = stop_slave_conn(old_conn, StopMode::STOP_ONLY, op.time_remaining, op.error_out);
    op.time_remaining -= timer.restart();
    if (stopped)
    {
        // Copy the existing connection settings and only swap in the new master's address and port.
        SlaveStatus modified_conn = *old_conn;
        SERVER* target_server = new_master->m_server_base->server;
        modified_conn.master_host = target_server->address;
        modified_conn.master_port = target_server->port;
        string change_master = generate_change_master_cmd(op, modified_conn);

        string error_msg;
        bool changed = execute_cmd_time_limit(change_master, op.time_remaining, &error_msg);
        op.time_remaining -= timer.restart();
        if (changed)
        {
            // The connection keeps its name, so restart it by the old connection's name.
            string start = string_printf("START SLAVE '%s';", old_conn->name.c_str());
            bool started = execute_cmd_time_limit(start, op.time_remaining, &error_msg);
            op.time_remaining -= timer.restart();
            if (started)
            {
                success = true;
            }
            else
            {
                PRINT_MXS_JSON_ERROR(op.error_out,
                                     "%s could not be started: %s",
                                     modified_conn.to_short_string(name()).c_str(),
                                     error_msg.c_str());
            }
        }
        else
        {
            // TODO: This may currently print out passwords.
            PRINT_MXS_JSON_ERROR(op.error_out,
                                 "%s could not be redirected to [%s]:%i: %s",
                                 old_conn->to_short_string(name()).c_str(),
                                 modified_conn.master_host.c_str(), modified_conn.master_port,
                                 error_msg.c_str());
        }
    }   // 'stop_slave_conn' prints its own errors
    return success;
}
|
||||
|
||||
string SlaveStatus::to_string() const
|
||||
{
|
||||
// Print all of this on the same line to make things compact. Are the widths reasonable? The format is
|
||||
|
@ -502,6 +502,14 @@ public:
|
||||
*/
|
||||
bool promote(ClusterOperation& operation);
|
||||
|
||||
/**
|
||||
* Redirect the slave connection going to demotion target to replicate from promotion target.
|
||||
*
|
||||
* @param op Operation descriptor
|
||||
* @return True on success
|
||||
*/
|
||||
bool redirect_existing_slave_conn(ClusterOperation& op);
|
||||
|
||||
private:
|
||||
class EventInfo;
|
||||
typedef std::function<void (const EventInfo&, json_t** error_out)> ManipulatorFunc;
|
||||
|
Reference in New Issue
Block a user