MXS-1944 Use time-limited methods in rejoin
Uses the switchover time limit, since a typical rejoin of a standalone server is somewhat similar to a switchover.
This commit is contained in:
@ -510,7 +510,8 @@ int MariaDBMonitor::redirect_slaves_ex(GeneralOpData& general, OperationType typ
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// No conflict, redirect as normal.
|
// No conflict, redirect as normal.
|
||||||
if (redirectable->redirect_existing_slave_conn(general, from, to))
|
auto old_conn = redirectable->slave_connection_status(from);
|
||||||
|
if (redirectable->redirect_existing_slave_conn(general, *old_conn, to))
|
||||||
{
|
{
|
||||||
successes++;
|
successes++;
|
||||||
redirected->push_back(redirectable);
|
redirected->push_back(redirectable);
|
||||||
@ -587,35 +588,46 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t**
|
|||||||
uint32_t servers_joined = 0;
|
uint32_t servers_joined = 0;
|
||||||
if (!joinable_servers.empty())
|
if (!joinable_servers.empty())
|
||||||
{
|
{
|
||||||
string change_cmd = generate_change_master_cmd(master_server->address, master_server->port);
|
|
||||||
for (MariaDBServer* joinable : joinable_servers)
|
for (MariaDBServer* joinable : joinable_servers)
|
||||||
{
|
{
|
||||||
const char* name = joinable->name();
|
const char* name = joinable->name();
|
||||||
bool op_success = false;
|
bool op_success = false;
|
||||||
|
// Rejoin doesn't have its own time limit setting. Use switchover time limit for now since
|
||||||
|
// the first phase of standalone rejoin is similar to switchover.
|
||||||
|
maxbase::Duration time_limit((double)m_switchover_timeout);
|
||||||
|
GeneralOpData op(m_replication_user, m_replication_password, output, time_limit);
|
||||||
|
|
||||||
if (joinable->m_slave_status.empty())
|
if (joinable->m_slave_status.empty())
|
||||||
{
|
{
|
||||||
if (!m_demote_sql_file.empty() && !joinable->run_sql_from_file(m_demote_sql_file, output))
|
// Assume that server is an old master which was failed over. Even if this is not really
|
||||||
|
// the case, the following is unlikely to do damage.
|
||||||
|
ServerOperation demotion(joinable, true, /* treat as old master */
|
||||||
|
m_handle_event_scheduler, m_demote_sql_file, {} /* unused */);
|
||||||
|
if (joinable->demote(demotion, op))
|
||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(output,
|
MXS_NOTICE("Directing standalone server '%s' to replicate from '%s'.", name, master_name);
|
||||||
"%s execution failed when attempting to rejoin server '%s'.",
|
// A slave connection description is required. As this is the only connection, no name
|
||||||
CN_DEMOTION_SQL_FILE,
|
// is required.
|
||||||
joinable->name());
|
SlaveStatus new_conn;
|
||||||
|
new_conn.master_host = master_server->address;
|
||||||
|
new_conn.master_port = master_server->port;
|
||||||
|
op_success = joinable->create_start_slave(op, new_conn);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MXS_NOTICE("Directing standalone server '%s' to replicate from '%s'.", name, master_name);
|
PRINT_MXS_JSON_ERROR(output,
|
||||||
op_success = joinable->join_cluster(change_cmd, m_handle_event_scheduler);
|
"Failed to prepare (demote) standalone server %s for rejoin.", name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MXS_NOTICE("Server '%s' is replicating from a server other than '%s', "
|
MXS_NOTICE("Server '%s' is replicating from a server other than '%s', "
|
||||||
"redirecting it to '%s'.",
|
"redirecting it to '%s'.",
|
||||||
name,
|
name, master_name, master_name);
|
||||||
master_name,
|
// Multisource replication does not get to this point.
|
||||||
master_name);
|
mxb_assert(joinable->m_slave_status.size() == 1);
|
||||||
op_success = joinable->redirect_one_slave(change_cmd);
|
op_success = joinable->redirect_existing_slave_conn(op, joinable->m_slave_status[0],
|
||||||
|
m_master);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (op_success)
|
if (op_success)
|
||||||
|
|||||||
@ -750,42 +750,6 @@ bool MariaDBServer::redirect_one_slave(const string& change_cmd)
|
|||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBServer::join_cluster(const string& change_cmd, bool disable_server_events)
|
|
||||||
{
|
|
||||||
/* Server does not have slave connections. This operation can fail, or the resulting
|
|
||||||
* replication may end up broken. */
|
|
||||||
bool success = false;
|
|
||||||
MYSQL* server_conn = m_server_base->con;
|
|
||||||
const char* query = "SET GLOBAL read_only=1;";
|
|
||||||
if (mxs_mysql_query(server_conn, query) == 0)
|
|
||||||
{
|
|
||||||
if (disable_server_events)
|
|
||||||
{
|
|
||||||
// This is unlikely to change anything, since a restarted server does not have event scheduler
|
|
||||||
// ON. If it were on and events were running while the server was standalone, its data would have
|
|
||||||
// diverged from the rest of the cluster.
|
|
||||||
disable_events(BinlogMode::BINLOG_OFF, NULL);
|
|
||||||
}
|
|
||||||
query = "CHANGE MASTER TO ..."; // Don't show the real query as it contains a password.
|
|
||||||
if (mxs_mysql_query(server_conn, change_cmd.c_str()) == 0)
|
|
||||||
{
|
|
||||||
query = "START SLAVE;";
|
|
||||||
if (mxs_mysql_query(server_conn, query) == 0)
|
|
||||||
{
|
|
||||||
success = true;
|
|
||||||
MXS_NOTICE("Standalone server '%s' starting replication.", name());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!success)
|
|
||||||
{
|
|
||||||
const char ERROR_MSG[] = "Standalone server '%s' failed to start replication: '%s'. Query: '%s'.";
|
|
||||||
MXS_WARNING(ERROR_MSG, name(), mysql_error(server_conn), query);
|
|
||||||
}
|
|
||||||
return success;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool MariaDBServer::run_sql_from_file(const string& path, json_t** error_out)
|
bool MariaDBServer::run_sql_from_file(const string& path, json_t** error_out)
|
||||||
{
|
{
|
||||||
MYSQL* conn = m_server_base->con;
|
MYSQL* conn = m_server_base->con;
|
||||||
@ -1579,7 +1543,8 @@ bool MariaDBServer::demote(ServerOperation& demo_op, GeneralOpData& general)
|
|||||||
bool demotion_error = false;
|
bool demotion_error = false;
|
||||||
if (demo_op.to_from_master)
|
if (demo_op.to_from_master)
|
||||||
{
|
{
|
||||||
mxb_assert(is_master());
|
// The server should either be the master or be a standalone being rejoined.
|
||||||
|
mxb_assert(is_master() || m_slave_status.empty());
|
||||||
StopWatch timer;
|
StopWatch timer;
|
||||||
// Step 2a: Enabling read-only can take time if writes are on or table locks taken.
|
// Step 2a: Enabling read-only can take time if writes are on or table locks taken.
|
||||||
// TODO: use max_statement_time to be safe!
|
// TODO: use max_statement_time to be safe!
|
||||||
@ -2002,13 +1967,6 @@ bool MariaDBServer::copy_slave_conns(GeneralOpData& op, const SlaveStatusArray&
|
|||||||
return !start_slave_error;
|
return !start_slave_error;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new slave connection on the server and start it.
|
|
||||||
*
|
|
||||||
* @param op Operation descriptor
|
|
||||||
* @param slave_conn Existing connection to emulate
|
|
||||||
* @return True on success
|
|
||||||
*/
|
|
||||||
bool MariaDBServer::create_start_slave(GeneralOpData& op, const SlaveStatus& slave_conn)
|
bool MariaDBServer::create_start_slave(GeneralOpData& op, const SlaveStatus& slave_conn)
|
||||||
{
|
{
|
||||||
maxbase::Duration& time_remaining = op.time_remaining;
|
maxbase::Duration& time_remaining = op.time_remaining;
|
||||||
@ -2070,22 +2028,20 @@ string MariaDBServer::generate_change_master_cmd(GeneralOpData& op, const SlaveS
|
|||||||
return change_cmd;
|
return change_cmd;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBServer::redirect_existing_slave_conn(GeneralOpData& op, const MariaDBServer* old_master,
|
bool MariaDBServer::redirect_existing_slave_conn(GeneralOpData& op, const SlaveStatus& old_conn,
|
||||||
const MariaDBServer* new_master)
|
const MariaDBServer* new_master)
|
||||||
{
|
{
|
||||||
auto error_out = op.error_out;
|
auto error_out = op.error_out;
|
||||||
maxbase::Duration& time_remaining = op.time_remaining;
|
maxbase::Duration& time_remaining = op.time_remaining;
|
||||||
StopWatch timer;
|
StopWatch timer;
|
||||||
auto old_conn = slave_connection_status(old_master);
|
|
||||||
mxb_assert(old_conn);
|
|
||||||
bool success = false;
|
bool success = false;
|
||||||
|
|
||||||
// First, just stop the slave connection.
|
// First, just stop the slave connection.
|
||||||
bool stopped = stop_slave_conn(old_conn->name, StopMode::STOP_ONLY, time_remaining, error_out);
|
bool stopped = stop_slave_conn(old_conn.name, StopMode::STOP_ONLY, time_remaining, error_out);
|
||||||
time_remaining -= timer.restart();
|
time_remaining -= timer.restart();
|
||||||
if (stopped)
|
if (stopped)
|
||||||
{
|
{
|
||||||
SlaveStatus modified_conn = *old_conn;
|
SlaveStatus modified_conn = old_conn;
|
||||||
SERVER* target_server = new_master->m_server_base->server;
|
SERVER* target_server = new_master->m_server_base->server;
|
||||||
modified_conn.master_host = target_server->address;
|
modified_conn.master_host = target_server->address;
|
||||||
modified_conn.master_port = target_server->port;
|
modified_conn.master_port = target_server->port;
|
||||||
@ -2095,7 +2051,7 @@ bool MariaDBServer::redirect_existing_slave_conn(GeneralOpData& op, const MariaD
|
|||||||
time_remaining -= timer.restart();
|
time_remaining -= timer.restart();
|
||||||
if (changed)
|
if (changed)
|
||||||
{
|
{
|
||||||
string start = string_printf("START SLAVE '%s';", old_conn->name.c_str());
|
string start = string_printf("START SLAVE '%s';", old_conn.name.c_str());
|
||||||
bool started = execute_cmd_time_limit(start, time_remaining, &error_msg);
|
bool started = execute_cmd_time_limit(start, time_remaining, &error_msg);
|
||||||
time_remaining -= timer.restart();
|
time_remaining -= timer.restart();
|
||||||
if (started)
|
if (started)
|
||||||
@ -2114,7 +2070,7 @@ bool MariaDBServer::redirect_existing_slave_conn(GeneralOpData& op, const MariaD
|
|||||||
// TODO: This may currently print out passwords.
|
// TODO: This may currently print out passwords.
|
||||||
PRINT_MXS_JSON_ERROR(error_out,
|
PRINT_MXS_JSON_ERROR(error_out,
|
||||||
"%s could not be redirected to [%s]:%i: %s",
|
"%s could not be redirected to [%s]:%i: %s",
|
||||||
old_conn->to_short_string().c_str(),
|
old_conn.to_short_string().c_str(),
|
||||||
modified_conn.master_host.c_str(), modified_conn.master_port,
|
modified_conn.master_host.c_str(), modified_conn.master_port,
|
||||||
error_msg.c_str());
|
error_msg.c_str());
|
||||||
}
|
}
|
||||||
|
|||||||
@ -278,15 +278,6 @@ public:
|
|||||||
*/
|
*/
|
||||||
bool redirect_one_slave(const std::string& change_cmd);
|
bool redirect_one_slave(const std::string& change_cmd);
|
||||||
|
|
||||||
/**
|
|
||||||
* Joins this standalone server to the cluster.
|
|
||||||
*
|
|
||||||
* @param change_cmd Change master command
|
|
||||||
* @param disable_server_events Should events be disabled on the server
|
|
||||||
* @return True if commands were accepted by server
|
|
||||||
*/
|
|
||||||
bool join_cluster(const std::string& change_cmd, bool disable_server_events);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if the server can be demoted by switchover.
|
* Check if the server can be demoted by switchover.
|
||||||
*
|
*
|
||||||
@ -372,11 +363,11 @@ public:
|
|||||||
* Redirect the slave connection going to old master to replicate from new master.
|
* Redirect the slave connection going to old master to replicate from new master.
|
||||||
*
|
*
|
||||||
* @param op Operation descriptor
|
* @param op Operation descriptor
|
||||||
* @param old_master The connection to this server is redirected
|
* @param old_conn The connection which is redirected
|
||||||
* @param new_master The new master for the redirected connection
|
* @param new_master The new master for the redirected connection
|
||||||
* @return True on success
|
* @return True on success
|
||||||
*/
|
*/
|
||||||
bool redirect_existing_slave_conn(GeneralOpData& op, const MariaDBServer* old_master,
|
bool redirect_existing_slave_conn(GeneralOpData& op, const SlaveStatus& old_conn,
|
||||||
const MariaDBServer* new_master);
|
const MariaDBServer* new_master);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -395,6 +386,15 @@ public:
|
|||||||
bool copy_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_copy,
|
bool copy_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_copy,
|
||||||
const MariaDBServer* replacement);
|
const MariaDBServer* replacement);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new slave connection on the server and start it.
|
||||||
|
*
|
||||||
|
* @param op Operation descriptor
|
||||||
|
* @param slave_conn Existing connection to emulate
|
||||||
|
* @return True on success
|
||||||
|
*/
|
||||||
|
bool create_start_slave(GeneralOpData& op, const SlaveStatus& slave_conn);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is binary log on? 'update_replication_settings' should be run before this function to query the data.
|
* Is binary log on? 'update_replication_settings' should be run before this function to query the data.
|
||||||
*
|
*
|
||||||
@ -545,6 +545,5 @@ private:
|
|||||||
|
|
||||||
bool set_read_only(ReadOnlySetting value, maxbase::Duration time_limit, json_t** error_out);
|
bool set_read_only(ReadOnlySetting value, maxbase::Duration time_limit, json_t** error_out);
|
||||||
bool merge_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_merge);
|
bool merge_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_merge);
|
||||||
bool create_start_slave(GeneralOpData& op, const SlaveStatus& slave_conn);
|
|
||||||
std::string generate_change_master_cmd(GeneralOpData& op, const SlaveStatus& slave_conn);
|
std::string generate_change_master_cmd(GeneralOpData& op, const SlaveStatus& slave_conn);
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user