MXS-1944 Use time limited methods in rejoin
Uses switchover time limit, since the typical rejoin of a standalone server is somewhat similar to a switchover.
This commit is contained in:
@ -510,7 +510,8 @@ int MariaDBMonitor::redirect_slaves_ex(GeneralOpData& general, OperationType typ
|
||||
else
|
||||
{
|
||||
// No conflict, redirect as normal.
|
||||
if (redirectable->redirect_existing_slave_conn(general, from, to))
|
||||
auto old_conn = redirectable->slave_connection_status(from);
|
||||
if (redirectable->redirect_existing_slave_conn(general, *old_conn, to))
|
||||
{
|
||||
successes++;
|
||||
redirected->push_back(redirectable);
|
||||
@ -587,35 +588,46 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t**
|
||||
uint32_t servers_joined = 0;
|
||||
if (!joinable_servers.empty())
|
||||
{
|
||||
string change_cmd = generate_change_master_cmd(master_server->address, master_server->port);
|
||||
for (MariaDBServer* joinable : joinable_servers)
|
||||
{
|
||||
const char* name = joinable->name();
|
||||
bool op_success = false;
|
||||
// Rejoin doesn't have its own time limit setting. Use switchover time limit for now since
|
||||
// the first phase of standalone rejoin is similar to switchover.
|
||||
maxbase::Duration time_limit((double)m_switchover_timeout);
|
||||
GeneralOpData op(m_replication_user, m_replication_password, output, time_limit);
|
||||
|
||||
if (joinable->m_slave_status.empty())
|
||||
{
|
||||
if (!m_demote_sql_file.empty() && !joinable->run_sql_from_file(m_demote_sql_file, output))
|
||||
// Assume that server is an old master which was failed over. Even if this is not really
|
||||
// the case, the following is unlikely to do damage.
|
||||
ServerOperation demotion(joinable, true, /* treat as old master */
|
||||
m_handle_event_scheduler, m_demote_sql_file, {} /* unused */);
|
||||
if (joinable->demote(demotion, op))
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output,
|
||||
"%s execution failed when attempting to rejoin server '%s'.",
|
||||
CN_DEMOTION_SQL_FILE,
|
||||
joinable->name());
|
||||
MXS_NOTICE("Directing standalone server '%s' to replicate from '%s'.", name, master_name);
|
||||
// A slave connection description is required. As this is the only connection, no name
|
||||
// is required.
|
||||
SlaveStatus new_conn;
|
||||
new_conn.master_host = master_server->address;
|
||||
new_conn.master_port = master_server->port;
|
||||
op_success = joinable->create_start_slave(op, new_conn);
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_NOTICE("Directing standalone server '%s' to replicate from '%s'.", name, master_name);
|
||||
op_success = joinable->join_cluster(change_cmd, m_handle_event_scheduler);
|
||||
PRINT_MXS_JSON_ERROR(output,
|
||||
"Failed to prepare (demote) standalone server %s for rejoin.", name);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_NOTICE("Server '%s' is replicating from a server other than '%s', "
|
||||
"redirecting it to '%s'.",
|
||||
name,
|
||||
master_name,
|
||||
master_name);
|
||||
op_success = joinable->redirect_one_slave(change_cmd);
|
||||
name, master_name, master_name);
|
||||
// Multisource replication does not get to this point.
|
||||
mxb_assert(joinable->m_slave_status.size() == 1);
|
||||
op_success = joinable->redirect_existing_slave_conn(op, joinable->m_slave_status[0],
|
||||
m_master);
|
||||
}
|
||||
|
||||
if (op_success)
|
||||
|
@ -750,42 +750,6 @@ bool MariaDBServer::redirect_one_slave(const string& change_cmd)
|
||||
return success;
|
||||
}
|
||||
|
||||
bool MariaDBServer::join_cluster(const string& change_cmd, bool disable_server_events)
|
||||
{
|
||||
/* Server does not have slave connections. This operation can fail, or the resulting
|
||||
* replication may end up broken. */
|
||||
bool success = false;
|
||||
MYSQL* server_conn = m_server_base->con;
|
||||
const char* query = "SET GLOBAL read_only=1;";
|
||||
if (mxs_mysql_query(server_conn, query) == 0)
|
||||
{
|
||||
if (disable_server_events)
|
||||
{
|
||||
// This is unlikely to change anything, since a restarted server does not have event scheduler
|
||||
// ON. If it were on and events were running while the server was standalone, its data would have
|
||||
// diverged from the rest of the cluster.
|
||||
disable_events(BinlogMode::BINLOG_OFF, NULL);
|
||||
}
|
||||
query = "CHANGE MASTER TO ..."; // Don't show the real query as it contains a password.
|
||||
if (mxs_mysql_query(server_conn, change_cmd.c_str()) == 0)
|
||||
{
|
||||
query = "START SLAVE;";
|
||||
if (mxs_mysql_query(server_conn, query) == 0)
|
||||
{
|
||||
success = true;
|
||||
MXS_NOTICE("Standalone server '%s' starting replication.", name());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!success)
|
||||
{
|
||||
const char ERROR_MSG[] = "Standalone server '%s' failed to start replication: '%s'. Query: '%s'.";
|
||||
MXS_WARNING(ERROR_MSG, name(), mysql_error(server_conn), query);
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
bool MariaDBServer::run_sql_from_file(const string& path, json_t** error_out)
|
||||
{
|
||||
MYSQL* conn = m_server_base->con;
|
||||
@ -1579,7 +1543,8 @@ bool MariaDBServer::demote(ServerOperation& demo_op, GeneralOpData& general)
|
||||
bool demotion_error = false;
|
||||
if (demo_op.to_from_master)
|
||||
{
|
||||
mxb_assert(is_master());
|
||||
// The server should either be the master or be a standalone being rejoined.
|
||||
mxb_assert(is_master() || m_slave_status.empty());
|
||||
StopWatch timer;
|
||||
// Step 2a: Enabling read-only can take time if writes are on or table locks taken.
|
||||
// TODO: use max_statement_time to be safe!
|
||||
@ -2002,13 +1967,6 @@ bool MariaDBServer::copy_slave_conns(GeneralOpData& op, const SlaveStatusArray&
|
||||
return !start_slave_error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new slave connection on the server and start it.
|
||||
*
|
||||
* @param op Operation descriptor
|
||||
* @param slave_conn Existing connection to emulate
|
||||
* @return True on success
|
||||
*/
|
||||
bool MariaDBServer::create_start_slave(GeneralOpData& op, const SlaveStatus& slave_conn)
|
||||
{
|
||||
maxbase::Duration& time_remaining = op.time_remaining;
|
||||
@ -2070,22 +2028,20 @@ string MariaDBServer::generate_change_master_cmd(GeneralOpData& op, const SlaveS
|
||||
return change_cmd;
|
||||
}
|
||||
|
||||
bool MariaDBServer::redirect_existing_slave_conn(GeneralOpData& op, const MariaDBServer* old_master,
|
||||
bool MariaDBServer::redirect_existing_slave_conn(GeneralOpData& op, const SlaveStatus& old_conn,
|
||||
const MariaDBServer* new_master)
|
||||
{
|
||||
auto error_out = op.error_out;
|
||||
maxbase::Duration& time_remaining = op.time_remaining;
|
||||
StopWatch timer;
|
||||
auto old_conn = slave_connection_status(old_master);
|
||||
mxb_assert(old_conn);
|
||||
bool success = false;
|
||||
|
||||
// First, just stop the slave connection.
|
||||
bool stopped = stop_slave_conn(old_conn->name, StopMode::STOP_ONLY, time_remaining, error_out);
|
||||
bool stopped = stop_slave_conn(old_conn.name, StopMode::STOP_ONLY, time_remaining, error_out);
|
||||
time_remaining -= timer.restart();
|
||||
if (stopped)
|
||||
{
|
||||
SlaveStatus modified_conn = *old_conn;
|
||||
SlaveStatus modified_conn = old_conn;
|
||||
SERVER* target_server = new_master->m_server_base->server;
|
||||
modified_conn.master_host = target_server->address;
|
||||
modified_conn.master_port = target_server->port;
|
||||
@ -2095,7 +2051,7 @@ bool MariaDBServer::redirect_existing_slave_conn(GeneralOpData& op, const MariaD
|
||||
time_remaining -= timer.restart();
|
||||
if (changed)
|
||||
{
|
||||
string start = string_printf("START SLAVE '%s';", old_conn->name.c_str());
|
||||
string start = string_printf("START SLAVE '%s';", old_conn.name.c_str());
|
||||
bool started = execute_cmd_time_limit(start, time_remaining, &error_msg);
|
||||
time_remaining -= timer.restart();
|
||||
if (started)
|
||||
@ -2114,7 +2070,7 @@ bool MariaDBServer::redirect_existing_slave_conn(GeneralOpData& op, const MariaD
|
||||
// TODO: This may currently print out passwords.
|
||||
PRINT_MXS_JSON_ERROR(error_out,
|
||||
"%s could not be redirected to [%s]:%i: %s",
|
||||
old_conn->to_short_string().c_str(),
|
||||
old_conn.to_short_string().c_str(),
|
||||
modified_conn.master_host.c_str(), modified_conn.master_port,
|
||||
error_msg.c_str());
|
||||
}
|
||||
|
@ -278,15 +278,6 @@ public:
|
||||
*/
|
||||
bool redirect_one_slave(const std::string& change_cmd);
|
||||
|
||||
/**
|
||||
* Joins this standalone server to the cluster.
|
||||
*
|
||||
* @param change_cmd Change master command
|
||||
* @param disable_server_events Should events be disabled on the server
|
||||
* @return True if commands were accepted by server
|
||||
*/
|
||||
bool join_cluster(const std::string& change_cmd, bool disable_server_events);
|
||||
|
||||
/**
|
||||
* Check if the server can be demoted by switchover.
|
||||
*
|
||||
@ -372,11 +363,11 @@ public:
|
||||
* Redirect the slave connection going to old master to replicate from new master.
|
||||
*
|
||||
* @param op Operation descriptor
|
||||
* @param old_master The connection to this server is redirected
|
||||
* @param old_conn The connection which is redirected
|
||||
* @param new_master The new master for the redirected connection
|
||||
* @return True on success
|
||||
*/
|
||||
bool redirect_existing_slave_conn(GeneralOpData& op, const MariaDBServer* old_master,
|
||||
bool redirect_existing_slave_conn(GeneralOpData& op, const SlaveStatus& old_conn,
|
||||
const MariaDBServer* new_master);
|
||||
|
||||
/**
|
||||
@ -395,6 +386,15 @@ public:
|
||||
bool copy_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_copy,
|
||||
const MariaDBServer* replacement);
|
||||
|
||||
/**
|
||||
* Create a new slave connection on the server and start it.
|
||||
*
|
||||
* @param op Operation descriptor
|
||||
* @param slave_conn Existing connection to emulate
|
||||
* @return True on success
|
||||
*/
|
||||
bool create_start_slave(GeneralOpData& op, const SlaveStatus& slave_conn);
|
||||
|
||||
/**
|
||||
* Is binary log on? 'update_replication_settings' should be ran before this function to query the data.
|
||||
*
|
||||
@ -545,6 +545,5 @@ private:
|
||||
|
||||
bool set_read_only(ReadOnlySetting value, maxbase::Duration time_limit, json_t** error_out);
|
||||
bool merge_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_merge);
|
||||
bool create_start_slave(GeneralOpData& op, const SlaveStatus& slave_conn);
|
||||
std::string generate_change_master_cmd(GeneralOpData& op, const SlaveStatus& slave_conn);
|
||||
};
|
||||
|
Reference in New Issue
Block a user