From e4e2235297e5eb017b11cacda71458bd5041aad9 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Tue, 6 Nov 2018 11:12:51 +0200 Subject: [PATCH] MXS-1944 Use time limited methods in rejoin Uses switchover time limit, since the typical rejoin of a standalone server is somewhat similar to a switchover. --- .../mariadbmon/cluster_manipulation.cc | 38 +++++++----- .../monitor/mariadbmon/mariadbserver.cc | 58 +++---------------- .../monitor/mariadbmon/mariadbserver.hh | 23 ++++---- 3 files changed, 43 insertions(+), 76 deletions(-) diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc index fb4233fb0..30b57ada6 100644 --- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc +++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc @@ -510,7 +510,8 @@ int MariaDBMonitor::redirect_slaves_ex(GeneralOpData& general, OperationType typ else { // No conflict, redirect as normal. - if (redirectable->redirect_existing_slave_conn(general, from, to)) + auto old_conn = redirectable->slave_connection_status(from); + if (redirectable->redirect_existing_slave_conn(general, *old_conn, to)) { successes++; redirected->push_back(redirectable); @@ -587,35 +588,46 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t** uint32_t servers_joined = 0; if (!joinable_servers.empty()) { - string change_cmd = generate_change_master_cmd(master_server->address, master_server->port); for (MariaDBServer* joinable : joinable_servers) { const char* name = joinable->name(); bool op_success = false; + // Rejoin doesn't have its own time limit setting. Use switchover time limit for now since + // the first phase of standalone rejoin is similar to switchover. + maxbase::Duration time_limit((double)m_switchover_timeout); + GeneralOpData op(m_replication_user, m_replication_password, output, time_limit); if (joinable->m_slave_status.empty()) { - if (!m_demote_sql_file.empty() && !joinable->run_sql_from_file(m_demote_sql_file, output)) + // Assume that server is an old master which was failed over. Even if this is not really + // the case, the following is unlikely to do damage. + ServerOperation demotion(joinable, true, /* treat as old master */ + m_handle_event_scheduler, m_demote_sql_file, {} /* unused */); + if (joinable->demote(demotion, op)) { - PRINT_MXS_JSON_ERROR(output, - "%s execution failed when attempting to rejoin server '%s'.", - CN_DEMOTION_SQL_FILE, - joinable->name()); + MXS_NOTICE("Directing standalone server '%s' to replicate from '%s'.", name, master_name); + // A slave connection description is required. As this is the only connection, no name + // is required. + SlaveStatus new_conn; + new_conn.master_host = master_server->address; + new_conn.master_port = master_server->port; + op_success = joinable->create_start_slave(op, new_conn); } else { - MXS_NOTICE("Directing standalone server '%s' to replicate from '%s'.", name, master_name); - op_success = joinable->join_cluster(change_cmd, m_handle_event_scheduler); + PRINT_MXS_JSON_ERROR(output, + "Failed to prepare (demote) standalone server %s for rejoin.", name); } } else { MXS_NOTICE("Server '%s' is replicating from a server other than '%s', " "redirecting it to '%s'.", - name, - master_name, - master_name); - op_success = joinable->redirect_one_slave(change_cmd); + name, master_name, master_name); + // Multisource replication does not get to this point. + mxb_assert(joinable->m_slave_status.size() == 1); + op_success = joinable->redirect_existing_slave_conn(op, joinable->m_slave_status[0], + m_master); } if (op_success) diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index 9fe304037..4b7521ed6 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -750,42 +750,6 @@ bool MariaDBServer::redirect_one_slave(const string& change_cmd) return success; } -bool MariaDBServer::join_cluster(const string& change_cmd, bool disable_server_events) -{ - /* Server does not have slave connections. This operation can fail, or the resulting - * replication may end up broken. */ - bool success = false; - MYSQL* server_conn = m_server_base->con; - const char* query = "SET GLOBAL read_only=1;"; - if (mxs_mysql_query(server_conn, query) == 0) - { - if (disable_server_events) - { - // This is unlikely to change anything, since a restarted server does not have event scheduler - // ON. If it were on and events were running while the server was standalone, its data would have - // diverged from the rest of the cluster. - disable_events(BinlogMode::BINLOG_OFF, NULL); - } - query = "CHANGE MASTER TO ..."; // Don't show the real query as it contains a password. - if (mxs_mysql_query(server_conn, change_cmd.c_str()) == 0) - { - query = "START SLAVE;"; - if (mxs_mysql_query(server_conn, query) == 0) - { - success = true; - MXS_NOTICE("Standalone server '%s' starting replication.", name()); - } - } - } - - if (!success) - { - const char ERROR_MSG[] = "Standalone server '%s' failed to start replication: '%s'. Query: '%s'."; - MXS_WARNING(ERROR_MSG, name(), mysql_error(server_conn), query); - } - return success; -} - bool MariaDBServer::run_sql_from_file(const string& path, json_t** error_out) { MYSQL* conn = m_server_base->con; @@ -1579,7 +1543,8 @@ bool MariaDBServer::demote(ServerOperation& demo_op, GeneralOpData& general) bool demotion_error = false; if (demo_op.to_from_master) { - mxb_assert(is_master()); + // The server should either be the master or be a standalone being rejoined. + mxb_assert(is_master() || m_slave_status.empty()); StopWatch timer; // Step 2a: Enabling read-only can take time if writes are on or table locks taken. // TODO: use max_statement_time to be safe! @@ -2002,13 +1967,6 @@ bool MariaDBServer::copy_slave_conns(GeneralOpData& op, const SlaveStatusArray& return !start_slave_error; } -/** - * Create a new slave connection on the server and start it. - * - * @param op Operation descriptor - * @param slave_conn Existing connection to emulate - * @return True on success - */ bool MariaDBServer::create_start_slave(GeneralOpData& op, const SlaveStatus& slave_conn) { maxbase::Duration& time_remaining = op.time_remaining; @@ -2070,22 +2028,20 @@ string MariaDBServer::generate_change_master_cmd(GeneralOpData& op, const SlaveS return change_cmd; } -bool MariaDBServer::redirect_existing_slave_conn(GeneralOpData& op, const MariaDBServer* old_master, +bool MariaDBServer::redirect_existing_slave_conn(GeneralOpData& op, const SlaveStatus& old_conn, const MariaDBServer* new_master) { auto error_out = op.error_out; maxbase::Duration& time_remaining = op.time_remaining; StopWatch timer; - auto old_conn = slave_connection_status(old_master); - mxb_assert(old_conn); bool success = false; // First, just stop the slave connection. - bool stopped = stop_slave_conn(old_conn->name, StopMode::STOP_ONLY, time_remaining, error_out); + bool stopped = stop_slave_conn(old_conn.name, StopMode::STOP_ONLY, time_remaining, error_out); time_remaining -= timer.restart(); if (stopped) { - SlaveStatus modified_conn = *old_conn; + SlaveStatus modified_conn = old_conn; SERVER* target_server = new_master->m_server_base->server; modified_conn.master_host = target_server->address; modified_conn.master_port = target_server->port; @@ -2095,7 +2051,7 @@ bool MariaDBServer::redirect_existing_slave_conn(GeneralOpData& op, const MariaD time_remaining -= timer.restart(); if (changed) { - string start = string_printf("START SLAVE '%s';", old_conn->name.c_str()); + string start = string_printf("START SLAVE '%s';", old_conn.name.c_str()); bool started = execute_cmd_time_limit(start, time_remaining, &error_msg); time_remaining -= timer.restart(); if (started) @@ -2114,7 +2070,7 @@ bool MariaDBServer::redirect_existing_slave_conn(GeneralOpData& op, const MariaD // TODO: This may currently print out passwords. PRINT_MXS_JSON_ERROR(error_out, "%s could not be redirected to [%s]:%i: %s", - old_conn->to_short_string().c_str(), + old_conn.to_short_string().c_str(), modified_conn.master_host.c_str(), modified_conn.master_port, error_msg.c_str()); } diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh index 1d084d675..82ba5200f 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.hh +++ b/server/modules/monitor/mariadbmon/mariadbserver.hh @@ -278,15 +278,6 @@ public: */ bool redirect_one_slave(const std::string& change_cmd); - /** - * Joins this standalone server to the cluster. - * - * @param change_cmd Change master command - * @param disable_server_events Should events be disabled on the server - * @return True if commands were accepted by server - */ - bool join_cluster(const std::string& change_cmd, bool disable_server_events); - /** * Check if the server can be demoted by switchover. * @@ -372,11 +363,11 @@ public: * Redirect the slave connection going to old master to replicate from new master. * * @param op Operation descriptor - * @param old_master The connection to this server is redirected + * @param old_conn The connection which is redirected * @param new_master The new master for the redirected connection * @return True on success */ - bool redirect_existing_slave_conn(GeneralOpData& op, const MariaDBServer* old_master, + bool redirect_existing_slave_conn(GeneralOpData& op, const SlaveStatus& old_conn, const MariaDBServer* new_master); /** @@ -395,6 +386,15 @@ public: bool copy_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_copy, const MariaDBServer* replacement); + /** + * Create a new slave connection on the server and start it. + * + * @param op Operation descriptor + * @param slave_conn Existing connection to emulate + * @return True on success + */ + bool create_start_slave(GeneralOpData& op, const SlaveStatus& slave_conn); + /** * Is binary log on? 'update_replication_settings' should be ran before this function to query the data. * @@ -545,6 +545,5 @@ private: bool set_read_only(ReadOnlySetting value, maxbase::Duration time_limit, json_t** error_out); bool merge_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_merge); - bool create_start_slave(GeneralOpData& op, const SlaveStatus& slave_conn); std::string generate_change_master_cmd(GeneralOpData& op, const SlaveStatus& slave_conn); };