MXS-1703 Move more methods to MariaDBServer

These methods only modify or update a single server.
2018-04-16 17:41:37 +03:00 · 2018-04-16 17:41:37 +03:00 · 02c57c98e4
commit 02c57c98e4
parent 50bc43e4bf
5 changed files with 216 additions and 223 deletions
--- a/server/modules/monitor/mariadbmon/cluster_discovery.cc
+++ b/server/modules/monitor/mariadbmon/cluster_discovery.cc
@ -712,18 +712,6 @@ void MariaDBMonitor::monitor_mysql_db(MariaDBServer* serv_info)
    }
 }

-/**
- * Update replication settings, gtid:s and slave status of the server.
- *
- * @param server Slave to update
- * @return True on success. False on error, or if server is not a slave (slave SQL not running).
- */
-bool MariaDBMonitor::update_slave_info(MariaDBServer* server)
-{
-    return (server->slave_status.slave_sql_running && server->update_replication_settings() &&
-            server->update_gtids() && server->do_show_slave_status());
-}
-
 /**
 * Check if the maxscale_schema.replication_heartbeat table is replicated on all
 * servers and log a warning if problems were found.
--- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc
+++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc
@ -131,11 +131,11 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
        {
            MariaDBServer* slave_cand = get_server_info(mon_slave_cand);

-            if (server_is_rejoin_suspect(slave_cand, m_master, output))
+            if (server_is_rejoin_suspect(slave_cand, output))
            {
                if (m_master->update_gtids())
                {
-                    if (can_replicate_from(slave_cand, m_master))
+                    if (slave_cand->can_replicate_from(m_master))
                    {
                        ServerArray joinable_server;
                        joinable_server.push_back(slave_cand);
@ -227,7 +227,7 @@ int MariaDBMonitor::redirect_slaves(MariaDBServer* new_master, const ServerArray
    int successes = 0;
    for (auto iter = slaves.begin(); iter != slaves.end(); iter++)
    {
-        if (redirect_one_slave(*iter, change_cmd))
+        if ((*iter)->redirect_one_slave(change_cmd))
        {
            successes++;
            redirected_slaves->push_back(*iter);
@ -293,44 +293,6 @@ bool MariaDBMonitor::switchover_start_slave(MariaDBServer* old_master, MariaDBSe
    return rval;
 }

-/**
- * Redirect one slave server to another master
- *
- * @param slave Server to redirect
- * @param change_cmd Change master command, usually generated by generate_change_master_cmd()
- * @return True if slave accepted all commands
- */
-bool MariaDBMonitor::redirect_one_slave(MariaDBServer* slave, const string& change_cmd)
-{
-    bool success = false;
-    MYSQL* slave_conn = slave->server_base->con;
-    const char* query = "STOP SLAVE;";
-    if (mxs_mysql_query(slave_conn, query) == 0)
-    {
-        query = "RESET SLAVE;"; // To erase any old I/O or SQL errors
-        if (mxs_mysql_query(slave_conn, query) == 0)
-        {
-            query = "CHANGE MASTER TO ..."; // Don't show the real query as it contains a password.
-            if (mxs_mysql_query(slave_conn, change_cmd.c_str()) == 0)
-            {
-                query = "START SLAVE;";
-                if (mxs_mysql_query(slave_conn, query) == 0)
-                {
-                    success = true;
-                    MXS_NOTICE("Slave '%s' redirected to new master.", slave->name());
-                }
-            }
-        }
-    }
-
-    if (!success)
-    {
-        MXS_WARNING("Slave '%s' redirection failed: '%s'. Query: '%s'.", slave->name(),
-                    mysql_error(slave_conn), query);
-    }
-    return success;
-}
-
 /**
 * (Re)join given servers to the cluster. The servers in the array are assumed to be joinable.
 * Usually the list is created by get_joinable_servers().
@ -355,13 +317,13 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers)
            if (joinable->n_slaves_configured == 0)
            {
                MXS_NOTICE("Directing standalone server '%s' to replicate from '%s'.", name, master_name);
-                op_success = join_cluster(joinable, change_cmd);
+                op_success = joinable->join_cluster(change_cmd);
            }
            else
            {
                MXS_NOTICE("Server '%s' is replicating from a server other than '%s', "
                           "redirecting it to '%s'.", name, master_name, master_name);
-                op_success = redirect_one_slave(joinable, change_cmd);
+                op_success = joinable->redirect_one_slave(change_cmd);
            }

            if (op_success)
@ -401,7 +363,7 @@ bool MariaDBMonitor::get_joinable_servers(ServerArray* output)
    ServerArray suspects;
    for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
    {
-        if (server_is_rejoin_suspect(*iter, m_master, NULL))
+        if (server_is_rejoin_suspect(*iter, NULL))
        {
            suspects.push_back(*iter);
        }
@ -415,7 +377,7 @@ bool MariaDBMonitor::get_joinable_servers(ServerArray* output)
        {
            for (size_t i = 0; i < suspects.size(); i++)
            {
-                if (can_replicate_from(suspects[i], m_master))
+                if (suspects[i]->can_replicate_from(m_master))
                {
                    output->push_back(suspects[i]);
                }
@ -429,65 +391,15 @@ bool MariaDBMonitor::get_joinable_servers(ServerArray* output)
    return comm_ok;
 }

-/**
- * Joins a standalone server to the cluster.
- *
- * @param server Server to join
- * @param change_cmd Change master command
- * @return True if commands were accepted by server
- */
-bool MariaDBMonitor::join_cluster(MariaDBServer* server, const string& change_cmd)
-{
-    /* Server does not have slave connections. This operation can fail, or the resulting
-     * replication may end up broken. */
-    bool success = false;
-    string error_msg;
-    MYSQL* server_conn = server->server_base->con;
-    const char* query = "SET GLOBAL read_only=1;";
-    if (mxs_mysql_query(server_conn, query) == 0)
-    {
-        query = "CHANGE MASTER TO ..."; // Don't show the real query as it contains a password.
-        if (mxs_mysql_query(server_conn, change_cmd.c_str()) == 0)
-        {
-            query = "START SLAVE;";
-            if (mxs_mysql_query(server_conn, query) == 0)
-            {
-                success = true;
-                MXS_NOTICE("Standalone server '%s' starting replication.", server->name());
-            }
-        }
-
-        if (!success)
-        {
-            // A step after "SET GLOBAL read_only=1" failed, try to undo. First, backup error message.
-            error_msg = mysql_error(server_conn);
-            mxs_mysql_query(server_conn, "SET GLOBAL read_only=0;");
-        }
-    }
-
-    if (!success)
-    {
-        if (error_msg.empty())
-        {
-            error_msg = mysql_error(server_conn);
-        }
-        MXS_WARNING("Standalone server '%s' failed to start replication: '%s'. Query: '%s'.",
-                    server->name(), error_msg.c_str(), query);
-    }
-    return success;
-}
-
 /**
 * Checks if a server is a possible rejoin candidate. A true result from this function is not yet sufficient
 * criteria and another call to can_replicate_from() should be made.
 *
 * @param rejoin_cand Server to check
- * @param master Master server info
 * @param output Error output. If NULL, no error is printed to log.
 * @return True, if server is a rejoin suspect.
 */
-bool MariaDBMonitor::server_is_rejoin_suspect(MariaDBServer* rejoin_cand, MariaDBServer* master,
-                                              json_t** output)
+bool MariaDBMonitor::server_is_rejoin_suspect(MariaDBServer* rejoin_cand, json_t** output)
 {
    bool is_suspect = false;
    if (rejoin_cand->is_running() && !rejoin_cand->is_master())
@ -502,8 +414,7 @@ bool MariaDBMonitor::server_is_rejoin_suspect(MariaDBServer* rejoin_cand, MariaD
        else if (rejoin_cand->n_slaves_configured == 1)
        {
            // which is connected to master but it's the wrong one
-            if (slave_status->slave_io_running  &&
-                slave_status->master_server_id != master->server_id)
+            if (slave_status->slave_io_running  && slave_status->master_server_id != m_master->server_id)
            {
                is_suspect = true;
            }
@ -621,7 +532,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer
            for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
            {
                MariaDBServer* server = *iter;
-                if (server != promotion_target && update_slave_info(server) && server != demotion_target)
+                if (server != promotion_target && server->update_slave_info() && server != demotion_target)
                {
                    redirectable_slaves.push_back(server);
                }
@ -753,7 +664,7 @@ bool MariaDBMonitor::do_failover(json_t** err_out)

    bool rval = false;
    // Step 2: Wait until relay log consumed.
-    if (failover_wait_relay_log(new_master, seconds_remaining, err_out))
+    if (new_master->failover_wait_relay_log(seconds_remaining, err_out))
    {
        time_t step2_time = time(NULL);
        int seconds_step2 = difftime(step2_time, step1_time);
@ -807,64 +718,6 @@ bool MariaDBMonitor::do_failover(json_t** err_out)
    return rval;
 }

-/**
- * Waits until the new master has processed all its relay log, or time is up.
- *
- * @param new_master The new master
- * @param seconds_remaining How much time left
- * @param err_out Json error output
- * @return True if relay log was processed within time limit, or false if time ran out or an error occurred.
- */
-bool MariaDBMonitor::failover_wait_relay_log(MariaDBServer* new_master, int seconds_remaining,
-                                             json_t** err_out)
-{
-    time_t begin = time(NULL);
-    bool query_ok = true;
-    bool io_pos_stable = true;
-    while (new_master->relay_log_events() > 0 &&
-           query_ok &&
-           io_pos_stable &&
-           difftime(time(NULL), begin) < seconds_remaining)
-    {
-        MXS_INFO("Relay log of server '%s' not yet empty, waiting to clear %" PRId64 " events.",
-                 new_master->name(), new_master->relay_log_events());
-        thread_millisleep(1000); // Sleep for a while before querying server again.
-        // Todo: check server version before entering failover.
-        GtidList old_gtid_io_pos = new_master->slave_status.gtid_io_pos;
-        // Update gtid:s first to make sure Gtid_IO_Pos is the more recent value.
-        // It doesn't matter here, but is a general rule.
-        query_ok = new_master->update_gtids() && new_master->do_show_slave_status();
-        io_pos_stable = (old_gtid_io_pos == new_master->slave_status.gtid_io_pos);
-    }
-
-    bool rval = false;
-    if (new_master->relay_log_events() == 0)
-    {
-        rval = true;
-    }
-    else
-    {
-        string reason = "Timeout";
-        if (!query_ok)
-        {
-            reason = "Query error";
-        }
-        else if (!io_pos_stable)
-        {
-            reason = "Old master sent new event(s)";
-        }
-        else if (new_master->relay_log_events() < 0) // TODO: This is currently impossible
-        {
-            reason = "Invalid Gtid(s) (current_pos: " + new_master->gtid_current_pos.to_string() +
-                     ", io_pos: " + new_master->slave_status.gtid_io_pos.to_string() + ")";
-        }
-        PRINT_MXS_JSON_ERROR(err_out, "Failover: %s while waiting for server '%s' to process relay log. "
-                             "Cancelling failover.", reason.c_str(), new_master->name());
-        rval = false;
-    }
-    return rval;
-}
-
 /**
 * Demotes the current master server, preparing it for replicating from another server. This step can take a
 * while if long writes are running on the server.
@ -1107,7 +960,7 @@ bool MariaDBMonitor::switchover_check_preferred_master(MariaDBServer* preferred,
 {
    ss_dassert(preferred);
    bool rval = true;
-    if (!update_slave_info(preferred) || !preferred->check_replication_settings())
+    if (!preferred->update_slave_info() || !preferred->check_replication_settings())
    {
        PRINT_MXS_JSON_ERROR(err_out, "The requested server '%s' is not a valid promotion candidate.",
                             preferred->name());
@ -1181,7 +1034,7 @@ MariaDBServer* MariaDBMonitor::select_new_master(ServerArray* slaves_out, json_t
         * If master is replicating from external master, it is updated by update_slave_info()
         * but not added to array. */
        MariaDBServer* cand = *iter;
-        if (update_slave_info(cand) && cand != m_master)
+        if (cand->update_slave_info() && cand != m_master)
        {
            slaves_out->push_back(cand);
            // Check that server is not in the exclusion list while still being a valid choice.
@ -1405,7 +1258,7 @@ bool MariaDBMonitor::failover_check(json_t** error_out)
        }
        else if (server->is_slave())
        {
-            if (uses_gtid(server, error_out))
+            if (server->uses_gtid(error_out))
            {
                slaves++;
            }
@ -1427,24 +1280,6 @@ bool MariaDBMonitor::failover_check(json_t** error_out)
    return !error && slaves > 0;
 }

-/**
- * Checks if slave candidate can replicate from master. Only considers gtid:s and only detects obvious errors.
- * The non-detected errors will mostly be detected once the slave tries to start replicating.
- *
- * @param slave_cand Slave candidate server
- * @param master_info Master server
- * @return True if slave can replicate from master
- */
-bool MariaDBMonitor::can_replicate_from(MariaDBServer* slave_cand, MariaDBServer* master)
-{
-    bool rval = false;
-    if (slave_cand->update_gtids())
-    {
-        rval = slave_cand->gtid_current_pos.can_replicate_from(master->gtid_binlog_pos);
-    }
-    return rval;
-}
-
 /**
 * @brief Process possible failover event
 *
@ -1540,30 +1375,6 @@ bool MariaDBMonitor::handle_auto_failover()
    return cluster_modified;
 }

-/**
- * Check if server is using gtid replication.
- *
- * @param mon_server Server to check
- * @param error_out Error output
- * @return True if using gtid-replication. False if not, or if server is not a slave or otherwise does
- * not have a gtid_IO_Pos.
- */
-bool MariaDBMonitor::uses_gtid(MariaDBServer* server, json_t** error_out)
-{
-    bool rval = false;
-    if (server->slave_status.gtid_io_pos.empty())
-    {
-        string slave_not_gtid_msg = string("Slave server ") + server->name() +
-                                    " is not using gtid replication.";
-        PRINT_MXS_JSON_ERROR(error_out, "%s", slave_not_gtid_msg.c_str());
-    }
-    else
-    {
-        rval = true;
-    }
-    return rval;
-}
-
 bool MariaDBMonitor::failover_not_possible()
 {
    bool rval = false;
@ -1703,7 +1514,7 @@ bool MariaDBMonitor::switchover_check(SERVER* new_master, SERVER* current_master
    bool gtid_ok = true;
    for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
    {
-        if ((*iter)->is_slave() && !uses_gtid(*iter, error_out))
+        if ((*iter)->is_slave() && !(*iter)->uses_gtid(error_out))
        {
            gtid_ok = false;
        }
--- a/server/modules/monitor/mariadbmon/mariadbmon.hh
+++ b/server/modules/monitor/mariadbmon/mariadbmon.hh
@ -204,28 +204,22 @@ private:
    bool slave_receiving_events();
    bool failover_check(json_t** error_out);
    bool do_failover(json_t** err_out);
-    bool failover_wait_relay_log(MariaDBServer* new_master, int seconds_remaining, json_t** err_out);

    // Rejoin methods
    bool cluster_can_be_joined();
    void handle_auto_rejoin();
    bool get_joinable_servers(ServerArray* output);
-    bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, MariaDBServer* master, json_t** output);
-    bool can_replicate_from(MariaDBServer* slave_cand, MariaDBServer* master);
+    bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, json_t** output);
    uint32_t do_rejoin(const ServerArray& joinable_servers);
-    bool join_cluster(MariaDBServer* server, const std::string& change_cmd);

    // Methods common to failover/switchover/rejoin
-    bool uses_gtid(MariaDBServer* mon_server, json_t** error_out);
    MariaDBServer* select_new_master(ServerArray* slaves_out, json_t** err_out);
-    bool update_slave_info(MariaDBServer* server);
    bool server_is_excluded(const MariaDBServer* server);
    bool is_candidate_better(const MariaDBServer* current_best, const MariaDBServer* candidate,
                             uint32_t gtid_domain);
    bool promote_new_master(MariaDBServer* new_master, json_t** err_out);
    int redirect_slaves(MariaDBServer* new_master, const ServerArray& slaves,
                        ServerArray* redirected_slaves);
-    bool redirect_one_slave(MariaDBServer* slave, const std::string& change_cmd);
    std::string generate_change_master_cmd(const std::string& master_host, int master_port);
    bool start_external_replication(MariaDBServer* new_master, json_t** err_out);
    bool wait_cluster_stabilization(MariaDBServer* new_master, const ServerArray& slaves,
--- a/server/modules/monitor/mariadbmon/mariadbserver.cc
+++ b/server/modules/monitor/mariadbmon/mariadbserver.cc
@ -489,6 +489,154 @@ json_t* MariaDBServer::diagnostics_json(bool multimaster) const
    return srv;
 }

+bool MariaDBServer::uses_gtid(json_t** error_out)
+{
+    bool using_gtid = !slave_status.gtid_io_pos.empty();
+    if (!using_gtid)
+    {
+        string slave_not_gtid_msg = string("Slave server ") + name() + " is not using gtid replication.";
+        PRINT_MXS_JSON_ERROR(error_out, "%s", slave_not_gtid_msg.c_str());
+    }
+    return using_gtid;
+}
+
+bool MariaDBServer::update_slave_info()
+{
+    return (slave_status.slave_sql_running && update_replication_settings() &&
+            update_gtids() && do_show_slave_status());
+}
+
+bool MariaDBServer::can_replicate_from(MariaDBServer* master)
+{
+    bool rval = false;
+    if (update_gtids())
+    {
+        rval = gtid_current_pos.can_replicate_from(master->gtid_binlog_pos);
+    }
+    return rval;
+}
+
+bool MariaDBServer::redirect_one_slave(const string& change_cmd)
+{
+    bool success = false;
+    MYSQL* slave_conn = server_base->con;
+    const char* query = "STOP SLAVE;";
+    if (mxs_mysql_query(slave_conn, query) == 0)
+    {
+        query = "RESET SLAVE;"; // To erase any old I/O or SQL errors
+        if (mxs_mysql_query(slave_conn, query) == 0)
+        {
+            query = "CHANGE MASTER TO ..."; // Don't show the real query as it contains a password.
+            if (mxs_mysql_query(slave_conn, change_cmd.c_str()) == 0)
+            {
+                query = "START SLAVE;";
+                if (mxs_mysql_query(slave_conn, query) == 0)
+                {
+                    success = true;
+                    MXS_NOTICE("Slave '%s' redirected to new master.", name());
+                }
+            }
+        }
+    }
+
+    if (!success)
+    {
+        MXS_WARNING("Slave '%s' redirection failed: '%s'. Query: '%s'.", name(),
+                    mysql_error(slave_conn), query);
+    }
+    return success;
+}
+
+bool MariaDBServer::join_cluster(const string& change_cmd)
+{
+    /* Server does not have slave connections. This operation can fail, or the resulting
+     * replication may end up broken. */
+    bool success = false;
+    string error_msg;
+    MYSQL* server_conn = server_base->con;
+    const char* query = "SET GLOBAL read_only=1;";
+    if (mxs_mysql_query(server_conn, query) == 0)
+    {
+        query = "CHANGE MASTER TO ..."; // Don't show the real query as it contains a password.
+        if (mxs_mysql_query(server_conn, change_cmd.c_str()) == 0)
+        {
+            query = "START SLAVE;";
+            if (mxs_mysql_query(server_conn, query) == 0)
+            {
+                success = true;
+                MXS_NOTICE("Standalone server '%s' starting replication.", name());
+            }
+        }
+
+        if (!success)
+        {
+            // A step after "SET GLOBAL read_only=1" failed, try to undo. First, backup error message.
+            error_msg = mysql_error(server_conn);
+            mxs_mysql_query(server_conn, "SET GLOBAL read_only=0;");
+        }
+    }
+
+    if (!success)
+    {
+        if (error_msg.empty())
+        {
+            error_msg = mysql_error(server_conn);
+        }
+        MXS_WARNING("Standalone server '%s' failed to start replication: '%s'. Query: '%s'.",
+                    name(), error_msg.c_str(), query);
+    }
+    return success;
+}
+
+bool MariaDBServer::failover_wait_relay_log(int seconds_remaining, json_t** err_out)
+{
+    time_t begin = time(NULL);
+    bool query_ok = true;
+    bool io_pos_stable = true;
+    while (relay_log_events() > 0 &&
+           query_ok &&
+           io_pos_stable &&
+           difftime(time(NULL), begin) < seconds_remaining)
+    {
+        MXS_INFO("Relay log of server '%s' not yet empty, waiting to clear %" PRId64 " events.",
+                 name(), relay_log_events());
+        thread_millisleep(1000); // Sleep for a while before querying server again.
+        // Todo: check server version before entering failover.
+        GtidList old_gtid_io_pos = slave_status.gtid_io_pos;
+        // Update gtid:s first to make sure Gtid_IO_Pos is the more recent value.
+        // It doesn't matter here, but is a general rule.
+        query_ok = update_gtids() && do_show_slave_status();
+        io_pos_stable = (old_gtid_io_pos == slave_status.gtid_io_pos);
+    }
+
+    bool rval = false;
+    if (relay_log_events() == 0)
+    {
+        rval = true;
+    }
+    else
+    {
+        string reason = "Timeout";
+        if (!query_ok)
+        {
+            reason = "Query error";
+        }
+        else if (!io_pos_stable)
+        {
+            reason = "Old master sent new event(s)";
+        }
+        else if (relay_log_events() < 0) // TODO: This is currently impossible
+        {
+            reason = "Invalid Gtid(s) (current_pos: " + gtid_current_pos.to_string() +
+                     ", io_pos: " + slave_status.gtid_io_pos.to_string() + ")";
+        }
+        PRINT_MXS_JSON_ERROR(err_out, "Failover: %s while waiting for server '%s' to process relay log. "
+                             "Cancelling failover.", reason.c_str(), name());
+        rval = false;
+    }
+    return rval;
+}
+
 QueryResult::QueryResult(MYSQL_RES* resultset)
    : m_resultset(resultset)
    , m_columns(-1)
--- a/server/modules/monitor/mariadbmon/mariadbserver.hh
+++ b/server/modules/monitor/mariadbmon/mariadbserver.hh
@ -212,6 +212,58 @@ public:
     * @return Diagnostics string
     */
    std::string diagnostics(bool multimaster) const;
+
+    /**
+     * Check if server is using gtid replication.
+     *
+     * @param error_out Error output
+     * @return True if using gtid-replication. False if not, or if server is not a slave or otherwise does
+     * not have a gtid_IO_Pos.
+     */
+    bool uses_gtid(json_t** error_out);
+
+    /**
+     * Update replication settings, gtid:s and slave status of this server. The updates are only
+     * attempted if the stored slave status shows the slave SQL thread as running.
+     *
+     * @return True on success. False on error, or if server is not a slave (slave SQL not running).
+     */
+    bool update_slave_info();
+
+    /**
+     * Checks if this server can replicate from master. Only considers gtid:s and only detects obvious errors.
+     * The non-detected errors will mostly be detected once the slave tries to start replicating.
+     *
+     * @param master Master server, whose gtid_binlog_pos is compared to this server's gtid_current_pos
+     * @return True if slave can replicate from master
+     */
+    bool can_replicate_from(MariaDBServer* master);
+
+    /**
+     * Redirect this slave server to another master.
+     *
+     * @param change_cmd Change master command, usually generated by generate_change_master_cmd()
+     * @return True if slave accepted all commands
+     */
+    bool redirect_one_slave(const std::string& change_cmd);
+
+    /**
+     * Joins this standalone server to the cluster.
+     *
+     * @param change_cmd Change master command
+     * @return True if commands were accepted by server
+     */
+    bool join_cluster(const std::string& change_cmd);
+
+    /**
+     * Waits until this server has processed all its relay log, or time is up.
+     *
+     * @param seconds_remaining How much time left
+     * @param err_out Json error output
+     * @return True if relay log was processed within time limit, or false if time ran out
+     * or an error occurred.
+     */
+    bool failover_wait_relay_log(int seconds_remaining, json_t** err_out);
 };

 /**