diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc
index d4b692649..46cb9e6e2 100644
--- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc
+++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc
@@ -979,7 +979,8 @@ bool MariaDBMonitor::switchover_wait_slaves_catchup(const ServerVector& slaves,
     {
         time_t begin = time(NULL);
         MXS_MONITORED_SERVER* slave = *iter;
-        if (switchover_wait_slave_catchup(slave, gtid, seconds_remaining, read_timeout, err_out))
+        auto slave_server = get_server_info(slave); // Server object that provides wait_until_gtid().
+        if (slave_server->wait_until_gtid(gtid, seconds_remaining, err_out)) // NOTE(review): read_timeout is not forwarded anymore.
         {
             seconds_remaining -= difftime(time(NULL), begin);
         }
@@ -992,67 +993,6 @@ bool MariaDBMonitor::switchover_wait_slaves_catchup(const ServerVector& slaves,
     return success;
 }
 
-/**
- * Wait until slave replication catches up with the master gtid
- *
- * @param slave Slave to wait on
- * @param gtid Which gtid must be reached
- * @param total_timeout Maximum wait time in seconds TODO: timeouts
- * @param read_timeout The value of read_timeout for the connection
- * @param err_out json object for error printing. Can be NULL.
- * @return True, if target gtid was reached within allotted time
- */
-bool MariaDBMonitor::switchover_wait_slave_catchup(MXS_MONITORED_SERVER* slave, const GtidList& gtid,
-                                                   int total_timeout, int read_timeout,
-                                                   json_t** err_out)
-{
-    ss_dassert(read_timeout > 0);
-    StringVector output;
-    bool gtid_reached = false;
-    bool error = false;
-    double seconds_remaining = total_timeout;
-
-    // Determine a reasonable timeout for the MASTER_GTID_WAIT-function depending on the
-    // backend_read_timeout setting (should be >= 1) and time remaining.
-    double loop_timeout = double(read_timeout) - 0.5;
-    string cmd = gtid.generate_master_gtid_wait_cmd(loop_timeout);
-
-    while (seconds_remaining > 0 && !gtid_reached && !error)
-    {
-        if (loop_timeout > seconds_remaining)
-        {
-            // For the last iteration, change the wait timeout.
-            cmd = gtid.generate_master_gtid_wait_cmd(seconds_remaining);
-        }
-        seconds_remaining -= loop_timeout;
-
-        if (query_one_row(slave, cmd.c_str(), 1, &output))
-        {
-            if (output[0] == "0")
-            {
-                gtid_reached = true;
-            }
-            output.clear();
-        }
-        else
-        {
-            error = true;
-        }
-    }
-
-    if (error)
-    {
-        PRINT_MXS_JSON_ERROR(err_out, "MASTER_GTID_WAIT() query error on slave '%s'.",
-                             slave->server->unique_name);
-    }
-    else if (!gtid_reached)
-    {
-        PRINT_MXS_JSON_ERROR(err_out, "MASTER_GTID_WAIT() timed out on slave '%s'.",
-                             slave->server->unique_name);
-    }
-    return gtid_reached;
-}
-
 /**
  * Send an event to new master and wait for slaves to get the event.
  *
diff --git a/server/modules/monitor/mariadbmon/gtid.cc b/server/modules/monitor/mariadbmon/gtid.cc
index 6a7a99918..f96a09795 100644
--- a/server/modules/monitor/mariadbmon/gtid.cc
+++ b/server/modules/monitor/mariadbmon/gtid.cc
@@ -217,11 +217,6 @@ string Gtid::to_string() const
     return rval;
 }
 
-string GtidList::generate_master_gtid_wait_cmd(double timeout) const
-{
-    return "SELECT MASTER_GTID_WAIT(\"" + to_string() + "\", " + std::to_string(timeout) + ");";
-}
-
 Gtid GtidList::get_gtid(uint32_t domain) const
 {
     Gtid rval;
diff --git a/server/modules/monitor/mariadbmon/gtid.hh b/server/modules/monitor/mariadbmon/gtid.hh
index 48b7d7d72..b6da2b588 100644
--- a/server/modules/monitor/mariadbmon/gtid.hh
+++ b/server/modules/monitor/mariadbmon/gtid.hh
@@ -147,13 +147,11 @@ public:
                                  substraction_mode_t domain_substraction_mode);
 
     /**
-     * Generate a MASTER_GTID_WAIT()-query to this gtid.
+     * Return an individual gtid with the given domain.
      *
-     * @param timeout Maximum wait time in seconds
-     * @return The query
+     * @param domain Which domain to search for
+     * @return The gtid within the list. If domain is not found, an invalid gtid is returned.
      */
-    std::string generate_master_gtid_wait_cmd(double timeout) const;
-
     Gtid get_gtid(uint32_t domain) const;
 
 private:
diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh
index 0366f3489..634f0b1d5 100644
--- a/server/modules/monitor/mariadbmon/mariadbmon.hh
+++ b/server/modules/monitor/mariadbmon/mariadbmon.hh
@@ -199,8 +199,6 @@ private:
                                      json_t** err_out);
     bool switchover_wait_slaves_catchup(const ServerVector& slaves, const GtidList& gtid, int total_timeout,
                                         int read_timeout, json_t** err_out);
-    bool switchover_wait_slave_catchup(MXS_MONITORED_SERVER* slave, const GtidList& gtid,
-                                       int total_timeout, int read_timeout, json_t** err_out);
     bool wait_cluster_stabilization(MXS_MONITORED_SERVER* new_master, const ServerVector& slaves,
                                     int seconds_remaining);
     bool switchover_check_preferred_master(MXS_MONITORED_SERVER* preferred, json_t** err_out);
diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc
index 04ba60ee5..ceeb4cd72 100644
--- a/server/modules/monitor/mariadbmon/mariadbserver.cc
+++ b/server/modules/monitor/mariadbmon/mariadbserver.cc
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 #include "utilities.hh"
 
 SlaveStatusInfo::SlaveStatusInfo()
@@ -346,4 +347,55 @@ bool MariaDBServer::check_replication_settings(print_repl_warnings_t print_warni
         }
     }
     return rval;
-}
\ No newline at end of file
+}
+
+bool MariaDBServer::wait_until_gtid(const GtidList& target, int timeout, json_t** err_out)
+{
+    bool gtid_reached = false; // Set once events_ahead() reports 0 for 'target' versus this server.
+    bool error = false;        // Set when update_gtids() fails; ends the wait immediately.
+    /* Prefer to use gtid_binlog_pos, as that is more reliable. But if log_bin or log_slave_updates is
+     * not on, use gtid_current_pos. The choice is made once, before polling starts. */
+    const bool use_binlog_pos = rpl_settings.log_bin && rpl_settings.log_slave_updates;
+
+    int seconds_remaining = 1; // Starts positive so at least one check is made, whatever 'timeout' is.
+    int sleep_ms = 200; // How long to sleep before the next check. Grows by 100 ms per retry.
+    time_t start_time = time(NULL); // Reference point for the elapsed-time calculation below.
+    while (seconds_remaining > 0 && !gtid_reached && !error)
+    {
+        if (update_gtids()) // Presumably refreshes gtid_binlog_pos/gtid_current_pos; false is treated as an error.
+        {
+            const GtidList& compare_to = use_binlog_pos ? gtid_binlog_pos : gtid_current_pos;
+            if (GtidList::events_ahead(target, compare_to, GtidList::MISSING_DOMAIN_IGNORE) == 0)
+            {
+                gtid_reached = true;
+            }
+            else
+            {
+                // Update succeeded but target gtid not yet reached. Recompute time left (truncated to int).
+                seconds_remaining = timeout - difftime(time(NULL), start_time);
+                if (seconds_remaining > 0)
+                {
+                    // Sleep, then try again. NOTE(review): sleep is not capped to the time left; may overshoot 'timeout'.
+                    thread_millisleep(sleep_ms);
+                    sleep_ms += 100; // Sleep a bit more next iteration.
+                }
+            }
+        }
+        else
+        {
+            error = true;
+        }
+    }
+
+    if (error)
+    {
+        PRINT_MXS_JSON_ERROR(err_out, "Failed to update gtid on server '%s' while waiting for catchup.",
+                             server_base->server->unique_name);
+    }
+    else if (!gtid_reached)
+    {
+        PRINT_MXS_JSON_ERROR(err_out, "Slave catchup timed out on slave '%s'.",
+                             server_base->server->unique_name);
+    }
+    return gtid_reached;
+}
diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh
index 62e696b84..5f3c17ac3 100644
--- a/server/modules/monitor/mariadbmon/mariadbserver.hh
+++ b/server/modules/monitor/mariadbmon/mariadbserver.hh
@@ -150,4 +150,16 @@ public:
      * @return True if log_bin is on
      */
     bool check_replication_settings(print_repl_warnings_t print_warnings = WARNINGS_ON);
+
+    /**
+     * Wait until server catches up to the target gtid. Only considers gtid domains common to this server
+     * and the target gtid. The gtid compared is the gtid_binlog_pos if this server has both log_bin and
+     * log_slave_updates on, and gtid_current_pos otherwise.
+     *
+     * @param target Which gtid must be reached
+     * @param timeout Maximum wait time in seconds. At least one check is made even if this is <= 0.
+     * @param err_out json object for error printing. Can be NULL. Used on timeout and on gtid update failure.
+     * @return True, if target gtid was reached within allotted time. False on timeout or gtid update failure.
+     */
+    bool wait_until_gtid(const GtidList& target, int timeout, json_t** err_out);
 };
diff --git a/server/modules/monitor/mariadbmon/utilities.hh b/server/modules/monitor/mariadbmon/utilities.hh
index 727cc1588..e5f6ff12d 100644
--- a/server/modules/monitor/mariadbmon/utilities.hh
+++ b/server/modules/monitor/mariadbmon/utilities.hh
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 #include
 
 /** Utility macro for printing both MXS_ERROR and json error */