diff --git a/server/modules/monitor/mariadbmon/cluster_discovery.cc b/server/modules/monitor/mariadbmon/cluster_discovery.cc index 8ca95c178..3b43f11e1 100644 --- a/server/modules/monitor/mariadbmon/cluster_discovery.cc +++ b/server/modules/monitor/mariadbmon/cluster_discovery.cc @@ -658,7 +658,7 @@ void MariaDBMonitor::monitor_database(MariaDBServer* serv_info) /* If gtid domain exists and server is 10.0, update gtid:s */ if (m_master_gtid_domain >= 0 && serv_info->version == MYSQL_SERVER_VERSION_100) { - serv_info->update_gtids(m_master_gtid_domain); + serv_info->update_gtids(); } /* Check for MariaDB 10.x.x and get status for multi-master replication */ if (serv_info->version == MYSQL_SERVER_VERSION_100 || serv_info->version == MYSQL_SERVER_VERSION_55) @@ -693,7 +693,7 @@ void MariaDBMonitor::monitor_mysql_db(MariaDBServer* serv_info) monitor_clear_pending_status(database, SERVER_SLAVE | SERVER_MASTER | SERVER_RELAY_MASTER | SERVER_SLAVE_OF_EXTERNAL_MASTER); - if (serv_info->do_show_slave_status(m_master_gtid_domain)) + if (serv_info->do_show_slave_status()) { /* If all configured slaves are running set this node as slave */ if (serv_info->slave_configured && serv_info->n_slaves_running > 0 && @@ -718,8 +718,8 @@ MariaDBServer* MariaDBMonitor::update_slave_info(MXS_MONITORED_SERVER* server) MariaDBServer* info = get_server_info(server); if (info->slave_status.slave_sql_running && info->update_replication_settings() && - info->update_gtids(m_master_gtid_domain) && - info->do_show_slave_status(m_master_gtid_domain)) + info->update_gtids() && + info->do_show_slave_status()) { return info; } diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc index 6ca0d80a5..487d34d78 100644 --- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc +++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc @@ -156,7 +156,7 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** 
output) if (server_is_rejoin_suspect(mon_server, master_info, output)) { - if (master_info->update_gtids(m_master_gtid_domain)) + if (master_info->update_gtids()) { if (can_replicate_from(mon_server, server_info, master_info)) { @@ -433,7 +433,7 @@ bool MariaDBMonitor::get_joinable_servers(ServerVector* output) bool comm_ok = true; if (!suspects.empty()) { - if (master_info->update_gtids(m_master_gtid_domain)) + if (master_info->update_gtids()) { for (size_t i = 0; i < suspects.size(); i++) { @@ -811,11 +811,10 @@ bool MariaDBMonitor::failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, i new_master->server->unique_name, master_info->relay_log_events()); thread_millisleep(1000); // Sleep for a while before querying server again. // Todo: check server version before entering failover. - GtidTriplet old_gtid_io_pos = master_info->slave_status.gtid_io_pos; + Gtid old_gtid_io_pos = master_info->slave_status.gtid_io_pos; // Update gtid:s first to make sure Gtid_IO_Pos is the more recent value. // It doesn't matter here, but is a general rule. - query_ok = master_info->update_gtids(m_master_gtid_domain) && - master_info->do_show_slave_status(m_master_gtid_domain); + query_ok = master_info->update_gtids() && master_info->do_show_slave_status(); io_pos_stable = (old_gtid_io_pos == master_info->slave_status.gtid_io_pos); } @@ -903,7 +902,7 @@ bool MariaDBMonitor::switchover_demote_master(MXS_MONITORED_SERVER* current_mast if (!query_error) { query = ""; - if (info->update_gtids(m_master_gtid_domain)) + if (info->update_gtids()) { success = true; } @@ -962,7 +961,7 @@ bool MariaDBMonitor::switchover_demote_master(MXS_MONITORED_SERVER* current_mast * @param err_out json object for error printing. Can be NULL. 
* @return True, if target gtid was reached within allotted time for all servers */ -bool MariaDBMonitor::switchover_wait_slaves_catchup(const ServerVector& slaves, const GtidTriplet& gtid, +bool MariaDBMonitor::switchover_wait_slaves_catchup(const ServerVector& slaves, const Gtid& gtid, int total_timeout, int read_timeout, json_t** err_out) { bool success = true; @@ -1003,7 +1002,7 @@ bool MariaDBMonitor::switchover_wait_slaves_catchup(const ServerVector& slaves, * @param err_out json object for error printing. Can be NULL. * @return True, if target gtid was reached within allotted time */ -bool MariaDBMonitor::switchover_wait_slave_catchup(MXS_MONITORED_SERVER* slave, const GtidTriplet& gtid, +bool MariaDBMonitor::switchover_wait_slave_catchup(MXS_MONITORED_SERVER* slave, const Gtid& gtid, int total_timeout, int read_timeout, json_t** err_out) { @@ -1071,12 +1070,12 @@ bool MariaDBMonitor::wait_cluster_stabilization(MXS_MONITORED_SERVER* new_master MariaDBServer* new_master_info = get_server_info(new_master); if (mxs_mysql_query(new_master->con, "FLUSH TABLES;") == 0 && - new_master_info->update_gtids(m_master_gtid_domain)) + new_master_info->update_gtids()) { int query_fails = 0; int repl_fails = 0; int successes = 0; - const GtidTriplet target = new_master_info->gtid_current_pos; + const Gtid& target = new_master_info->gtid_current_pos; ServerVector wait_list = slaves; // Check all the servers in the list bool first_round = true; bool time_is_up = false; @@ -1094,8 +1093,8 @@ bool MariaDBMonitor::wait_cluster_stabilization(MXS_MONITORED_SERVER* new_master { MXS_MONITORED_SERVER* slave = wait_list[i]; MariaDBServer* slave_info = get_server_info(slave); - if (slave_info->update_gtids(m_master_gtid_domain) && - slave_info->do_show_slave_status(m_master_gtid_domain)) + if (slave_info->update_gtids() && + slave_info->do_show_slave_status()) { if (!slave_info->slave_status.last_error.empty()) { @@ -1106,7 +1105,8 @@ bool 
MariaDBMonitor::wait_cluster_stabilization(MXS_MONITORED_SERVER* new_master wait_list.erase(wait_list.begin() + i); repl_fails++; } - else if (slave_info->gtid_current_pos.sequence >= target.sequence) + else if (Gtid::events_ahead(target, slave_info->gtid_current_pos, + Gtid::MISSING_DOMAIN_IGNORE) == 0) { // This slave has reached the same gtid as master, remove from list wait_list.erase(wait_list.begin() + i); @@ -1245,7 +1245,8 @@ MXS_MONITORED_SERVER* MariaDBMonitor::select_new_master(ServerVector* slaves_out else if (cand_info->check_replication_settings()) { // If no new master yet, accept any valid candidate. Otherwise check. - if (current_best == NULL || is_candidate_better(current_best_info, cand_info)) + if (current_best == NULL || + is_candidate_better(current_best_info, cand_info, m_master_gtid_domain)) { // The server has been selected for promotion, for now. current_best = cand; @@ -1278,7 +1279,7 @@ MXS_MONITORED_SERVER* MariaDBMonitor::select_new_master(ServerVector* slaves_out MXS_WARNING(EXCLUDED_ONLY_CAND, excluded_name); break; } - else if (is_candidate_better(current_best_info, excluded_info)) + else if (is_candidate_better(current_best_info, excluded_info, m_master_gtid_domain)) { // Print a warning if this server is actually a better candidate than the previous // best. 
@@ -1322,15 +1323,17 @@ bool MariaDBMonitor::server_is_excluded(const MXS_MONITORED_SERVER* server) * * @param current_best_info Server info of current best choice * @param candidate_info Server info of new candidate + * @param gtid_domain Which domain to compare * @return True if candidate is better */ bool MariaDBMonitor::is_candidate_better(const MariaDBServer* current_best_info, - const MariaDBServer* candidate_info) + const MariaDBServer* candidate_info, uint32_t gtid_domain) { - uint64_t cand_io = candidate_info->slave_status.gtid_io_pos.sequence; - uint64_t cand_processed = candidate_info->gtid_current_pos.sequence; - uint64_t curr_io = current_best_info->slave_status.gtid_io_pos.sequence; - uint64_t curr_processed = current_best_info->gtid_current_pos.sequence; + uint64_t cand_io = candidate_info->slave_status.gtid_io_pos.get_triplet(gtid_domain).sequence; + uint64_t cand_processed = candidate_info->gtid_current_pos.get_triplet(gtid_domain).sequence; + uint64_t curr_io = current_best_info->slave_status.gtid_io_pos.get_triplet(gtid_domain).sequence; + uint64_t curr_processed = current_best_info->gtid_current_pos.get_triplet(gtid_domain).sequence; + bool cand_updates = candidate_info->rpl_settings.log_slave_updates; bool curr_updates = current_best_info->rpl_settings.log_slave_updates; bool is_better = false; @@ -1496,19 +1499,9 @@ bool MariaDBMonitor::can_replicate_from(MXS_MONITORED_SERVER* slave, MariaDBServer* slave_info, MariaDBServer* master_info) { bool rval = false; - if (slave_info->update_gtids(m_master_gtid_domain)) + if (slave_info->update_gtids()) { - GtidTriplet slave_gtid = slave_info->gtid_current_pos; - GtidTriplet master_gtid = master_info->gtid_binlog_pos; - // The following are not sufficient requirements for replication to work, they only cover the basics. - // If the servers have diverging histories, the redirection will seem to succeed but the slave IO - // thread will stop in error. 
- if (slave_gtid.server_id != SERVER_ID_UNKNOWN && master_gtid.server_id != SERVER_ID_UNKNOWN && - slave_gtid.domain == master_gtid.domain && - slave_gtid.sequence <= master_info->gtid_current_pos.sequence) - { - rval = true; - } + rval = slave_info->gtid_current_pos.can_replicate_from(master_info->gtid_binlog_pos); } return rval; } @@ -1630,7 +1623,7 @@ bool MariaDBMonitor::uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_ { bool rval = false; const MariaDBServer* info = get_server_info(mon_server); - if (info->slave_status.gtid_io_pos.server_id == SERVER_ID_UNKNOWN) + if (info->slave_status.gtid_io_pos.empty()) { string slave_not_gtid_msg = string("Slave server ") + mon_server->server->unique_name + " is not using gtid replication."; diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index ab86fe810..d073ec7ea 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -269,17 +269,17 @@ void MariaDBMonitor::diagnostics(DCB *dcb) const dcb_printf(dcb, "Master binlog position: %lu\n", serv_info->slave_status.read_master_log_pos); } - if (serv_info->gtid_current_pos.server_id != SERVER_ID_UNKNOWN) + if (!serv_info->gtid_current_pos.empty()) { dcb_printf(dcb, "Gtid current position: %s\n", serv_info->gtid_current_pos.to_string().c_str()); } - if (serv_info->gtid_binlog_pos.server_id != SERVER_ID_UNKNOWN) + if (!serv_info->gtid_binlog_pos.empty()) { dcb_printf(dcb, "Gtid binlog position: %s\n", - serv_info->gtid_current_pos.to_string().c_str()); + serv_info->gtid_binlog_pos.to_string().c_str()); } - if (serv_info->slave_status.gtid_io_pos.server_id != SERVER_ID_UNKNOWN) + if (!serv_info->slave_status.gtid_io_pos.empty()) { dcb_printf(dcb, "Gtid slave IO position: %s\n", serv_info->slave_status.gtid_io_pos.to_string().c_str()); diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh index 226f175ef..9e277c556 100644 --- 
a/server/modules/monitor/mariadbmon/mariadbmon.hh +++ b/server/modules/monitor/mariadbmon/mariadbmon.hh @@ -197,9 +197,9 @@ private: bool failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, int seconds_remaining, json_t** err_out); bool switchover_demote_master(MXS_MONITORED_SERVER* current_master, MariaDBServer* info, json_t** err_out); - bool switchover_wait_slaves_catchup(const ServerVector& slaves, const GtidTriplet& gtid, int total_timeout, + bool switchover_wait_slaves_catchup(const ServerVector& slaves, const Gtid& gtid, int total_timeout, int read_timeout, json_t** err_out); - bool switchover_wait_slave_catchup(MXS_MONITORED_SERVER* slave, const GtidTriplet& gtid, + bool switchover_wait_slave_catchup(MXS_MONITORED_SERVER* slave, const Gtid& gtid, int total_timeout, int read_timeout, json_t** err_out); bool wait_cluster_stabilization(MXS_MONITORED_SERVER* new_master, const ServerVector& slaves, int seconds_remaining); @@ -207,7 +207,8 @@ private: bool promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out); MXS_MONITORED_SERVER* select_new_master(ServerVector* slaves_out, json_t** err_out); bool server_is_excluded(const MXS_MONITORED_SERVER* server); - bool is_candidate_better(const MariaDBServer* current_best_info, const MariaDBServer* candidate_info); + bool is_candidate_better(const MariaDBServer* current_best_info, const MariaDBServer* candidate_info, + uint32_t gtid_domain); MariaDBServer* update_slave_info(MXS_MONITORED_SERVER* server); void init_server_info(); bool slave_receiving_events(); diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index 8c86a7c97..8ee3d019a 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -44,14 +44,11 @@ MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server) int64_t MariaDBServer::relay_log_events() { - if (slave_status.gtid_io_pos.server_id != 
SERVER_ID_UNKNOWN && - gtid_current_pos.server_id != SERVER_ID_UNKNOWN && - slave_status.gtid_io_pos.domain == gtid_current_pos.domain && - slave_status.gtid_io_pos.sequence >= gtid_current_pos.sequence) - { - return slave_status.gtid_io_pos.sequence - gtid_current_pos.sequence; - } - return -1; + /* The events_ahead-call below ignores domains where current_pos is ahead of io_pos. This situation + * should be rare, but may be possible if the server is replicating a domain from multiple masters + * and processes the events from one relay log before receiving new events to the other. In any + * case, such events are obsolete and the server can be considered to have processed such logs. */ + return Gtid::events_ahead(slave_status.gtid_io_pos, gtid_current_pos, Gtid::MISSING_DOMAIN_LHS_ADD); } std::auto_ptr MariaDBServer::execute_query(const string& query) @@ -70,7 +67,7 @@ std::auto_ptr MariaDBServer::execute_query(const string& query) return rval; } -bool MariaDBServer::do_show_slave_status(int64_t gtid_domain) +bool MariaDBServer::do_show_slave_status() { /** Column positions for SHOW SLAVE STATUS */ const size_t MYSQL55_STATUS_MASTER_LOG_POS = 5; @@ -79,7 +76,6 @@ bool MariaDBServer::do_show_slave_status(int64_t gtid_domain) const size_t MYSQL55_STATUS_SQL_RUNNING = 11; const size_t MYSQL55_STATUS_MASTER_ID = 39; - bool rval = true; unsigned int columns; int i_slave_io_running, i_slave_sql_running, i_read_master_log_pos, i_master_server_id, i_master_log_file; int i_last_io_errno, i_last_io_error, i_last_sql_error, i_slave_rec_hbs, i_slave_hb_period; @@ -199,16 +195,15 @@ bool MariaDBServer::do_show_slave_status(int64_t gtid_domain) heartbeat_period = result->get_uint(i_slave_hb_period); } string using_gtid = result->get_string(i_using_gtid); - if (gtid_domain >= 0 && (using_gtid == "Current_Pos" || using_gtid == "Slave_Pos")) + string gtid_io_pos = result->get_string(i_gtid_io_pos); + if (!gtid_io_pos.empty() && + (using_gtid == "Current_Pos" || using_gtid == 
"Slave_Pos")) { - string gtid_io_pos = result->get_string(i_gtid_io_pos); - slave_status.gtid_io_pos = !gtid_io_pos.empty() ? - GtidTriplet(gtid_io_pos.c_str(), gtid_domain) : - GtidTriplet(); + slave_status.gtid_io_pos = Gtid::from_string(gtid_io_pos); } else { - slave_status.gtid_io_pos = GtidTriplet(); + slave_status.gtid_io_pos = Gtid(); } } } @@ -228,23 +223,44 @@ bool MariaDBServer::do_show_slave_status(int64_t gtid_domain) slave_status.master_server_id = master_server_id; n_slaves_configured = nconfigured; n_slaves_running = nrunning; - return rval; + return true; } -bool MariaDBServer::update_gtids(int64_t gtid_domain) +bool MariaDBServer::update_gtids() { - ss_dassert(gtid_domain >= 0); static const string query = "SELECT @@gtid_current_pos, @@gtid_binlog_pos;"; - const int ind_current_pos = 0; - const int ind_binlog_pos = 1; + const int i_current_pos = 0; + const int i_binlog_pos = 1; bool rval = false; auto result = execute_query(query); if (result.get() != NULL && result->next_row()) { - gtid_current_pos = result->get_gtid(ind_current_pos, gtid_domain); - gtid_binlog_pos = result->get_gtid(ind_binlog_pos, gtid_domain); - rval = true; + auto current_str = result->get_string(i_current_pos); + auto binlog_str = result->get_string(i_binlog_pos); + bool current_ok = false; + bool binlog_ok = false; + if (current_str.empty()) + { + gtid_current_pos = Gtid(); + } + else + { + gtid_current_pos = Gtid::from_string(current_str); + current_ok = !gtid_current_pos.empty(); + } + + if (binlog_str.empty()) + { + gtid_binlog_pos = Gtid(); + } + else + { + gtid_binlog_pos = Gtid::from_string(binlog_str); + binlog_ok = !gtid_binlog_pos.empty(); + } + + rval = (current_ok && binlog_ok); } return rval; } @@ -276,23 +292,23 @@ void MariaDBServer::read_server_variables() columns = 3; } - int ind_id = 0; - int ind_ro = 1; - int ind_domain = 2; + int i_id = 0; + int i_ro = 1; + int i_domain = 2; auto result = execute_query(query); if (result.get() != NULL && 
result->next_row()) { - int64_t server_id_parsed = result->get_uint(ind_id); + int64_t server_id_parsed = result->get_uint(i_id); if (server_id_parsed < 0) { server_id_parsed = SERVER_ID_UNKNOWN; } database->server->node_id = server_id_parsed; server_id = server_id_parsed; - read_only = result->get_bool(ind_ro); + read_only = result->get_bool(i_ro); if (columns == 3) { - gtid_domain_id = result->get_uint(ind_domain); + gtid_domain_id = result->get_uint(i_domain); } } } diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh index b355cc5fd..22a1cd169 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.hh +++ b/server/modules/monitor/mariadbmon/mariadbserver.hh @@ -47,8 +47,7 @@ public: * reading from. */ uint64_t read_master_log_pos; /**< Position up to which the I/O thread has read in the current master * binary log file. */ - GtidTriplet gtid_io_pos; /**< Gtid I/O position of the slave thread. Only shows the triplet with - * the current master domain. */ + Gtid gtid_io_pos; /**< Gtid I/O position of the slave thread. */ std::string last_error; /**< Last IO or SQL error encountered. */ SlaveStatusInfo(); @@ -92,21 +91,17 @@ public: time_t latest_event; /**< Time when latest event was received from the master */ int64_t gtid_domain_id; /**< The value of gtid_domain_id, the domain which is used for * new non-replicated events. */ - GtidTriplet gtid_current_pos; /**< Gtid of latest event. Only shows the triplet - * with the current master domain. */ - GtidTriplet gtid_binlog_pos; /**< Gtid of latest event written to binlog. Only shows - * the triplet with the current master domain. */ + Gtid gtid_current_pos; /**< Gtid of latest event. */ + Gtid gtid_binlog_pos; /**< Gtid of latest event written to binlog. 
*/ SlaveStatusInfo slave_status; /**< Data returned from SHOW SLAVE STATUS */ ReplicationSettings rpl_settings; /**< Miscellaneous replication related settings */ MariaDBServer(MXS_MONITORED_SERVER* monitored_server); /** - * Calculate how many events are left in the relay log. If gtid_current_pos is ahead of Gtid_IO_Pos, - * or a server_id is unknown, an error value is returned. + * Calculate how many events are left in the relay log. * - * @return Number of events in relay log according to latest queried info. A negative value signifies - * an error in the gtid-values. + * @return Number of events in relay log according to latest queried info. */ int64_t relay_log_events(); @@ -126,16 +121,13 @@ public: - * @param gtid_domain Which gtid domain should be parsed. * @return True on success */ - bool do_show_slave_status(int64_t gtid_domain); + bool do_show_slave_status(); /** * Query gtid_current_pos and gtid_binlog_pos and save the values to the server. - * Only the given domain is parsed. * - * @param gtid_domain Which gtid domain should be parsed * @return True if successful */ - bool update_gtids(int64_t gtid_domain); + bool update_gtids(); /** * Query a few miscellaneous replication settings. 
diff --git a/server/modules/monitor/mariadbmon/utilities.cc b/server/modules/monitor/mariadbmon/utilities.cc index 7dc9be706..fbb0acb1b 100644 --- a/server/modules/monitor/mariadbmon/utilities.cc +++ b/server/modules/monitor/mariadbmon/utilities.cc @@ -446,9 +446,23 @@ void GtidTriplet::parse_triplet(const char* str) ss_dassert(rv == 3); } -string GtidTriplet::generate_master_gtid_wait_cmd(double timeout) const +string Gtid::generate_master_gtid_wait_cmd(double timeout) const { std::stringstream query_ss; query_ss << "SELECT MASTER_GTID_WAIT(\"" << to_string() << "\", " << timeout << ");"; return query_ss.str(); +} + +GtidTriplet Gtid::get_triplet(uint32_t domain) const +{ + GtidTriplet rval; + // Make a dummy triplet for the domain search + GtidTriplet search_val(domain, -1, 0); + auto found = std::lower_bound(m_triplets.begin(), m_triplets.end(), search_val, + GtidTriplet::compare_domains); + if (found != m_triplets.end() && found->domain == domain) + { + rval = *found; + } + return rval; } \ No newline at end of file diff --git a/server/modules/monitor/mariadbmon/utilities.hh b/server/modules/monitor/mariadbmon/utilities.hh index 7be3c54f8..96a699aef 100644 --- a/server/modules/monitor/mariadbmon/utilities.hh +++ b/server/modules/monitor/mariadbmon/utilities.hh @@ -109,14 +109,6 @@ public: std::string to_string() const; - /** - * Generate a MASTER_GTID_WAIT()-query to this gtid. - * - * @param timeout Maximum wait time in seconds - * @return The query - */ - std::string generate_master_gtid_wait_cmd(double timeout) const; - /** * Comparator, used when sorting by domain id. * @@ -205,11 +197,26 @@ public: static uint64_t events_ahead(const Gtid& lhs, const Gtid& rhs, substraction_mode_t domain_substraction_mode); + /** + * Generate a MASTER_GTID_WAIT()-query to this gtid. 
+ * + * @param timeout Maximum wait time in seconds + * @return The query + */ + std::string generate_master_gtid_wait_cmd(double timeout) const; + + /** + * Get the triplet of the given domain. + * + * @param domain The gtid domain to look for + * @return The triplet of the domain, or a default-constructed triplet if the domain is not found + */ + GtidTriplet get_triplet(uint32_t domain) const; + private: std::vector m_triplets; }; - /** * Helper class for simplifying working with resultsets. Used in MariaDBServer. */