diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc index 79d9b5d7e..414ff9bea 100644 --- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc +++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc @@ -598,64 +598,54 @@ bool MariaDBMonitor::failover_perform(MariaDBServer* promotion_target, MariaDBSe // completed. int seconds_remaining = m_failover_timeout; time_t start_time = time(NULL); + // Step 1: Populate a vector with all slaves not the selected master. ServerArray redirectable_slaves = get_redirectables(promotion_target, demotion_target); - time_t step1_time = time(NULL); - seconds_remaining -= difftime(step1_time, start_time); - bool rval = false; - // Step 2: Wait until relay log consumed. - if (promotion_target->failover_wait_relay_log(demotion_target, seconds_remaining, error_out)) + // Step 2: Stop and reset slave, set read-only to 0. + if (promote_new_master(promotion_target, error_out)) { - time_t step2_time = time(NULL); - int seconds_step2 = difftime(step2_time, step1_time); - MXS_DEBUG("Failover: relay log processing took %d seconds.", seconds_step2); - seconds_remaining -= seconds_step2; + m_next_master = promotion_target; + m_cluster_modified = true; - // Step 3: Stop and reset slave, set read-only to 0. - if (promote_new_master(promotion_target, error_out)) + // Step 3: Redirect slaves. + ServerArray redirected_slaves; + int redirects = redirect_slaves(promotion_target, redirectable_slaves, &redirected_slaves); + bool success = redirectable_slaves.empty() ? true : redirects > 0; + if (success) { - m_next_master = promotion_target; - m_cluster_modified = true; - // Step 4: Redirect slaves. - ServerArray redirected_slaves; - int redirects = redirect_slaves(promotion_target, redirectable_slaves, &redirected_slaves); - bool success = redirectable_slaves.empty() ? true : redirects > 0; - if (success) - { - time_t step4_time = time(NULL); - seconds_remaining -= difftime(step4_time, step2_time); + time_t step3_time = time(NULL); + seconds_remaining -= difftime(step3_time, start_time); - // Step 5: Finally, add an event to the new master to advance gtid and wait for the slaves - // to receive it. seconds_remaining can be 0 or less at this point. Even in such a case - // wait_cluster_stabilization() may succeed if replication is fast enough. If using external - // replication, skip this step. Come up with an alternative later. - if (m_external_master_port != PORT_UNKNOWN) - { - MXS_WARNING("Replicating from external master, skipping final check."); - rval = true; - } - else if (redirected_slaves.empty()) - { - // No slaves to check. Assume success. - rval = true; - MXS_DEBUG("Failover: no slaves to redirect, skipping stabilization check."); - } - else if (wait_cluster_stabilization(promotion_target, redirected_slaves, seconds_remaining)) - { - rval = true; - time_t step5_time = time(NULL); - int seconds_step5 = difftime(step5_time, step4_time); - seconds_remaining -= seconds_step5; - MXS_DEBUG("Failover: slave replication confirmation took %d seconds with " - "%d seconds to spare.", seconds_step5, seconds_remaining); - } - } - else + // Step 4: Finally, add an event to the new master to advance gtid and wait for the slaves + // to receive it. seconds_remaining can be 0 or less at this point. Even in such a case + // wait_cluster_stabilization() may succeed if replication is fast enough. If using external + // replication, skip this step. Come up with an alternative later. + if (m_external_master_port != PORT_UNKNOWN) { - print_redirect_errors(NULL, redirectable_slaves, error_out); + MXS_WARNING("Replicating from external master, skipping final check."); + rval = true; } + else if (redirected_slaves.empty()) + { + // No slaves to check. Assume success. + rval = true; + MXS_DEBUG("Failover: no slaves to redirect, skipping stabilization check."); + } + else if (wait_cluster_stabilization(promotion_target, redirected_slaves, seconds_remaining)) + { + rval = true; + time_t step4_time = time(NULL); + int seconds_step4 = difftime(step4_time, step3_time); + seconds_remaining -= seconds_step4; + MXS_DEBUG("Failover: slave replication confirmation took %d seconds with " + "%d seconds to spare.", seconds_step4, seconds_remaining); + } + } + else + { + print_redirect_errors(NULL, redirectable_slaves, error_out); } } @@ -1222,9 +1212,49 @@ bool MariaDBMonitor::failover_prepare(Log log_mode, if (promotion_target && demotion_target && gtid_ok) { - *promotion_target_out = promotion_target; - *demotion_target_out = demotion_target; - return true; + const SlaveStatus* slave_conn = promotion_target->slave_connection_status(demotion_target); + mxb_assert(slave_conn); + uint64_t events = promotion_target->relay_log_events(*slave_conn); + if (events > 0) + { + // The relay log of the promotion target is not yet clear. This is not really an error, + // but should be communicated to the user in the case of manual failover. For automatic + // failover, it's best to just try again during the next monitor iteration. The difference + // to a typical prepare-fail is that the relay log status should be logged + // repeatedly since it is likely to change continuously. + if (error_out) + { + // Print a bit more helpful error for the user, goes to log too. This should be a very rare + // occurrence: either the dba managed to start failover really fast, or the relay log is + // massive. In the latter case it's ok that the monitor does not do the waiting since there + // is no telling how long the wait will be. + const char wait_relay_log[] = "The relay log of '%s' has %" PRIu64 " unprocessed events " + "(Gtid_IO_Pos: %s, Gtid_Current_Pos: %s). To avoid data loss, failover should be " + "postponed until the log has been processed. Please try again later."; + string error_msg = string_printf(wait_relay_log, + promotion_target->name(), + events, + slave_conn->gtid_io_pos.to_string().c_str(), + promotion_target->m_gtid_current_pos.to_string().c_str()); + PRINT_MXS_JSON_ERROR(error_out, "%s", error_msg.c_str()); + } + else if (log_mode == Log::ON) + { + // For automatic failover the message is more typical. TODO: Think if this message should + // be logged more often. + MXS_WARNING("The relay log of '%s' has %" PRId64 " unprocessed events " + "(Gtid_IO_Pos: %s, Gtid_Current_Pos: %s). To avoid data loss, " + "failover is postponed until the log has been processed.", + promotion_target->name(), events, slave_conn->gtid_io_pos.to_string().c_str(), + promotion_target->m_gtid_current_pos.to_string().c_str()); + } + } + else + { + *promotion_target_out = promotion_target; + *demotion_target_out = demotion_target; + return true; + } } return false; } @@ -1287,7 +1317,7 @@ void MariaDBMonitor::handle_auto_failover() // Servers were not modified, so it's ok to try this again. if (m_warn_failover_precond) { - MXS_WARNING("Not performing automatic failover. Will keep retrying with this message " + MXS_WARNING("Not performing automatic failover. Will keep retrying with most error messages " "suppressed."); m_warn_failover_precond = false; } diff --git a/server/modules/monitor/mariadbmon/gtid.cc b/server/modules/monitor/mariadbmon/gtid.cc index 023792548..d7478e887 100644 --- a/server/modules/monitor/mariadbmon/gtid.cc +++ b/server/modules/monitor/mariadbmon/gtid.cc @@ -97,12 +97,13 @@ bool GtidList::operator == (const GtidList& rhs) const return m_triplets == rhs.m_triplets; } -int64_t GtidList::events_ahead(const GtidList& rhs, substraction_mode_t domain_substraction_mode) const +uint64_t GtidList::events_ahead(const GtidList& rhs, substraction_mode_t domain_substraction_mode) const { const size_t n_lhs = m_triplets.size(); const size_t n_rhs = rhs.m_triplets.size(); size_t ind_lhs = 0, ind_rhs = 0; uint64_t events = 0; + // GtidLists are assumed to be ordered by domain in ascending order. while (ind_lhs < n_lhs && ind_rhs < n_rhs) { @@ -138,7 +139,7 @@ int64_t GtidList::events_ahead(const GtidList& rhs, substraction_mode_t domain_s ind_rhs++; } } - return (events > INT64_MAX) ? INT64_MAX : events; + return events; } Gtid Gtid::from_string(const char* str, char** endptr) diff --git a/server/modules/monitor/mariadbmon/gtid.hh b/server/modules/monitor/mariadbmon/gtid.hh index fb2260042..25e88f557 100644 --- a/server/modules/monitor/mariadbmon/gtid.hh +++ b/server/modules/monitor/mariadbmon/gtid.hh @@ -140,7 +140,7 @@ public: * the sequence number on lhs is added to the total difference. * @return The number of events between the two gtid:s */ - int64_t events_ahead(const GtidList& rhs, substraction_mode_t domain_substraction_mode) const; + uint64_t events_ahead(const GtidList& rhs, substraction_mode_t domain_substraction_mode) const; /** * Return an individual gtid with the given domain. diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index fe81e8344..160906d12 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -73,19 +73,13 @@ void NodeData::reset_indexes() in_stack = false; } -int64_t MariaDBServer::relay_log_events(const MariaDBServer* master) +uint64_t MariaDBServer::relay_log_events(const SlaveStatus& slave_conn) { /* The events_ahead-call below ignores domains where current_pos is ahead of io_pos. This situation is * rare but is possible (I guess?) if the server is replicating a domain from multiple masters * and decides to process events from one relay log before getting new events to the other. In * any case, such events are obsolete and the server can be considered to have processed such logs. */ - int64_t rval = -1; - const SlaveStatus* sstatus = slave_connection_status(master); - if (sstatus) - { - rval = sstatus->gtid_io_pos.events_ahead(m_gtid_current_pos, GtidList::MISSING_DOMAIN_IGNORE); - } - return rval; + return slave_conn.gtid_io_pos.events_ahead(m_gtid_current_pos, GtidList::MISSING_DOMAIN_IGNORE); } std::unique_ptr MariaDBServer::execute_query(const string& query, std::string* errmsg_out) @@ -660,62 +654,6 @@ bool MariaDBServer::join_cluster(const string& change_cmd) return success; } -bool MariaDBServer::failover_wait_relay_log(const MariaDBServer* master, int seconds_remaining, - json_t** err_out) -{ - time_t begin = time(NULL); - bool query_ok = true; - bool io_pos_stable = true; - int64_t events = relay_log_events(master); - while (events > 0 && query_ok && io_pos_stable && difftime(time(NULL), begin) < seconds_remaining) - { - const SlaveStatus* sstatus = slave_connection_status(master); - mxb_assert(sstatus); - GtidList old_gtid_io_pos = sstatus->gtid_io_pos; - - // Sleep for a while before querying server again. - MXS_NOTICE("Relay log of server '%s' not yet empty, waiting to clear %" PRId64 " events.", - name(), events); - std::this_thread::sleep_for(std::chrono::seconds(1)); - - // Update gtid:s first to make sure Gtid_IO_Pos is the more recent value. - // It doesn't matter here, but is a general rule. - query_ok = update_gtids() && do_show_slave_status(); - if (query_ok) - { - const SlaveStatus* new_sstatus = slave_connection_status(master); - io_pos_stable = new_sstatus ? (old_gtid_io_pos == new_sstatus->gtid_io_pos) : false; - events = relay_log_events(master); - } - } - - bool rval = false; - if (events == 0 && query_ok && io_pos_stable) - { - rval = true; - } - else - { - string reason = "Timeout"; - if (!query_ok) - { - reason = "Query error"; - } - else if (!io_pos_stable) - { - reason = "Old master sent new event(s)"; - } - else if (events < 0) - { - reason = string_printf("Slave connection to '%s' was removed", master->name()); - } - PRINT_MXS_JSON_ERROR(err_out, "Failover: %s while waiting for server '%s' to process relay log. " - "Cancelling failover.", reason.c_str(), name()); - rval = false; - } - return rval; -} - bool MariaDBServer::run_sql_from_file(const string& path, json_t** error_out) { MYSQL* conn = m_server_base->con; diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh index 61fd78445..e91fd1ce4 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.hh +++ b/server/modules/monitor/mariadbmon/mariadbserver.hh @@ -176,13 +176,12 @@ public: void check_permissions(); /** - * Calculate how many events are left in the relay log of the slave connection to 'master'. + * Calculate how many events are left in the relay log of the slave connection. * - * @param master The master server from which the slave connection is replicating from - * @return Number of events in relay log according to latest queried info. Negative on error, - * e.g. the slave connection didn't exist. + * @param slave_conn The slave connection to calculate for + * @return Number of events in relay log. Always 0 or greater. */ - int64_t relay_log_events(const MariaDBServer* master); + uint64_t relay_log_events(const SlaveStatus& slave_conn); /** * Execute a query which returns data. The results are returned as a unique pointer to a QueryResult @@ -398,17 +397,6 @@ public: */ bool join_cluster(const std::string& change_cmd); - /** - * Waits until this server has processed all its relay log, or time is up. - * - * @param master The master (or relay) whose relay log should be waited on - * @param seconds_remaining Maximum wait time - * @param err_out Error output - * @return True if relay log was processed within time limit, or false if time ran out - * or an error occurred. - */ - bool failover_wait_relay_log(const MariaDBServer* master, int seconds_remaining, json_t** err_out); - /** * Check if the server can be demoted by switchover. *