From ca9682f04244c78dc4344853cb8aa15ce7b11ec6 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Wed, 11 Apr 2018 14:32:22 +0300 Subject: [PATCH] MXS-1703 Reorganize cluster manipulation methods Just moving code around. --- .../monitor/mariadbmon/cluster_discovery.cc | 296 +++++++++++++++++ .../mariadbmon/cluster_manipulation.cc | 8 + .../modules/monitor/mariadbmon/mariadbmon.cc | 300 +----------------- .../modules/monitor/mariadbmon/mariadbmon.hh | 142 ++++----- 4 files changed, 380 insertions(+), 366 deletions(-) diff --git a/server/modules/monitor/mariadbmon/cluster_discovery.cc b/server/modules/monitor/mariadbmon/cluster_discovery.cc index a470ce374..b05924991 100644 --- a/server/modules/monitor/mariadbmon/cluster_discovery.cc +++ b/server/modules/monitor/mariadbmon/cluster_discovery.cc @@ -950,3 +950,299 @@ static bool check_replicate_wild_ignore_table(MXS_MONITORED_SERVER* database) } return rval; } + +/** + * @brief Check whether standalone master conditions have been met + * + * This function checks whether all the conditions to use a standalone master are met. For this to happen, + * only one server must be available and other servers must have passed the configured tolerance level of + * failures. + * + * @param db Monitor servers + * + * @return True if standalone master should be used + */ +bool MariaDBMonitor::standalone_master_required(MXS_MONITORED_SERVER *db) +{ + int candidates = 0; + + while (db) + { + if (SERVER_IS_RUNNING(db->server)) + { + candidates++; + MariaDBServer *server_info = get_server_info(db); + + if (server_info->read_only || server_info->slave_configured || candidates > 1) + { + return false; + } + } + else if (db->mon_err_count < m_failcount) + { + return false; + } + + db = db->next; + } + + return candidates == 1; +} + +/** + * @brief Use standalone master + * + * This function assigns the last remaining server the master status and sets all other servers into + * maintenance mode. By setting the servers into maintenance mode, we prevent any possible conflicts when + * the failed servers come back up. + * + * @param db Monitor servers + */ +bool MariaDBMonitor::set_standalone_master(MXS_MONITORED_SERVER *db) +{ + bool rval = false; + + while (db) + { + if (SERVER_IS_RUNNING(db->server)) + { + if (!SERVER_IS_MASTER(db->server) && m_warn_set_standalone_master) + { + MXS_WARNING("Setting standalone master, server '%s' is now the master.%s", + db->server->unique_name, + m_allow_cluster_recovery ? + "" : " All other servers are set into maintenance mode."); + m_warn_set_standalone_master = false; + } + + server_clear_set_status(db->server, SERVER_SLAVE, SERVER_MASTER | SERVER_STALE_STATUS); + monitor_set_pending_status(db, SERVER_MASTER | SERVER_STALE_STATUS); + monitor_clear_pending_status(db, SERVER_SLAVE); + m_master = db; + rval = true; + } + else if (!m_allow_cluster_recovery) + { + server_set_status_nolock(db->server, SERVER_MAINT); + monitor_set_pending_status(db, SERVER_MAINT); + } + db = db->next; + } + + return rval; +} + +/** + * Monitor a server. Should be moved to the server class later on. + * + * @param server The server + */ +void MariaDBMonitor::monitor_one_server(MariaDBServer& server) +{ + MXS_MONITORED_SERVER* ptr = server.server_base; + + ptr->mon_prev_status = ptr->server->status; + /* copy server status into monitor pending_status */ + ptr->pending_status = ptr->server->status; + + /* monitor current node */ + monitor_database(get_server_info(ptr)); + + /* reset the slave list of current node */ + memset(&ptr->server->slaves, 0, sizeof(ptr->server->slaves)); + + if (mon_status_changed(ptr)) + { + if (SRV_MASTER_STATUS(ptr->mon_prev_status)) + { + /** Master failed, can't recover */ + MXS_NOTICE("Server [%s]:%d lost the master status.", + ptr->server->name, + ptr->server->port); + } + } + + if (mon_status_changed(ptr)) + { +#if defined(SS_DEBUG) + MXS_INFO("Backend server [%s]:%d state : %s", + ptr->server->name, + ptr->server->port, + STRSRVSTATUS(ptr->server)); +#else + MXS_DEBUG("Backend server [%s]:%d state : %s", + ptr->server->name, + ptr->server->port, + STRSRVSTATUS(ptr->server)); +#endif + } + + if (SERVER_IS_DOWN(ptr->server)) + { + /** Increase this server'e error count */ + ptr->mon_err_count += 1; + } + else + { + /** Reset this server's error count */ + ptr->mon_err_count = 0; + } +} + +/** + * Compute replication tree, find root master. + * + * @return Found master server or NULL + */ +MariaDBServer* MariaDBMonitor::find_root_master() +{ + MXS_MONITORED_SERVER* found_root_master = NULL; + const int num_servers = m_servers.size(); + /* if only one server is configured, that's is Master */ + if (num_servers == 1) + { + auto mon_server = m_servers[0].server_base; + if (SERVER_IS_RUNNING(mon_server->server)) + { + mon_server->server->depth = 0; + /* status cleanup */ + monitor_clear_pending_status(mon_server, SERVER_SLAVE); + /* master status set */ + monitor_set_pending_status(mon_server, SERVER_MASTER); + + mon_server->server->depth = 0; + m_master = mon_server; + found_root_master = mon_server; + } + } + else + { + /* Compute the replication tree */ + if (m_mysql51_replication) + { + found_root_master = build_mysql51_replication_tree(); + } + else + { + found_root_master = get_replication_tree(); + } + } + + if (m_detect_multimaster && num_servers > 0) + { + /** Find all the master server cycles in the cluster graph. If + multiple masters are found, the servers with the read_only + variable set to ON will be assigned the slave status. */ + find_graph_cycles(); + } + + return found_root_master ? get_server_info(found_root_master) : NULL; +} + +/** + * Test if server is a relay master and assign status if yes. + * + * @param candidate The server to assign + */ +void MariaDBMonitor::assign_relay_master(MariaDBServer& candidate) +{ + MXS_MONITORED_SERVER* ptr = candidate.server_base; + if (ptr->server->node_id > 0 && ptr->server->master_id > 0 && + getSlaveOfNodeId(ptr->server->node_id, REJECT_DOWN) && + getServerByNodeId(ptr->server->master_id) && + (!m_detect_multimaster || candidate.group == 0)) + { + /** This server is both a slave and a master i.e. a relay master */ + monitor_set_pending_status(ptr, SERVER_RELAY_MASTER); + monitor_clear_pending_status(ptr, SERVER_MASTER); + } +} + +/** + * Update serve states of a single server + * + * @param db_server Server to update + * @param root_master_server The current best master + */ +void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServer* root_master_server) +{ + MXS_MONITORED_SERVER* ptr = db_server.server_base; + MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->server_base : NULL; + if (!SERVER_IN_MAINT(ptr->server)) + { + MariaDBServer *serv_info = get_server_info(ptr); + + /** If "detect_stale_master" option is On, let's use the previous master. + * + * Multi-master mode detects the stale masters in find_graph_cycles(). + * + * TODO: If a stale master goes down and comes back up, it loses + * the master status. An adequate solution would be to promote + * the stale master as a real master if it is the last running server. + */ + if (m_detect_stale_master && root_master && !m_detect_multimaster && + (strcmp(ptr->server->name, root_master->server->name) == 0 && + ptr->server->port == root_master->server->port) && + (ptr->server->status & SERVER_MASTER) && + !(ptr->pending_status & SERVER_MASTER) && + !serv_info->read_only) + { + /** + * In this case server->status will not be updated from pending_status + * Set the STALE bit for this server in server struct + */ + server_set_status_nolock(ptr->server, SERVER_STALE_STATUS | SERVER_MASTER); + monitor_set_pending_status(ptr, SERVER_STALE_STATUS | SERVER_MASTER); + + /** Log the message only if the master server didn't have + * the stale master bit set */ + if ((ptr->mon_prev_status & SERVER_STALE_STATUS) == 0) + { + MXS_WARNING("All slave servers under the current master " + "server have been lost. Assigning Stale Master" + " status to the old master server '%s' (%s:%i).", + ptr->server->unique_name, ptr->server->name, + ptr->server->port); + } + } + + if (m_detect_stale_slave) + { + unsigned int bits = SERVER_SLAVE | SERVER_RUNNING; + + if ((ptr->mon_prev_status & bits) == bits && + root_master && SERVER_IS_MASTER(root_master->server)) + { + /** Slave with a running master, assign stale slave candidacy */ + if ((ptr->pending_status & bits) == bits) + { + monitor_set_pending_status(ptr, SERVER_STALE_SLAVE); + } + /** Server lost slave when a master is available, remove + * stale slave candidacy */ + else if ((ptr->pending_status & bits) == SERVER_RUNNING) + { + monitor_clear_pending_status(ptr, SERVER_STALE_SLAVE); + } + } + /** If this server was a stale slave candidate, assign + * slave status to it */ + else if (ptr->mon_prev_status & SERVER_STALE_SLAVE && + ptr->pending_status & SERVER_RUNNING && + // Master is down + (!root_master || !SERVER_IS_MASTER(root_master->server) || + // Master just came up + (SERVER_IS_MASTER(root_master->server) && + (root_master->mon_prev_status & SERVER_MASTER) == 0))) + { + monitor_set_pending_status(ptr, SERVER_SLAVE); + } + else if (root_master == NULL && serv_info->slave_configured) + { + monitor_set_pending_status(ptr, SERVER_SLAVE); + } + } + + ptr->server->status = ptr->pending_status; + } +} diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc index 020dc3530..1a8da0c09 100644 --- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc +++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc @@ -1531,6 +1531,14 @@ bool MariaDBMonitor::mon_process_failover(bool* cluster_modified_out) return rval; } +/** + * Check if server is using gtid replication. + * + * @param mon_server Server to check + * @param error_out Error output + * @return True if using gtid-replication. False if not, or if server is not a slave or otherwise does + * not have a gtid_IO_Pos. + */ bool MariaDBMonitor::uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out) { bool rval = false; diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index a411ca642..f80e22876 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -86,12 +86,22 @@ void MariaDBMonitor::init_server_info() } } +/** + * Get monitor-specific server info for the monitored server. + * + * @param handle + * @param db Server to get info for. Must be a valid server or function crashes. + * @return The server info. + */ MariaDBServer* MariaDBMonitor::get_server_info(MXS_MONITORED_SERVER* db) { ss_dassert(m_server_info.count(db) == 1); // Should always exist in the map return m_server_info[db]; } +/** + * Constant version of get_server_info(). + */ const MariaDBServer* MariaDBMonitor::get_server_info(const MXS_MONITORED_SERVER* db) const { return const_cast(this)->get_server_info(const_cast(db)); @@ -359,87 +369,6 @@ json_t* MariaDBMonitor::diagnostics_json() const return rval; } -/** - * @brief Check whether standalone master conditions have been met - * - * This function checks whether all the conditions to use a standalone master are met. For this to happen, - * only one server must be available and other servers must have passed the configured tolerance level of - * failures. - * - * @param db Monitor servers - * - * @return True if standalone master should be used - */ -bool MariaDBMonitor::standalone_master_required(MXS_MONITORED_SERVER *db) -{ - int candidates = 0; - - while (db) - { - if (SERVER_IS_RUNNING(db->server)) - { - candidates++; - MariaDBServer *server_info = get_server_info(db); - - if (server_info->read_only || server_info->slave_configured || candidates > 1) - { - return false; - } - } - else if (db->mon_err_count < m_failcount) - { - return false; - } - - db = db->next; - } - - return candidates == 1; -} - -/** - * @brief Use standalone master - * - * This function assigns the last remaining server the master status and sets all other servers into - * maintenance mode. By setting the servers into maintenance mode, we prevent any possible conflicts when - * the failed servers come back up. - * - * @param db Monitor servers - */ -bool MariaDBMonitor::set_standalone_master(MXS_MONITORED_SERVER *db) -{ - bool rval = false; - - while (db) - { - if (SERVER_IS_RUNNING(db->server)) - { - if (!SERVER_IS_MASTER(db->server) && m_warn_set_standalone_master) - { - MXS_WARNING("Setting standalone master, server '%s' is now the master.%s", - db->server->unique_name, - m_allow_cluster_recovery ? - "" : " All other servers are set into maintenance mode."); - m_warn_set_standalone_master = false; - } - - server_clear_set_status(db->server, SERVER_SLAVE, SERVER_MASTER | SERVER_STALE_STATUS); - monitor_set_pending_status(db, SERVER_MASTER | SERVER_STALE_STATUS); - monitor_clear_pending_status(db, SERVER_SLAVE); - m_master = db; - rval = true; - } - else if (!m_allow_cluster_recovery) - { - server_set_status_nolock(db->server, SERVER_MAINT); - monitor_set_pending_status(db, SERVER_MAINT); - } - db = db->next; - } - - return rval; -} - void MariaDBMonitor::main_loop() { m_status = MXS_MONITOR_RUNNING; @@ -608,113 +537,6 @@ void MariaDBMonitor::main_loop() m_status = MXS_MONITOR_STOPPED; } -/** - * Monitor a server. Should be moved to the server class later on. - * - * @param server The server - */ -void MariaDBMonitor::monitor_one_server(MariaDBServer& server) -{ - MXS_MONITORED_SERVER* ptr = server.server_base; - - ptr->mon_prev_status = ptr->server->status; - /* copy server status into monitor pending_status */ - ptr->pending_status = ptr->server->status; - - /* monitor current node */ - monitor_database(get_server_info(ptr)); - - /* reset the slave list of current node */ - memset(&ptr->server->slaves, 0, sizeof(ptr->server->slaves)); - - if (mon_status_changed(ptr)) - { - if (SRV_MASTER_STATUS(ptr->mon_prev_status)) - { - /** Master failed, can't recover */ - MXS_NOTICE("Server [%s]:%d lost the master status.", - ptr->server->name, - ptr->server->port); - } - } - - if (mon_status_changed(ptr)) - { -#if defined(SS_DEBUG) - MXS_INFO("Backend server [%s]:%d state : %s", - ptr->server->name, - ptr->server->port, - STRSRVSTATUS(ptr->server)); -#else - MXS_DEBUG("Backend server [%s]:%d state : %s", - ptr->server->name, - ptr->server->port, - STRSRVSTATUS(ptr->server)); -#endif - } - - if (SERVER_IS_DOWN(ptr->server)) - { - /** Increase this server'e error count */ - ptr->mon_err_count += 1; - } - else - { - /** Reset this server's error count */ - ptr->mon_err_count = 0; - } -} - -/** - * Compute replication tree, find root master. - * - * @return Found master server or NULL - */ -MariaDBServer* MariaDBMonitor::find_root_master() -{ - MXS_MONITORED_SERVER* found_root_master = NULL; - const int num_servers = m_servers.size(); - /* if only one server is configured, that's is Master */ - if (num_servers == 1) - { - auto mon_server = m_servers[0].server_base; - if (SERVER_IS_RUNNING(mon_server->server)) - { - mon_server->server->depth = 0; - /* status cleanup */ - monitor_clear_pending_status(mon_server, SERVER_SLAVE); - /* master status set */ - monitor_set_pending_status(mon_server, SERVER_MASTER); - - mon_server->server->depth = 0; - m_master = mon_server; - found_root_master = mon_server; - } - } - else - { - /* Compute the replication tree */ - if (m_mysql51_replication) - { - found_root_master = build_mysql51_replication_tree(); - } - else - { - found_root_master = get_replication_tree(); - } - } - - if (m_detect_multimaster && num_servers > 0) - { - /** Find all the master server cycles in the cluster graph. If - multiple masters are found, the servers with the read_only - variable set to ON will be assigned the slave status. */ - find_graph_cycles(); - } - - return found_root_master ? get_server_info(found_root_master) : NULL; -} - void MariaDBMonitor::update_gtid_domain() { MariaDBServer* master_info = get_server_info(m_master); @@ -763,108 +585,6 @@ void MariaDBMonitor::update_external_master() } } -/** - * TODO: Move to MariaDBServer. - * - * @param serv_info - */ -void MariaDBMonitor::assign_relay_master(MariaDBServer& serv_info) -{ - MXS_MONITORED_SERVER* ptr = serv_info.server_base; - if (ptr->server->node_id > 0 && ptr->server->master_id > 0 && - getSlaveOfNodeId(ptr->server->node_id, REJECT_DOWN) && - getServerByNodeId(ptr->server->master_id) && - (!m_detect_multimaster || serv_info.group == 0)) - { - /** This server is both a slave and a master i.e. a relay master */ - monitor_set_pending_status(ptr, SERVER_RELAY_MASTER); - monitor_clear_pending_status(ptr, SERVER_MASTER); - } -} - -void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServer* root_master_server) -{ - MXS_MONITORED_SERVER* ptr = db_server.server_base; - MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->server_base : NULL; - if (!SERVER_IN_MAINT(ptr->server)) - { - MariaDBServer *serv_info = get_server_info(ptr); - - /** If "detect_stale_master" option is On, let's use the previous master. - * - * Multi-master mode detects the stale masters in find_graph_cycles(). - * - * TODO: If a stale master goes down and comes back up, it loses - * the master status. An adequate solution would be to promote - * the stale master as a real master if it is the last running server. - */ - if (m_detect_stale_master && root_master && !m_detect_multimaster && - (strcmp(ptr->server->name, root_master->server->name) == 0 && - ptr->server->port == root_master->server->port) && - (ptr->server->status & SERVER_MASTER) && - !(ptr->pending_status & SERVER_MASTER) && - !serv_info->read_only) - { - /** - * In this case server->status will not be updated from pending_status - * Set the STALE bit for this server in server struct - */ - server_set_status_nolock(ptr->server, SERVER_STALE_STATUS | SERVER_MASTER); - monitor_set_pending_status(ptr, SERVER_STALE_STATUS | SERVER_MASTER); - - /** Log the message only if the master server didn't have - * the stale master bit set */ - if ((ptr->mon_prev_status & SERVER_STALE_STATUS) == 0) - { - MXS_WARNING("All slave servers under the current master " - "server have been lost. Assigning Stale Master" - " status to the old master server '%s' (%s:%i).", - ptr->server->unique_name, ptr->server->name, - ptr->server->port); - } - } - - if (m_detect_stale_slave) - { - unsigned int bits = SERVER_SLAVE | SERVER_RUNNING; - - if ((ptr->mon_prev_status & bits) == bits && - root_master && SERVER_IS_MASTER(root_master->server)) - { - /** Slave with a running master, assign stale slave candidacy */ - if ((ptr->pending_status & bits) == bits) - { - monitor_set_pending_status(ptr, SERVER_STALE_SLAVE); - } - /** Server lost slave when a master is available, remove - * stale slave candidacy */ - else if ((ptr->pending_status & bits) == SERVER_RUNNING) - { - monitor_clear_pending_status(ptr, SERVER_STALE_SLAVE); - } - } - /** If this server was a stale slave candidate, assign - * slave status to it */ - else if (ptr->mon_prev_status & SERVER_STALE_SLAVE && - ptr->pending_status & SERVER_RUNNING && - // Master is down - (!root_master || !SERVER_IS_MASTER(root_master->server) || - // Master just came up - (SERVER_IS_MASTER(root_master->server) && - (root_master->mon_prev_status & SERVER_MASTER) == 0))) - { - monitor_set_pending_status(ptr, SERVER_SLAVE); - } - else if (root_master == NULL && serv_info->slave_configured) - { - monitor_set_pending_status(ptr, SERVER_SLAVE); - } - } - - ptr->server->status = ptr->pending_status; - } -} - void MariaDBMonitor::measure_replication_lag(MariaDBServer* root_master_server) { MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->server_base : NULL; diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh index ad71806af..3177e1690 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.hh +++ b/server/modules/monitor/mariadbmon/mariadbmon.hh @@ -108,30 +108,6 @@ public: */ bool manual_rejoin(SERVER* rejoin_server, json_t** output); - /** - * Check if server is using gtid replication. - * - * @param mon_server Server to check - * @param error_out Error output - * @return True if using gtid-replication. False if not, or if server is not a slave or otherwise does - * not have a gtid_IO_Pos. - */ - bool uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out); - - /** - * Get monitor-specific server info for the monitored server. - * - * @param handle - * @param db Server to get info for. Must be a valid server or function crashes. - * @return The server info. - */ - MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db); - - /** - * Constant version of get_server_info(). - */ - const MariaDBServer* get_server_info(const MXS_MONITORED_SERVER* db) const; - private: MXS_MONITOR* m_monitor_base; /**< Generic monitor object */ THREAD m_thread; /**< Monitor thread */ @@ -182,72 +158,86 @@ private: REJECT_DOWN }; + // Base methods MariaDBMonitor(MXS_MONITOR* monitor_base); ~MariaDBMonitor(); - bool load_config_params(const MXS_CONFIG_PARAMETER* params); - bool failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, int seconds_remaining, json_t** err_out); - bool switchover_demote_master(MXS_MONITORED_SERVER* current_master, MariaDBServer* info, - json_t** err_out); - bool switchover_wait_slaves_catchup(const ServerRefArray& slaves, const GtidList& gtid, int total_timeout, - int read_timeout, json_t** err_out); - bool wait_cluster_stabilization(MariaDBServer* new_master, const ServerRefArray& slaves, - int seconds_remaining); - bool switchover_check_preferred_master(MXS_MONITORED_SERVER* preferred, json_t** err_out); - bool promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out); - MariaDBServer* select_new_master(ServerRefArray* slaves_out, json_t** err_out); - bool server_is_excluded(const MXS_MONITORED_SERVER* server); - bool is_candidate_better(const MariaDBServer* current_best_info, const MariaDBServer* candidate_info, - uint32_t gtid_domain); - MariaDBServer* update_slave_info(MXS_MONITORED_SERVER* server); void init_server_info(); - bool slave_receiving_events(); - void monitor_database(MariaDBServer* param_db); - bool standalone_master_required(MXS_MONITORED_SERVER *db); - bool set_standalone_master(MXS_MONITORED_SERVER *db); - bool failover_not_possible(); - std::string generate_change_master_cmd(const std::string& master_host, int master_port); - int redirect_slaves(MariaDBServer* new_master, const ServerRefArray& slaves, - ServerRefArray* redirected_slaves); + bool load_config_params(const MXS_CONFIG_PARAMETER* params); bool set_replication_credentials(const MXS_CONFIG_PARAMETER* params); - bool start_external_replication(MXS_MONITORED_SERVER* new_master, json_t** err_out); - bool switchover_start_slave(MXS_MONITORED_SERVER* old_master, SERVER* new_master); - bool redirect_one_slave(MXS_MONITORED_SERVER* slave, const char* change_cmd); - bool get_joinable_servers(ServerRefArray* output); - bool join_cluster(MXS_MONITORED_SERVER* server, const char* change_cmd); + MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db); + const MariaDBServer* get_server_info(const MXS_MONITORED_SERVER* db) const; + + // Cluster discovery and status assignment methods + void monitor_one_server(MariaDBServer& server); + void monitor_database(MariaDBServer* param_db); + void monitor_mysql_db(MariaDBServer *serv_info); + MariaDBServer* find_root_master(); + MXS_MONITORED_SERVER* get_replication_tree(); + MXS_MONITORED_SERVER* build_mysql51_replication_tree(); + void find_graph_cycles(); + void update_server_states(MariaDBServer& db_server, MariaDBServer* root_master); + bool standalone_master_required(MXS_MONITORED_SERVER* db); + bool set_standalone_master(MXS_MONITORED_SERVER* db); + void assign_relay_master(MariaDBServer& serv_info); + void log_master_changes(MariaDBServer* root_master, int* log_no_master); + void update_gtid_domain(); + void update_external_master(); void set_master_heartbeat(MXS_MONITORED_SERVER *); void set_slave_heartbeat(MXS_MONITORED_SERVER *); - MXS_MONITORED_SERVER* build_mysql51_replication_tree(); - MXS_MONITORED_SERVER* get_replication_tree(); - void monitor_mysql_db(MariaDBServer *serv_info); - bool do_switchover(MariaDBServer** current_master, MariaDBServer** new_master, json_t** err_out); - bool do_failover(json_t** err_out); - uint32_t do_rejoin(const ServerRefArray& joinable_servers); - bool mon_process_failover(bool* cluster_modified_out); - bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, MariaDBServer* master, json_t** output); - bool cluster_can_be_joined(); - bool failover_check(json_t** error_out); - void disable_setting(const char* setting); + void measure_replication_lag(MariaDBServer* root_master); + void check_maxscale_schema_replication(); + MXS_MONITORED_SERVER* getServerByNodeId(long); + MXS_MONITORED_SERVER* getSlaveOfNodeId(long, slave_down_setting_t); + + // Switchover methods bool switchover_check(SERVER* new_master, SERVER* current_master, MariaDBServer** new_master_out, MariaDBServer** current_master_out, json_t** error_out); bool switchover_check_new(const MXS_MONITORED_SERVER* monitored_server, json_t** error); bool switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master, json_t** error_out) const; - bool can_replicate_from(MariaDBServer* slave_cand, MariaDBServer* master); - void monitor_one_server(MariaDBServer& server); - MariaDBServer* find_root_master(); - void update_gtid_domain(); - void update_external_master(); - void assign_relay_master(MariaDBServer& serv_info); - void update_server_states(MariaDBServer& db_server, MariaDBServer* root_master); - void log_master_changes(MariaDBServer* root_master, int* log_no_master); + bool do_switchover(MariaDBServer** current_master, MariaDBServer** new_master, json_t** err_out); + bool switchover_check_preferred_master(MXS_MONITORED_SERVER* preferred, json_t** err_out); + bool switchover_demote_master(MXS_MONITORED_SERVER* current_master, MariaDBServer* info, + json_t** err_out); + bool switchover_wait_slaves_catchup(const ServerRefArray& slaves, const GtidList& gtid, int total_timeout, + int read_timeout, json_t** err_out); + bool switchover_start_slave(MXS_MONITORED_SERVER* old_master, SERVER* new_master); + + // Failover methods void handle_auto_failover(bool* failover_performed); - void measure_replication_lag(MariaDBServer* root_master); + bool failover_not_possible(); + bool slave_receiving_events(); + bool mon_process_failover(bool* cluster_modified_out); + bool failover_check(json_t** error_out); + bool do_failover(json_t** err_out); + bool failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, int seconds_remaining, json_t** err_out); + + // Rejoin methods + bool cluster_can_be_joined(); void handle_auto_rejoin(); - void find_graph_cycles(); - void check_maxscale_schema_replication(); - MXS_MONITORED_SERVER* getServerByNodeId(long); - MXS_MONITORED_SERVER* getSlaveOfNodeId(long, slave_down_setting_t); + bool get_joinable_servers(ServerRefArray* output); + bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, MariaDBServer* master, json_t** output); + bool can_replicate_from(MariaDBServer* slave_cand, MariaDBServer* master); + uint32_t do_rejoin(const ServerRefArray& joinable_servers); + bool join_cluster(MXS_MONITORED_SERVER* server, const char* change_cmd); + + // Methods common to failover/switchover/rejoin + bool uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out); + MariaDBServer* select_new_master(ServerRefArray* slaves_out, json_t** err_out); + MariaDBServer* update_slave_info(MXS_MONITORED_SERVER* server); + bool server_is_excluded(const MXS_MONITORED_SERVER* server); + bool is_candidate_better(const MariaDBServer* current_best_info, const MariaDBServer* candidate_info, + uint32_t gtid_domain); + bool promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out); + int redirect_slaves(MariaDBServer* new_master, const ServerRefArray& slaves, + ServerRefArray* redirected_slaves); + bool redirect_one_slave(MXS_MONITORED_SERVER* slave, const char* change_cmd); + std::string generate_change_master_cmd(const std::string& master_host, int master_port); + bool start_external_replication(MXS_MONITORED_SERVER* new_master, json_t** err_out); + bool wait_cluster_stabilization(MariaDBServer* new_master, const ServerRefArray& slaves, + int seconds_remaining); + void disable_setting(const char* setting); }; /**