diff --git a/server/modules/monitor/mariadbmon/cluster_discovery.cc b/server/modules/monitor/mariadbmon/cluster_discovery.cc index 4292e4633..4f9035e7a 100644 --- a/server/modules/monitor/mariadbmon/cluster_discovery.cc +++ b/server/modules/monitor/mariadbmon/cluster_discovery.cc @@ -713,22 +713,15 @@ void MariaDBMonitor::monitor_mysql_db(MariaDBServer* serv_info) } /** - * Update replication settings and gtid:s of the slave server. + * Update replication settings, gtid:s and slave status of the server. * * @param server Slave to update - * @return Slave server info. NULL on error, or if server is not a slave. + * @return True on success. False on error, or if server is not a slave (slave SQL not running). */ -MariaDBServer* MariaDBMonitor::update_slave_info(MXS_MONITORED_SERVER* server) +bool MariaDBMonitor::update_slave_info(MariaDBServer* server) { - MariaDBServer* info = get_server_info(server); - if (info->slave_status.slave_sql_running && - info->update_replication_settings() && - info->update_gtids() && - info->do_show_slave_status()) - { - return info; - } - return NULL; + return (server->slave_status.slave_sql_running && server->update_replication_settings() && + server->update_gtids() && server->do_show_slave_status()); } /** @@ -957,32 +950,26 @@ static bool check_replicate_wild_ignore_table(MXS_MONITORED_SERVER* database) * only one server must be available and other servers must have passed the configured tolerance level of * failures. * - * @param db Monitor servers - * * @return True if standalone master should be used */ -bool MariaDBMonitor::standalone_master_required(MXS_MONITORED_SERVER *db) +bool MariaDBMonitor::standalone_master_required() { int candidates = 0; - - while (db) + for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) { - if (SERVER_IS_RUNNING(db->server)) + MariaDBServer* server = *iter; + if (server->is_running()) { candidates++; - MariaDBServer *server_info = get_server_info(db); - - if (server_info->read_only || server_info->slave_configured || candidates > 1) + if (server->read_only || server->slave_configured || candidates > 1) { return false; } } - else if (db->mon_err_count < m_failcount) + else if (server->server_base->mon_err_count < m_failcount) { return false; } - - db = db->next; } return candidates == 1; @@ -995,37 +982,36 @@ bool MariaDBMonitor::standalone_master_required(MXS_MONITORED_SERVER *db) * maintenance mode. By setting the servers into maintenance mode, we prevent any possible conflicts when * the failed servers come back up. * - * @param db Monitor servers + * @return True if standalone master was set */ -bool MariaDBMonitor::set_standalone_master(MXS_MONITORED_SERVER *db) +bool MariaDBMonitor::set_standalone_master() { bool rval = false; - - while (db) + for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) { - if (SERVER_IS_RUNNING(db->server)) + MariaDBServer* server = *iter; + auto mon_server = server->server_base; + if (server->is_running()) { - if (!SERVER_IS_MASTER(db->server) && m_warn_set_standalone_master) + if (!server->is_master() && m_warn_set_standalone_master) { MXS_WARNING("Setting standalone master, server '%s' is now the master.%s", - db->server->unique_name, - m_allow_cluster_recovery ? - "" : " All other servers are set into maintenance mode."); + server->name(), m_allow_cluster_recovery ? "" : + " All other servers are set into maintenance mode."); m_warn_set_standalone_master = false; } - server_clear_set_status(db->server, SERVER_SLAVE, SERVER_MASTER | SERVER_STALE_STATUS); - monitor_set_pending_status(db, SERVER_MASTER | SERVER_STALE_STATUS); - monitor_clear_pending_status(db, SERVER_SLAVE); - m_master = get_server_info(db); + server_clear_set_status(mon_server->server, SERVER_SLAVE, SERVER_MASTER | SERVER_STALE_STATUS); + monitor_set_pending_status(mon_server, SERVER_MASTER | SERVER_STALE_STATUS); + monitor_clear_pending_status(mon_server, SERVER_SLAVE); + m_master = server; rval = true; } else if (!m_allow_cluster_recovery) { - server_set_status_nolock(db->server, SERVER_MAINT); - monitor_set_pending_status(db, SERVER_MAINT); + server_set_status_nolock(mon_server->server, SERVER_MAINT); + monitor_set_pending_status(mon_server, SERVER_MAINT); } - db = db->next; } return rval; diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc index ced0f33f6..b5bbaf9bf 100644 --- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc +++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc @@ -227,7 +227,7 @@ int MariaDBMonitor::redirect_slaves(MariaDBServer* new_master, const ServerArray int successes = 0; for (auto iter = slaves.begin(); iter != slaves.end(); iter++) { - if (redirect_one_slave((*iter)->server_base, change_cmd.c_str())) + if (redirect_one_slave(*iter, change_cmd)) { successes++; redirected_slaves->push_back(*iter); @@ -243,12 +243,13 @@ int MariaDBMonitor::redirect_slaves(MariaDBServer* new_master, const ServerArray * @param err_out Error output * @return True if new master accepted commands */ -bool MariaDBMonitor::start_external_replication(MXS_MONITORED_SERVER* new_master, json_t** err_out) +bool MariaDBMonitor::start_external_replication(MariaDBServer* new_master, json_t** err_out) { bool rval = false; + MYSQL* new_master_conn = new_master->server_base->con; string change_cmd = generate_change_master_cmd(m_external_master_host, m_external_master_port); - if (mxs_mysql_query(new_master->con, change_cmd.c_str()) == 0 && - mxs_mysql_query(new_master->con, "START SLAVE;") == 0) + if (mxs_mysql_query(new_master_conn, change_cmd.c_str()) == 0 && + mxs_mysql_query(new_master_conn, "START SLAVE;") == 0) { MXS_NOTICE("New master starting replication from external master %s:%d.", m_external_master_host.c_str(), m_external_master_port); @@ -257,7 +258,7 @@ bool MariaDBMonitor::start_external_replication(MXS_MONITORED_SERVER* new_master else { PRINT_MXS_JSON_ERROR(err_out, "Could not start replication from external master: '%s'.", - mysql_error(new_master->con)); + mysql_error(new_master_conn)); } return rval; } @@ -270,21 +271,24 @@ bool MariaDBMonitor::start_external_replication(MXS_MONITORED_SERVER* new_master * @return True if commands were accepted. This does not guarantee that replication proceeds * successfully. */ -bool MariaDBMonitor::switchover_start_slave(MXS_MONITORED_SERVER* old_master, SERVER* new_master) +bool MariaDBMonitor::switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master) { bool rval = false; - string change_cmd = generate_change_master_cmd(new_master->name, new_master->port); - if (mxs_mysql_query(old_master->con, change_cmd.c_str()) == 0 && - mxs_mysql_query(old_master->con, "START SLAVE;") == 0) + MYSQL* old_master_con = old_master->server_base->con; + SERVER* new_master_server = new_master->server_base->server; + + string change_cmd = generate_change_master_cmd(new_master_server->name, new_master_server->port); + if (mxs_mysql_query(old_master_con, change_cmd.c_str()) == 0 && + mxs_mysql_query(old_master_con, "START SLAVE;") == 0) { MXS_NOTICE("Old master '%s' starting replication from '%s'.", - old_master->server->unique_name, new_master->unique_name); + old_master->name(), new_master->name()); rval = true; } else { MXS_ERROR("Old master '%s' could not start replication: '%s'.", - old_master->server->unique_name, mysql_error(old_master->con)); + old_master->name(), mysql_error(old_master_con)); } return rval; } @@ -296,23 +300,24 @@ bool MariaDBMonitor::switchover_start_slave(MXS_MONITORED_SERVER* old_master, SE * @param change_cmd Change master command, usually generated by generate_change_master_cmd() * @return True if slave accepted all commands */ -bool MariaDBMonitor::redirect_one_slave(MXS_MONITORED_SERVER* slave, const char* change_cmd) +bool MariaDBMonitor::redirect_one_slave(MariaDBServer* slave, const string& change_cmd) { bool success = false; + MYSQL* slave_conn = slave->server_base->con; const char* query = "STOP SLAVE;"; - if (mxs_mysql_query(slave->con, query) == 0) + if (mxs_mysql_query(slave_conn, query) == 0) { query = "RESET SLAVE;"; // To erase any old I/O or SQL errors - if (mxs_mysql_query(slave->con, query) == 0) + if (mxs_mysql_query(slave_conn, query) == 0) { query = "CHANGE MASTER TO ..."; // Don't show the real query as it contains a password. - if (mxs_mysql_query(slave->con, change_cmd) == 0) + if (mxs_mysql_query(slave_conn, change_cmd.c_str()) == 0) { query = "START SLAVE;"; - if (mxs_mysql_query(slave->con, query) == 0) + if (mxs_mysql_query(slave_conn, query) == 0) { success = true; - MXS_NOTICE("Slave '%s' redirected to new master.", slave->server->unique_name); + MXS_NOTICE("Slave '%s' redirected to new master.", slave->name()); } } } @@ -320,8 +325,8 @@ bool MariaDBMonitor::redirect_one_slave(MXS_MONITORED_SERVER* slave, const char* if (!success) { - MXS_WARNING("Slave '%s' redirection failed: '%s'. Query: '%s'.", slave->server->unique_name, - mysql_error(slave->con), query); + MXS_WARNING("Slave '%s' redirection failed: '%s'. Query: '%s'.", slave->name(), + mysql_error(slave_conn), query); } return success; } @@ -350,13 +355,13 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers) if (joinable->n_slaves_configured == 0) { MXS_NOTICE("Directing standalone server '%s' to replicate from '%s'.", name, master_name); - op_success = join_cluster(joinable->server_base, change_cmd.c_str()); + op_success = join_cluster(joinable, change_cmd); } else { MXS_NOTICE("Server '%s' is replicating from a server other than '%s', " "redirecting it to '%s'.", name, master_name, master_name); - op_success = redirect_one_slave(joinable->server_base, change_cmd.c_str()); + op_success = redirect_one_slave(joinable, change_cmd); } if (op_success) @@ -431,31 +436,32 @@ bool MariaDBMonitor::get_joinable_servers(ServerArray* output) * @param change_cmd Change master command * @return True if commands were accepted by server */ -bool MariaDBMonitor::join_cluster(MXS_MONITORED_SERVER* server, const char* change_cmd) +bool MariaDBMonitor::join_cluster(MariaDBServer* server, const string& change_cmd) { /* Server does not have slave connections. This operation can fail, or the resulting * replication may end up broken. */ bool success = false; string error_msg; + MYSQL* server_conn = server->server_base->con; const char* query = "SET GLOBAL read_only=1;"; - if (mxs_mysql_query(server->con, query) == 0) + if (mxs_mysql_query(server_conn, query) == 0) { query = "CHANGE MASTER TO ..."; // Don't show the real query as it contains a password. - if (mxs_mysql_query(server->con, change_cmd) == 0) + if (mxs_mysql_query(server_conn, change_cmd.c_str()) == 0) { query = "START SLAVE;"; - if (mxs_mysql_query(server->con, query) == 0) + if (mxs_mysql_query(server_conn, query) == 0) { success = true; - MXS_NOTICE("Standalone server '%s' starting replication.", server->server->unique_name); + MXS_NOTICE("Standalone server '%s' starting replication.", server->name()); } } if (!success) { // A step after "SET GLOBAL read_only=1" failed, try to undo. First, backup error message. - error_msg = mysql_error(server->con); - mxs_mysql_query(server->con, "SET GLOBAL read_only=0;"); + error_msg = mysql_error(server_conn); + mxs_mysql_query(server_conn, "SET GLOBAL read_only=0;"); } } @@ -463,10 +469,10 @@ bool MariaDBMonitor::join_cluster(MXS_MONITORED_SERVER* server, const char* chan { if (error_msg.empty()) { - error_msg = mysql_error(server->con); + error_msg = mysql_error(server_conn); } MXS_WARNING("Standalone server '%s' failed to start replication: '%s'. Query: '%s'.", - server->server->unique_name, error_msg.c_str(), query); + server->name(), error_msg.c_str(), query); } return success; } @@ -586,7 +592,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer else { // Check user-given new master. Some checks have already been performed but more is needed. - if (switchover_check_preferred_master((*new_master)->server_base, err_out)) + if (switchover_check_preferred_master(*new_master, err_out)) { promotion_target = *new_master; /* User-given candidate is good. Update info on all slave servers. @@ -597,14 +603,9 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) { MariaDBServer* server = *iter; - if (server != promotion_target) + if (server != promotion_target && update_slave_info(server) && server != demotion_target) { - MariaDBServer* slave_info = update_slave_info(server->server_base); - // If master is replicating from external master, it is updated but not added to array. - if (slave_info && server != demotion_target) - { - redirectable_slaves.push_back(server); - } + redirectable_slaves.push_back(server); } } } @@ -617,7 +618,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer bool rval = false; // Step 2: Set read-only to on, flush logs, update master gtid:s - if (switchover_demote_master(demotion_target->server_base, demotion_target, err_out)) + if (switchover_demote_master(demotion_target, err_out)) { bool catchup_and_promote_success = false; time_t step2_time = time(NULL); @@ -627,7 +628,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer ServerArray catchup_slaves = redirectable_slaves; catchup_slaves.push_back(promotion_target); if (switchover_wait_slaves_catchup(catchup_slaves, demotion_target->gtid_binlog_pos, - seconds_remaining, m_monitor_base->read_timeout, err_out)) + seconds_remaining, err_out)) { time_t step3_time = time(NULL); int seconds_step3 = difftime(step3_time, step2_time); @@ -635,13 +636,12 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer seconds_remaining -= seconds_step3; // Step 4: On new master STOP and RESET SLAVE, set read-only to off. - if (promote_new_master(promotion_target->server_base, err_out)) + if (promote_new_master(promotion_target, err_out)) { catchup_and_promote_success = true; // Step 5: Redirect slaves and start replication on old master. ServerArray redirected_slaves; - bool start_ok = switchover_start_slave(demotion_target->server_base, - promotion_target->server_base->server); + bool start_ok = switchover_start_slave(demotion_target, promotion_target); if (start_ok) { redirected_slaves.push_back(demotion_target); @@ -698,7 +698,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer // Try to reactivate external replication if any. if (m_external_master_port != PORT_UNKNOWN) { - start_external_replication(promotion_target->server_base, err_out); + start_external_replication(promotion_target, err_out); } } } @@ -735,7 +735,7 @@ bool MariaDBMonitor::do_failover(json_t** err_out) bool rval = false; // Step 2: Wait until relay log consumed. - if (failover_wait_relay_log(new_master->server_base, seconds_remaining, err_out)) + if (failover_wait_relay_log(new_master, seconds_remaining, err_out)) { time_t step2_time = time(NULL); int seconds_step2 = difftime(step2_time, step1_time); @@ -743,7 +743,7 @@ bool MariaDBMonitor::do_failover(json_t** err_out) seconds_remaining -= seconds_step2; // Step 3: Stop and reset slave, set read-only to 0. - if (promote_new_master(new_master->server_base, err_out)) + if (promote_new_master(new_master, err_out)) { // Step 4: Redirect slaves. ServerArray redirected_slaves; @@ -797,31 +797,30 @@ bool MariaDBMonitor::do_failover(json_t** err_out) * @param err_out Json error output * @return True if relay log was processed within time limit, or false if time ran out or an error occurred. */ -bool MariaDBMonitor::failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, int seconds_remaining, +bool MariaDBMonitor::failover_wait_relay_log(MariaDBServer* new_master, int seconds_remaining, json_t** err_out) { - MariaDBServer* master_info = get_server_info(new_master); time_t begin = time(NULL); bool query_ok = true; bool io_pos_stable = true; - while (master_info->relay_log_events() > 0 && + while (new_master->relay_log_events() > 0 && query_ok && io_pos_stable && difftime(time(NULL), begin) < seconds_remaining) { MXS_INFO("Relay log of server '%s' not yet empty, waiting to clear %" PRId64 " events.", - new_master->server->unique_name, master_info->relay_log_events()); + new_master->name(), new_master->relay_log_events()); thread_millisleep(1000); // Sleep for a while before querying server again. // Todo: check server version before entering failover. - GtidList old_gtid_io_pos = master_info->slave_status.gtid_io_pos; + GtidList old_gtid_io_pos = new_master->slave_status.gtid_io_pos; // Update gtid:s first to make sure Gtid_IO_Pos is the more recent value. // It doesn't matter here, but is a general rule. - query_ok = master_info->update_gtids() && master_info->do_show_slave_status(); - io_pos_stable = (old_gtid_io_pos == master_info->slave_status.gtid_io_pos); + query_ok = new_master->update_gtids() && new_master->do_show_slave_status(); + io_pos_stable = (old_gtid_io_pos == new_master->slave_status.gtid_io_pos); } bool rval = false; - if (master_info->relay_log_events() == 0) + if (new_master->relay_log_events() == 0) { rval = true; } @@ -836,14 +835,13 @@ bool MariaDBMonitor::failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, i { reason = "Old master sent new event(s)"; } - else if (master_info->relay_log_events() < 0) + else if (new_master->relay_log_events() < 0) // TODO: This is currently impossible { - reason = "Invalid Gtid(s) (current_pos: " + master_info->gtid_current_pos.to_string() + - ", io_pos: " + master_info->slave_status.gtid_io_pos.to_string() + ")"; + reason = "Invalid Gtid(s) (current_pos: " + new_master->gtid_current_pos.to_string() + + ", io_pos: " + new_master->slave_status.gtid_io_pos.to_string() + ")"; } PRINT_MXS_JSON_ERROR(err_out, "Failover: %s while waiting for server '%s' to process relay log. " - "Cancelling failover.", - reason.c_str(), new_master->server->unique_name); + "Cancelling failover.", reason.c_str(), new_master->name()); rval = false; } return rval; @@ -858,16 +856,15 @@ bool MariaDBMonitor::failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, i * @param err_out json object for error printing. Can be NULL. * @return True if successful. */ -bool MariaDBMonitor::switchover_demote_master(MXS_MONITORED_SERVER* current_master, MariaDBServer* info, - json_t** err_out) +bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, json_t** err_out) { - MXS_NOTICE("Demoting server '%s'.", current_master->server->unique_name); + MXS_NOTICE("Demoting server '%s'.", current_master->name()); bool success = false; bool query_error = false; - MYSQL* conn = current_master->con; + MYSQL* conn = current_master->server_base->con; const char* query = ""; // The next query to execute. Used also for error printing. // The presence of an external master changes several things. - const bool external_master = SERVER_IS_SLAVE_OF_EXTERNAL_MASTER(current_master->server); + const bool external_master = SERVER_IS_SLAVE_OF_EXTERNAL_MASTER(current_master->server_base->server); if (external_master) { @@ -904,7 +901,7 @@ bool MariaDBMonitor::switchover_demote_master(MXS_MONITORED_SERVER* current_mast if (!query_error) { query = ""; - if (info->update_gtids()) + if (current_master->update_gtids()) { success = true; } @@ -959,12 +956,11 @@ bool MariaDBMonitor::switchover_demote_master(MXS_MONITORED_SERVER* current_mast * @param slave Slaves to wait on * @param gtid Which gtid must be reached * @param total_timeout Maximum wait time in seconds - * @param read_timeout The value of read_timeout for the connection TODO: see if timeouts can be removed here * @param err_out json object for error printing. Can be NULL. * @return True, if target gtid was reached within allotted time for all servers */ bool MariaDBMonitor::switchover_wait_slaves_catchup(const ServerArray& slaves, const GtidList& gtid, - int total_timeout, int read_timeout, json_t** err_out) + int total_timeout, json_t** err_out) { bool success = true; int seconds_remaining = total_timeout; @@ -1089,15 +1085,14 @@ bool MariaDBMonitor::wait_cluster_stabilization(MariaDBServer* new_master, const * @param err_out Json object for error printing. Can be NULL. * @return True, if given slave is a valid promotion candidate. */ -bool MariaDBMonitor::switchover_check_preferred_master(MXS_MONITORED_SERVER* preferred, json_t** err_out) +bool MariaDBMonitor::switchover_check_preferred_master(MariaDBServer* preferred, json_t** err_out) { ss_dassert(preferred); bool rval = true; - MariaDBServer* preferred_info = update_slave_info(preferred); - if (preferred_info == NULL || !preferred_info->check_replication_settings()) + if (!update_slave_info(preferred) || !preferred->check_replication_settings()) { PRINT_MXS_JSON_ERROR(err_out, "The requested server '%s' is not a valid promotion candidate.", - preferred->server->unique_name); + preferred->name()); rval = false; } return rval; @@ -1110,18 +1105,19 @@ bool MariaDBMonitor::switchover_check_preferred_master(MXS_MONITORED_SERVER* pre * @param err_out json object for error printing. Can be NULL. * @return True if successful */ -bool MariaDBMonitor::promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out) +bool MariaDBMonitor::promote_new_master(MariaDBServer* new_master, json_t** err_out) { bool success = false; - MXS_NOTICE("Promoting server '%s' to master.", new_master->server->unique_name); + MYSQL* new_master_conn = new_master->server_base->con; + MXS_NOTICE("Promoting server '%s' to master.", new_master->name()); const char* query = "STOP SLAVE;"; - if (mxs_mysql_query(new_master->con, query) == 0) + if (mxs_mysql_query(new_master_conn, query) == 0) { query = "RESET SLAVE ALL;"; - if (mxs_mysql_query(new_master->con, query) == 0) + if (mxs_mysql_query(new_master_conn, query) == 0) { query = "SET GLOBAL read_only=0;"; - if (mxs_mysql_query(new_master->con, query) == 0) + if (mxs_mysql_query(new_master_conn, query) == 0) { success = true; } @@ -1131,7 +1127,7 @@ bool MariaDBMonitor::promote_new_master(MXS_MONITORED_SERVER* new_master, json_t if (!success) { PRINT_MXS_JSON_ERROR(err_out, "Promotion failed: '%s'. Query: '%s'.", - mysql_error(new_master->con), query); + mysql_error(new_master_conn), query); } // If the previous master was a slave to an external master, start the equivalent slave connection on // the new master. Success of replication is not checked. @@ -1154,8 +1150,7 @@ MariaDBServer* MariaDBMonitor::select_new_master(ServerArray* slaves_out, json_t ss_dassert(slaves_out && slaves_out->size() == 0); /* Select a new master candidate. Selects the one with the latest event in relay log. * If multiple slaves have same number of events, select the one with most processed events. */ - MXS_MONITORED_SERVER* current_best = NULL; - MariaDBServer* current_best_info = NULL; + MariaDBServer* current_best = NULL; // Servers that cannot be selected because of exclusion, but seem otherwise ok. ServerArray valid_but_excluded; // Index of the current best candidate in slaves_out @@ -1167,12 +1162,12 @@ MariaDBServer* MariaDBMonitor::select_new_master(ServerArray* slaves_out, json_t * Do not worry about the exclusion list yet, querying the excluded servers is ok. * If master is replicating from external master, it is updated by update_slave_info() * but not added to array. */ - MariaDBServer* cand = update_slave_info((*iter)->server_base); - if (cand && cand != m_master) + MariaDBServer* cand = *iter; + if (update_slave_info(cand) && cand != m_master) { slaves_out->push_back(cand); // Check that server is not in the exclusion list while still being a valid choice. - if (server_is_excluded(cand->server_base) && cand->check_replication_settings(WARNINGS_OFF)) + if (server_is_excluded(cand) && cand->check_replication_settings(WARNINGS_OFF)) { valid_but_excluded.push_back(cand); const char CANNOT_SELECT[] = "Promotion candidate '%s' is excluded from new " @@ -1182,12 +1177,10 @@ MariaDBServer* MariaDBMonitor::select_new_master(ServerArray* slaves_out, json_t else if (cand->check_replication_settings()) { // If no new master yet, accept any valid candidate. Otherwise check. - if (current_best == NULL || - is_candidate_better(current_best_info, cand, m_master_gtid_domain)) + if (current_best == NULL || is_candidate_better(current_best, cand, m_master_gtid_domain)) { // The server has been selected for promotion, for now. - current_best = cand->server_base; - current_best_info = cand; + current_best = cand; master_vector_index = slaves_out->size() - 1; } } @@ -1214,14 +1207,13 @@ MariaDBServer* MariaDBMonitor::select_new_master(ServerArray* slaves_out, json_t MXS_WARNING(EXCLUDED_ONLY_CAND, excluded_name); break; } - else if (is_candidate_better(current_best_info, excluded_info, m_master_gtid_domain)) + else if (is_candidate_better(current_best, excluded_info, m_master_gtid_domain)) { - // Print a warning if this server is actually a better candidate than the previous - // best. - const char EXCLUDED_CAND[] = "Server '%s' is superior to current " - "best candidate '%s', but cannot be selected as it's excluded. This may lead to " + // Print a warning if this server is actually a better candidate than the previous best. + const char EXCLUDED_CAND[] = "Server '%s' is superior to current best candidate '%s', " + "but cannot be selected as it's excluded. This may lead to " "loss of data if '%s' is ahead of other servers."; - MXS_WARNING(EXCLUDED_CAND, excluded_name, current_best->server->unique_name, excluded_name); + MXS_WARNING(EXCLUDED_CAND, excluded_name, current_best->name(), excluded_name); break; } } @@ -1230,22 +1222,20 @@ MariaDBServer* MariaDBMonitor::select_new_master(ServerArray* slaves_out, json_t { PRINT_MXS_JSON_ERROR(err_out, "No suitable promotion candidate found."); } - return current_best ? get_server_info(current_best) : NULL; + return current_best; } /** * Is the server in the excluded list * - * @param handle Cluster monitor * @param server Server to test * @return True if server is in the excluded-list of the monitor. */ -bool MariaDBMonitor::server_is_excluded(const MXS_MONITORED_SERVER* server) +bool MariaDBMonitor::server_is_excluded(const MariaDBServer* server) { - size_t n_excluded = m_excluded_servers.size(); - for (size_t i = 0; i < n_excluded; i++) + for (auto iter = m_excluded_servers.begin(); iter != m_excluded_servers.end(); iter++) { - if (m_excluded_servers[i]->server_base == server) + if (*iter == server) { return true; } @@ -1261,16 +1251,16 @@ bool MariaDBMonitor::server_is_excluded(const MXS_MONITORED_SERVER* server) * @param gtid_domain Which domain to compare * @return True if candidate is better */ -bool MariaDBMonitor::is_candidate_better(const MariaDBServer* current_best_info, - const MariaDBServer* candidate_info, uint32_t gtid_domain) +bool MariaDBMonitor::is_candidate_better(const MariaDBServer* current_best, const MariaDBServer* candidate, + uint32_t gtid_domain) { - uint64_t cand_io = candidate_info->slave_status.gtid_io_pos.get_gtid(gtid_domain).m_sequence; - uint64_t cand_processed = candidate_info->gtid_current_pos.get_gtid(gtid_domain).m_sequence; - uint64_t curr_io = current_best_info->slave_status.gtid_io_pos.get_gtid(gtid_domain).m_sequence; - uint64_t curr_processed = current_best_info->gtid_current_pos.get_gtid(gtid_domain).m_sequence; + uint64_t cand_io = candidate->slave_status.gtid_io_pos.get_gtid(gtid_domain).m_sequence; + uint64_t cand_processed = candidate->gtid_current_pos.get_gtid(gtid_domain).m_sequence; + uint64_t curr_io = current_best->slave_status.gtid_io_pos.get_gtid(gtid_domain).m_sequence; + uint64_t curr_processed = current_best->gtid_current_pos.get_gtid(gtid_domain).m_sequence; - bool cand_updates = candidate_info->rpl_settings.log_slave_updates; - bool curr_updates = current_best_info->rpl_settings.log_slave_updates; + bool cand_updates = candidate->rpl_settings.log_slave_updates; + bool curr_updates = current_best->rpl_settings.log_slave_updates; bool is_better = false; // Accept a slave with a later event in relay log. if (cand_io > curr_io) @@ -1379,16 +1369,14 @@ bool MariaDBMonitor::failover_check(json_t** error_out) int slaves = 0; bool error = false; - for (MXS_MONITORED_SERVER* mon_server = m_monitor_base->monitored_servers; - mon_server != NULL; - mon_server = mon_server->next) + for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) { - uint64_t status_bits = mon_server->server->status; + MariaDBServer* server = *iter; + uint64_t status_bits = server->server_base->server->status; uint64_t master_up = (SERVER_MASTER | SERVER_RUNNING); if ((status_bits & master_up) == master_up) { - string master_up_msg = string("Master server '") + mon_server->server->unique_name + - "' is running"; + string master_up_msg = string("Master server '") + server->name() + "' is running"; if (status_bits & SERVER_MAINT) { master_up_msg += ", although in maintenance mode"; @@ -1397,9 +1385,9 @@ bool MariaDBMonitor::failover_check(json_t** error_out) PRINT_MXS_JSON_ERROR(error_out, "%s", master_up_msg.c_str()); error = true; } - else if (SERVER_IS_SLAVE(mon_server->server)) + else if (server->is_slave()) { - if (uses_gtid(mon_server, error_out)) + if (uses_gtid(server, error_out)) { slaves++; } @@ -1529,13 +1517,12 @@ bool MariaDBMonitor::mon_process_failover(bool* cluster_modified_out) * @return True if using gtid-replication. False if not, or if server is not a slave or otherwise does * not have a gtid_IO_Pos. */ -bool MariaDBMonitor::uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out) +bool MariaDBMonitor::uses_gtid(MariaDBServer* server, json_t** error_out) { bool rval = false; - const MariaDBServer* info = get_server_info(mon_server); - if (info->slave_status.gtid_io_pos.empty()) + if (server->slave_status.gtid_io_pos.empty()) { - string slave_not_gtid_msg = string("Slave server ") + mon_server->server->unique_name + + string slave_not_gtid_msg = string("Slave server ") + server->name() + " is not using gtid replication."; PRINT_MXS_JSON_ERROR(error_out, "%s", slave_not_gtid_msg.c_str()); } @@ -1685,7 +1672,7 @@ bool MariaDBMonitor::switchover_check(SERVER* new_master, SERVER* current_master bool gtid_ok = true; for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) { - if ((*iter)->is_slave() && !uses_gtid((*iter)->server_base, error_out)) + if ((*iter)->is_slave() && !uses_gtid(*iter, error_out)) { gtid_ok = false; } diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index b5f9fe08b..0da718302 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -436,10 +436,10 @@ void MariaDBMonitor::main_loop() if we need to use standalone master. */ if (m_detect_standalone_master) { - if (standalone_master_required(m_monitor_base->monitored_servers)) + if (standalone_master_required()) { // Other servers have died, set last remaining server as master - if (set_standalone_master(m_monitor_base->monitored_servers)) + if (set_standalone_master()) { // Update the root_master to point to the standalone master root_master = m_master; @@ -582,21 +582,22 @@ void MariaDBMonitor::update_external_master() } } -void MariaDBMonitor::measure_replication_lag(MariaDBServer* root_master_server) +void MariaDBMonitor::measure_replication_lag(MariaDBServer* root_master) { - MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->server_base : NULL; + ss_dassert(root_master); + MXS_MONITORED_SERVER* mon_root_master = root_master->server_base; set_master_heartbeat(root_master); for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) { - MXS_MONITORED_SERVER* ptr = (*iter)->server_base; - if ((!SERVER_IN_MAINT(ptr->server)) && SERVER_IS_RUNNING(ptr->server)) + MariaDBServer* server = *iter; + MXS_MONITORED_SERVER* ptr = server->server_base; + if ((!SERVER_IN_MAINT(ptr->server)) && server->is_running()) { - if (ptr->server->node_id != root_master->server->node_id && - (SERVER_IS_SLAVE(ptr->server) || - SERVER_IS_RELAY_SERVER(ptr->server)) && - !(*iter)->binlog_relay) // No select lag for Binlog Server + if (ptr->server->node_id != mon_root_master->server->node_id && + (server->is_slave() || SERVER_IS_RELAY_SERVER(ptr->server)) && + !server->binlog_relay) // No select lag for Binlog Server { - set_slave_heartbeat(ptr); + set_slave_heartbeat(server); } } } @@ -731,9 +732,9 @@ static int get_row_count(MXS_MONITORED_SERVER *database, const char* query) * Write the replication heartbeat into the maxscale_schema.replication_heartbeat table in the current master. * The inserted value will be seen from all slaves replicating from this master. * - * @param database The number database server + * @param server The server to write the heartbeat to */ -void MariaDBMonitor::set_master_heartbeat(MXS_MONITORED_SERVER *database) +void MariaDBMonitor::set_master_heartbeat(MariaDBServer* server) { time_t heartbeat; time_t purge_time; @@ -746,6 +747,7 @@ void MariaDBMonitor::set_master_heartbeat(MXS_MONITORED_SERVER *database) return; } + MXS_MONITORED_SERVER* database = server->server_base; int n_db = get_row_count(database, "SELECT schema_name FROM information_schema.schemata " "WHERE schema_name = 'maxscale_schema'"); int n_tbl = get_row_count(database, "SELECT table_name FROM information_schema.tables " @@ -845,9 +847,9 @@ void MariaDBMonitor::set_master_heartbeat(MXS_MONITORED_SERVER *database) * This function gets the replication heartbeat from the maxscale_schema.replication_heartbeat table in * the current slave and stores the timestamp and replication lag in the slave server struct. * - * @param database The number database server + * @param server The slave to measure lag at */ -void MariaDBMonitor::set_slave_heartbeat(MXS_MONITORED_SERVER *database) +void MariaDBMonitor::set_slave_heartbeat(MariaDBServer* server) { time_t heartbeat; char select_heartbeat_query[256] = ""; @@ -867,6 +869,7 @@ void MariaDBMonitor::set_slave_heartbeat(MXS_MONITORED_SERVER *database) "WHERE maxscale_id = %lu AND master_server_id = %li", m_id, m_master->server_base->server->node_id); + MXS_MONITORED_SERVER* database = server->server_base; /* if there is a master then send the query to the slave with master_id */ if (m_master != NULL && (mxs_mysql_query(database->con, select_heartbeat_query) == 0 && (result = mysql_store_result(database->con)) != NULL)) diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh index f4f5d01d0..dbfe8b69e 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.hh +++ b/server/modules/monitor/mariadbmon/mariadbmon.hh @@ -170,14 +170,14 @@ private: MXS_MONITORED_SERVER* build_mysql51_replication_tree(); void find_graph_cycles(); void update_server_states(MariaDBServer& db_server, MariaDBServer* root_master); - bool standalone_master_required(MXS_MONITORED_SERVER* db); - bool set_standalone_master(MXS_MONITORED_SERVER* db); + bool standalone_master_required(); + bool set_standalone_master(); void assign_relay_master(MariaDBServer& serv_info); void log_master_changes(MariaDBServer* root_master, int* log_no_master); void update_gtid_domain(); void update_external_master(); - void set_master_heartbeat(MXS_MONITORED_SERVER *); - void set_slave_heartbeat(MXS_MONITORED_SERVER *); + void set_master_heartbeat(MariaDBServer*); + void set_slave_heartbeat(MariaDBServer*); void measure_replication_lag(MariaDBServer* root_master); void check_maxscale_schema_replication(); MXS_MONITORED_SERVER* getServerByNodeId(long); @@ -191,12 +191,12 @@ private: bool switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master, json_t** error_out) const; bool do_switchover(MariaDBServer** current_master, MariaDBServer** new_master, json_t** err_out); - bool switchover_check_preferred_master(MXS_MONITORED_SERVER* preferred, json_t** err_out); - bool switchover_demote_master(MXS_MONITORED_SERVER* current_master, MariaDBServer* info, + bool switchover_check_preferred_master(MariaDBServer* preferred, json_t** err_out); + bool switchover_demote_master(MariaDBServer* current_master, json_t** err_out); bool switchover_wait_slaves_catchup(const ServerArray& slaves, const GtidList& gtid, int total_timeout, - int read_timeout, json_t** err_out); - bool switchover_start_slave(MXS_MONITORED_SERVER* old_master, SERVER* new_master); + json_t** err_out); + bool switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master); // Failover methods void handle_auto_failover(bool* failover_performed); @@ -205,7 +205,7 @@ private: bool mon_process_failover(bool* cluster_modified_out); bool failover_check(json_t** error_out); bool do_failover(json_t** err_out); - bool failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, int seconds_remaining, json_t** err_out); + bool failover_wait_relay_log(MariaDBServer* new_master, int seconds_remaining, json_t** err_out); // Rejoin methods bool cluster_can_be_joined(); @@ -214,21 +214,21 @@ private: bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, MariaDBServer* master, json_t** output); bool can_replicate_from(MariaDBServer* slave_cand, MariaDBServer* master); uint32_t do_rejoin(const ServerArray& joinable_servers); - bool join_cluster(MXS_MONITORED_SERVER* server, const char* change_cmd); + bool join_cluster(MariaDBServer* server, const std::string& change_cmd); // Methods common to failover/switchover/rejoin - bool uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out); + bool uses_gtid(MariaDBServer* mon_server, json_t** error_out); MariaDBServer* select_new_master(ServerArray* slaves_out, json_t** err_out); - MariaDBServer* update_slave_info(MXS_MONITORED_SERVER* server); - bool server_is_excluded(const MXS_MONITORED_SERVER* server); - bool is_candidate_better(const MariaDBServer* current_best_info, const MariaDBServer* candidate_info, + bool update_slave_info(MariaDBServer* server); + bool server_is_excluded(const MariaDBServer* server); + bool is_candidate_better(const MariaDBServer* current_best, const MariaDBServer* candidate, uint32_t gtid_domain); - bool promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out); + bool promote_new_master(MariaDBServer* new_master, json_t** err_out); int redirect_slaves(MariaDBServer* new_master, const ServerArray& slaves, ServerArray* redirected_slaves); - bool redirect_one_slave(MXS_MONITORED_SERVER* slave, const char* change_cmd); + bool redirect_one_slave(MariaDBServer* slave, const std::string& change_cmd); std::string generate_change_master_cmd(const std::string& master_host, int master_port); - bool start_external_replication(MXS_MONITORED_SERVER* new_master, json_t** err_out); + bool start_external_replication(MariaDBServer* new_master, json_t** err_out); bool wait_cluster_stabilization(MariaDBServer* new_master, const ServerArray& slaves, int seconds_remaining); void disable_setting(const char* setting);