MXS-1703 Cleanup more methods

Most monitor functions now work with the monitor's own server class.
This commit is contained in:
Esa Korhonen 2018-04-13 10:01:24 +03:00
parent ec33fcf87d
commit c43f64c87e
4 changed files with 167 additions and 191 deletions

View File

@ -713,22 +713,15 @@ void MariaDBMonitor::monitor_mysql_db(MariaDBServer* serv_info)
}
/**
* Update replication settings and gtid:s of the slave server.
* Update replication settings, gtid:s and slave status of the server.
*
* @param server Slave to update
* @return Slave server info. NULL on error, or if server is not a slave.
* @return True on success. False on error, or if server is not a slave (slave SQL not running).
*/
MariaDBServer* MariaDBMonitor::update_slave_info(MXS_MONITORED_SERVER* server)
bool MariaDBMonitor::update_slave_info(MariaDBServer* server)
{
MariaDBServer* info = get_server_info(server);
if (info->slave_status.slave_sql_running &&
info->update_replication_settings() &&
info->update_gtids() &&
info->do_show_slave_status())
{
return info;
}
return NULL;
return (server->slave_status.slave_sql_running && server->update_replication_settings() &&
server->update_gtids() && server->do_show_slave_status());
}
/**
@ -957,32 +950,26 @@ static bool check_replicate_wild_ignore_table(MXS_MONITORED_SERVER* database)
* only one server must be available and other servers must have passed the configured tolerance level of
* failures.
*
* @param db Monitor servers
*
* @return True if standalone master should be used
*/
bool MariaDBMonitor::standalone_master_required(MXS_MONITORED_SERVER *db)
bool MariaDBMonitor::standalone_master_required()
{
int candidates = 0;
while (db)
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
{
if (SERVER_IS_RUNNING(db->server))
MariaDBServer* server = *iter;
if (server->is_running())
{
candidates++;
MariaDBServer *server_info = get_server_info(db);
if (server_info->read_only || server_info->slave_configured || candidates > 1)
if (server->read_only || server->slave_configured || candidates > 1)
{
return false;
}
}
else if (db->mon_err_count < m_failcount)
else if (server->server_base->mon_err_count < m_failcount)
{
return false;
}
db = db->next;
}
return candidates == 1;
@ -995,37 +982,36 @@ bool MariaDBMonitor::standalone_master_required(MXS_MONITORED_SERVER *db)
* maintenance mode. By setting the servers into maintenance mode, we prevent any possible conflicts when
* the failed servers come back up.
*
* @param db Monitor servers
* @return True if standalone master was set
*/
bool MariaDBMonitor::set_standalone_master(MXS_MONITORED_SERVER *db)
bool MariaDBMonitor::set_standalone_master()
{
bool rval = false;
while (db)
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
{
if (SERVER_IS_RUNNING(db->server))
MariaDBServer* server = *iter;
auto mon_server = server->server_base;
if (server->is_running())
{
if (!SERVER_IS_MASTER(db->server) && m_warn_set_standalone_master)
if (!server->is_master() && m_warn_set_standalone_master)
{
MXS_WARNING("Setting standalone master, server '%s' is now the master.%s",
db->server->unique_name,
m_allow_cluster_recovery ?
"" : " All other servers are set into maintenance mode.");
server->name(), m_allow_cluster_recovery ? "" :
" All other servers are set into maintenance mode.");
m_warn_set_standalone_master = false;
}
server_clear_set_status(db->server, SERVER_SLAVE, SERVER_MASTER | SERVER_STALE_STATUS);
monitor_set_pending_status(db, SERVER_MASTER | SERVER_STALE_STATUS);
monitor_clear_pending_status(db, SERVER_SLAVE);
m_master = get_server_info(db);
server_clear_set_status(mon_server->server, SERVER_SLAVE, SERVER_MASTER | SERVER_STALE_STATUS);
monitor_set_pending_status(mon_server, SERVER_MASTER | SERVER_STALE_STATUS);
monitor_clear_pending_status(mon_server, SERVER_SLAVE);
m_master = server;
rval = true;
}
else if (!m_allow_cluster_recovery)
{
server_set_status_nolock(db->server, SERVER_MAINT);
monitor_set_pending_status(db, SERVER_MAINT);
server_set_status_nolock(mon_server->server, SERVER_MAINT);
monitor_set_pending_status(mon_server, SERVER_MAINT);
}
db = db->next;
}
return rval;

View File

@ -227,7 +227,7 @@ int MariaDBMonitor::redirect_slaves(MariaDBServer* new_master, const ServerArray
int successes = 0;
for (auto iter = slaves.begin(); iter != slaves.end(); iter++)
{
if (redirect_one_slave((*iter)->server_base, change_cmd.c_str()))
if (redirect_one_slave(*iter, change_cmd))
{
successes++;
redirected_slaves->push_back(*iter);
@ -243,12 +243,13 @@ int MariaDBMonitor::redirect_slaves(MariaDBServer* new_master, const ServerArray
* @param err_out Error output
* @return True if new master accepted commands
*/
bool MariaDBMonitor::start_external_replication(MXS_MONITORED_SERVER* new_master, json_t** err_out)
bool MariaDBMonitor::start_external_replication(MariaDBServer* new_master, json_t** err_out)
{
bool rval = false;
MYSQL* new_master_conn = new_master->server_base->con;
string change_cmd = generate_change_master_cmd(m_external_master_host, m_external_master_port);
if (mxs_mysql_query(new_master->con, change_cmd.c_str()) == 0 &&
mxs_mysql_query(new_master->con, "START SLAVE;") == 0)
if (mxs_mysql_query(new_master_conn, change_cmd.c_str()) == 0 &&
mxs_mysql_query(new_master_conn, "START SLAVE;") == 0)
{
MXS_NOTICE("New master starting replication from external master %s:%d.",
m_external_master_host.c_str(), m_external_master_port);
@ -257,7 +258,7 @@ bool MariaDBMonitor::start_external_replication(MXS_MONITORED_SERVER* new_master
else
{
PRINT_MXS_JSON_ERROR(err_out, "Could not start replication from external master: '%s'.",
mysql_error(new_master->con));
mysql_error(new_master_conn));
}
return rval;
}
@ -270,21 +271,24 @@ bool MariaDBMonitor::start_external_replication(MXS_MONITORED_SERVER* new_master
* @return True if commands were accepted. This does not guarantee that replication proceeds
* successfully.
*/
bool MariaDBMonitor::switchover_start_slave(MXS_MONITORED_SERVER* old_master, SERVER* new_master)
bool MariaDBMonitor::switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master)
{
bool rval = false;
string change_cmd = generate_change_master_cmd(new_master->name, new_master->port);
if (mxs_mysql_query(old_master->con, change_cmd.c_str()) == 0 &&
mxs_mysql_query(old_master->con, "START SLAVE;") == 0)
MYSQL* old_master_con = old_master->server_base->con;
SERVER* new_master_server = new_master->server_base->server;
string change_cmd = generate_change_master_cmd(new_master_server->name, new_master_server->port);
if (mxs_mysql_query(old_master_con, change_cmd.c_str()) == 0 &&
mxs_mysql_query(old_master_con, "START SLAVE;") == 0)
{
MXS_NOTICE("Old master '%s' starting replication from '%s'.",
old_master->server->unique_name, new_master->unique_name);
old_master->name(), new_master->name());
rval = true;
}
else
{
MXS_ERROR("Old master '%s' could not start replication: '%s'.",
old_master->server->unique_name, mysql_error(old_master->con));
old_master->name(), mysql_error(old_master_con));
}
return rval;
}
@ -296,23 +300,24 @@ bool MariaDBMonitor::switchover_start_slave(MXS_MONITORED_SERVER* old_master, SE
* @param change_cmd Change master command, usually generated by generate_change_master_cmd()
* @return True if slave accepted all commands
*/
bool MariaDBMonitor::redirect_one_slave(MXS_MONITORED_SERVER* slave, const char* change_cmd)
bool MariaDBMonitor::redirect_one_slave(MariaDBServer* slave, const string& change_cmd)
{
bool success = false;
MYSQL* slave_conn = slave->server_base->con;
const char* query = "STOP SLAVE;";
if (mxs_mysql_query(slave->con, query) == 0)
if (mxs_mysql_query(slave_conn, query) == 0)
{
query = "RESET SLAVE;"; // To erase any old I/O or SQL errors
if (mxs_mysql_query(slave->con, query) == 0)
if (mxs_mysql_query(slave_conn, query) == 0)
{
query = "CHANGE MASTER TO ..."; // Don't show the real query as it contains a password.
if (mxs_mysql_query(slave->con, change_cmd) == 0)
if (mxs_mysql_query(slave_conn, change_cmd.c_str()) == 0)
{
query = "START SLAVE;";
if (mxs_mysql_query(slave->con, query) == 0)
if (mxs_mysql_query(slave_conn, query) == 0)
{
success = true;
MXS_NOTICE("Slave '%s' redirected to new master.", slave->server->unique_name);
MXS_NOTICE("Slave '%s' redirected to new master.", slave->name());
}
}
}
@ -320,8 +325,8 @@ bool MariaDBMonitor::redirect_one_slave(MXS_MONITORED_SERVER* slave, const char*
if (!success)
{
MXS_WARNING("Slave '%s' redirection failed: '%s'. Query: '%s'.", slave->server->unique_name,
mysql_error(slave->con), query);
MXS_WARNING("Slave '%s' redirection failed: '%s'. Query: '%s'.", slave->name(),
mysql_error(slave_conn), query);
}
return success;
}
@ -350,13 +355,13 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers)
if (joinable->n_slaves_configured == 0)
{
MXS_NOTICE("Directing standalone server '%s' to replicate from '%s'.", name, master_name);
op_success = join_cluster(joinable->server_base, change_cmd.c_str());
op_success = join_cluster(joinable, change_cmd);
}
else
{
MXS_NOTICE("Server '%s' is replicating from a server other than '%s', "
"redirecting it to '%s'.", name, master_name, master_name);
op_success = redirect_one_slave(joinable->server_base, change_cmd.c_str());
op_success = redirect_one_slave(joinable, change_cmd);
}
if (op_success)
@ -431,31 +436,32 @@ bool MariaDBMonitor::get_joinable_servers(ServerArray* output)
* @param change_cmd Change master command
* @return True if commands were accepted by server
*/
bool MariaDBMonitor::join_cluster(MXS_MONITORED_SERVER* server, const char* change_cmd)
bool MariaDBMonitor::join_cluster(MariaDBServer* server, const string& change_cmd)
{
/* Server does not have slave connections. This operation can fail, or the resulting
* replication may end up broken. */
bool success = false;
string error_msg;
MYSQL* server_conn = server->server_base->con;
const char* query = "SET GLOBAL read_only=1;";
if (mxs_mysql_query(server->con, query) == 0)
if (mxs_mysql_query(server_conn, query) == 0)
{
query = "CHANGE MASTER TO ..."; // Don't show the real query as it contains a password.
if (mxs_mysql_query(server->con, change_cmd) == 0)
if (mxs_mysql_query(server_conn, change_cmd.c_str()) == 0)
{
query = "START SLAVE;";
if (mxs_mysql_query(server->con, query) == 0)
if (mxs_mysql_query(server_conn, query) == 0)
{
success = true;
MXS_NOTICE("Standalone server '%s' starting replication.", server->server->unique_name);
MXS_NOTICE("Standalone server '%s' starting replication.", server->name());
}
}
if (!success)
{
// A step after "SET GLOBAL read_only=1" failed, try to undo. First, backup error message.
error_msg = mysql_error(server->con);
mxs_mysql_query(server->con, "SET GLOBAL read_only=0;");
error_msg = mysql_error(server_conn);
mxs_mysql_query(server_conn, "SET GLOBAL read_only=0;");
}
}
@ -463,10 +469,10 @@ bool MariaDBMonitor::join_cluster(MXS_MONITORED_SERVER* server, const char* chan
{
if (error_msg.empty())
{
error_msg = mysql_error(server->con);
error_msg = mysql_error(server_conn);
}
MXS_WARNING("Standalone server '%s' failed to start replication: '%s'. Query: '%s'.",
server->server->unique_name, error_msg.c_str(), query);
server->name(), error_msg.c_str(), query);
}
return success;
}
@ -586,7 +592,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer
else
{
// Check user-given new master. Some checks have already been performed but more are needed.
if (switchover_check_preferred_master((*new_master)->server_base, err_out))
if (switchover_check_preferred_master(*new_master, err_out))
{
promotion_target = *new_master;
/* User-given candidate is good. Update info on all slave servers.
@ -597,14 +603,9 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
{
MariaDBServer* server = *iter;
if (server != promotion_target)
if (server != promotion_target && update_slave_info(server) && server != demotion_target)
{
MariaDBServer* slave_info = update_slave_info(server->server_base);
// If master is replicating from external master, it is updated but not added to array.
if (slave_info && server != demotion_target)
{
redirectable_slaves.push_back(server);
}
redirectable_slaves.push_back(server);
}
}
}
@ -617,7 +618,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer
bool rval = false;
// Step 2: Set read-only to on, flush logs, update master gtid:s
if (switchover_demote_master(demotion_target->server_base, demotion_target, err_out))
if (switchover_demote_master(demotion_target, err_out))
{
bool catchup_and_promote_success = false;
time_t step2_time = time(NULL);
@ -627,7 +628,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer
ServerArray catchup_slaves = redirectable_slaves;
catchup_slaves.push_back(promotion_target);
if (switchover_wait_slaves_catchup(catchup_slaves, demotion_target->gtid_binlog_pos,
seconds_remaining, m_monitor_base->read_timeout, err_out))
seconds_remaining, err_out))
{
time_t step3_time = time(NULL);
int seconds_step3 = difftime(step3_time, step2_time);
@ -635,13 +636,12 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer
seconds_remaining -= seconds_step3;
// Step 4: On new master STOP and RESET SLAVE, set read-only to off.
if (promote_new_master(promotion_target->server_base, err_out))
if (promote_new_master(promotion_target, err_out))
{
catchup_and_promote_success = true;
// Step 5: Redirect slaves and start replication on old master.
ServerArray redirected_slaves;
bool start_ok = switchover_start_slave(demotion_target->server_base,
promotion_target->server_base->server);
bool start_ok = switchover_start_slave(demotion_target, promotion_target);
if (start_ok)
{
redirected_slaves.push_back(demotion_target);
@ -698,7 +698,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer
// Try to reactivate external replication if any.
if (m_external_master_port != PORT_UNKNOWN)
{
start_external_replication(promotion_target->server_base, err_out);
start_external_replication(promotion_target, err_out);
}
}
}
@ -735,7 +735,7 @@ bool MariaDBMonitor::do_failover(json_t** err_out)
bool rval = false;
// Step 2: Wait until relay log consumed.
if (failover_wait_relay_log(new_master->server_base, seconds_remaining, err_out))
if (failover_wait_relay_log(new_master, seconds_remaining, err_out))
{
time_t step2_time = time(NULL);
int seconds_step2 = difftime(step2_time, step1_time);
@ -743,7 +743,7 @@ bool MariaDBMonitor::do_failover(json_t** err_out)
seconds_remaining -= seconds_step2;
// Step 3: Stop and reset slave, set read-only to 0.
if (promote_new_master(new_master->server_base, err_out))
if (promote_new_master(new_master, err_out))
{
// Step 4: Redirect slaves.
ServerArray redirected_slaves;
@ -797,31 +797,30 @@ bool MariaDBMonitor::do_failover(json_t** err_out)
* @param err_out Json error output
* @return True if relay log was processed within time limit, or false if time ran out or an error occurred.
*/
bool MariaDBMonitor::failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, int seconds_remaining,
bool MariaDBMonitor::failover_wait_relay_log(MariaDBServer* new_master, int seconds_remaining,
json_t** err_out)
{
MariaDBServer* master_info = get_server_info(new_master);
time_t begin = time(NULL);
bool query_ok = true;
bool io_pos_stable = true;
while (master_info->relay_log_events() > 0 &&
while (new_master->relay_log_events() > 0 &&
query_ok &&
io_pos_stable &&
difftime(time(NULL), begin) < seconds_remaining)
{
MXS_INFO("Relay log of server '%s' not yet empty, waiting to clear %" PRId64 " events.",
new_master->server->unique_name, master_info->relay_log_events());
new_master->name(), new_master->relay_log_events());
thread_millisleep(1000); // Sleep for a while before querying server again.
// Todo: check server version before entering failover.
GtidList old_gtid_io_pos = master_info->slave_status.gtid_io_pos;
GtidList old_gtid_io_pos = new_master->slave_status.gtid_io_pos;
// Update gtid:s first to make sure Gtid_IO_Pos is the more recent value.
// It doesn't matter here, but is a general rule.
query_ok = master_info->update_gtids() && master_info->do_show_slave_status();
io_pos_stable = (old_gtid_io_pos == master_info->slave_status.gtid_io_pos);
query_ok = new_master->update_gtids() && new_master->do_show_slave_status();
io_pos_stable = (old_gtid_io_pos == new_master->slave_status.gtid_io_pos);
}
bool rval = false;
if (master_info->relay_log_events() == 0)
if (new_master->relay_log_events() == 0)
{
rval = true;
}
@ -836,14 +835,13 @@ bool MariaDBMonitor::failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, i
{
reason = "Old master sent new event(s)";
}
else if (master_info->relay_log_events() < 0)
else if (new_master->relay_log_events() < 0) // TODO: This is currently impossible
{
reason = "Invalid Gtid(s) (current_pos: " + master_info->gtid_current_pos.to_string() +
", io_pos: " + master_info->slave_status.gtid_io_pos.to_string() + ")";
reason = "Invalid Gtid(s) (current_pos: " + new_master->gtid_current_pos.to_string() +
", io_pos: " + new_master->slave_status.gtid_io_pos.to_string() + ")";
}
PRINT_MXS_JSON_ERROR(err_out, "Failover: %s while waiting for server '%s' to process relay log. "
"Cancelling failover.",
reason.c_str(), new_master->server->unique_name);
"Cancelling failover.", reason.c_str(), new_master->name());
rval = false;
}
return rval;
@ -858,16 +856,15 @@ bool MariaDBMonitor::failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, i
* @param err_out json object for error printing. Can be NULL.
* @return True if successful.
*/
bool MariaDBMonitor::switchover_demote_master(MXS_MONITORED_SERVER* current_master, MariaDBServer* info,
json_t** err_out)
bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, json_t** err_out)
{
MXS_NOTICE("Demoting server '%s'.", current_master->server->unique_name);
MXS_NOTICE("Demoting server '%s'.", current_master->name());
bool success = false;
bool query_error = false;
MYSQL* conn = current_master->con;
MYSQL* conn = current_master->server_base->con;
const char* query = ""; // The next query to execute. Used also for error printing.
// The presence of an external master changes several things.
const bool external_master = SERVER_IS_SLAVE_OF_EXTERNAL_MASTER(current_master->server);
const bool external_master = SERVER_IS_SLAVE_OF_EXTERNAL_MASTER(current_master->server_base->server);
if (external_master)
{
@ -904,7 +901,7 @@ bool MariaDBMonitor::switchover_demote_master(MXS_MONITORED_SERVER* current_mast
if (!query_error)
{
query = "";
if (info->update_gtids())
if (current_master->update_gtids())
{
success = true;
}
@ -959,12 +956,11 @@ bool MariaDBMonitor::switchover_demote_master(MXS_MONITORED_SERVER* current_mast
* @param slave Slaves to wait on
* @param gtid Which gtid must be reached
* @param total_timeout Maximum wait time in seconds
* @param read_timeout The value of read_timeout for the connection TODO: see if timeouts can be removed here
* @param err_out json object for error printing. Can be NULL.
* @return True, if target gtid was reached within allotted time for all servers
*/
bool MariaDBMonitor::switchover_wait_slaves_catchup(const ServerArray& slaves, const GtidList& gtid,
int total_timeout, int read_timeout, json_t** err_out)
int total_timeout, json_t** err_out)
{
bool success = true;
int seconds_remaining = total_timeout;
@ -1089,15 +1085,14 @@ bool MariaDBMonitor::wait_cluster_stabilization(MariaDBServer* new_master, const
* @param err_out Json object for error printing. Can be NULL.
* @return True, if given slave is a valid promotion candidate.
*/
bool MariaDBMonitor::switchover_check_preferred_master(MXS_MONITORED_SERVER* preferred, json_t** err_out)
bool MariaDBMonitor::switchover_check_preferred_master(MariaDBServer* preferred, json_t** err_out)
{
ss_dassert(preferred);
bool rval = true;
MariaDBServer* preferred_info = update_slave_info(preferred);
if (preferred_info == NULL || !preferred_info->check_replication_settings())
if (!update_slave_info(preferred) || !preferred->check_replication_settings())
{
PRINT_MXS_JSON_ERROR(err_out, "The requested server '%s' is not a valid promotion candidate.",
preferred->server->unique_name);
preferred->name());
rval = false;
}
return rval;
@ -1110,18 +1105,19 @@ bool MariaDBMonitor::switchover_check_preferred_master(MXS_MONITORED_SERVER* pre
* @param err_out json object for error printing. Can be NULL.
* @return True if successful
*/
bool MariaDBMonitor::promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out)
bool MariaDBMonitor::promote_new_master(MariaDBServer* new_master, json_t** err_out)
{
bool success = false;
MXS_NOTICE("Promoting server '%s' to master.", new_master->server->unique_name);
MYSQL* new_master_conn = new_master->server_base->con;
MXS_NOTICE("Promoting server '%s' to master.", new_master->name());
const char* query = "STOP SLAVE;";
if (mxs_mysql_query(new_master->con, query) == 0)
if (mxs_mysql_query(new_master_conn, query) == 0)
{
query = "RESET SLAVE ALL;";
if (mxs_mysql_query(new_master->con, query) == 0)
if (mxs_mysql_query(new_master_conn, query) == 0)
{
query = "SET GLOBAL read_only=0;";
if (mxs_mysql_query(new_master->con, query) == 0)
if (mxs_mysql_query(new_master_conn, query) == 0)
{
success = true;
}
@ -1131,7 +1127,7 @@ bool MariaDBMonitor::promote_new_master(MXS_MONITORED_SERVER* new_master, json_t
if (!success)
{
PRINT_MXS_JSON_ERROR(err_out, "Promotion failed: '%s'. Query: '%s'.",
mysql_error(new_master->con), query);
mysql_error(new_master_conn), query);
}
// If the previous master was a slave to an external master, start the equivalent slave connection on
// the new master. Success of replication is not checked.
@ -1154,8 +1150,7 @@ MariaDBServer* MariaDBMonitor::select_new_master(ServerArray* slaves_out, json_t
ss_dassert(slaves_out && slaves_out->size() == 0);
/* Select a new master candidate. Selects the one with the latest event in relay log.
* If multiple slaves have the same number of events, select the one with the most processed events. */
MXS_MONITORED_SERVER* current_best = NULL;
MariaDBServer* current_best_info = NULL;
MariaDBServer* current_best = NULL;
// Servers that cannot be selected because of exclusion, but seem otherwise ok.
ServerArray valid_but_excluded;
// Index of the current best candidate in slaves_out
@ -1167,12 +1162,12 @@ MariaDBServer* MariaDBMonitor::select_new_master(ServerArray* slaves_out, json_t
* Do not worry about the exclusion list yet, querying the excluded servers is ok.
* If master is replicating from external master, it is updated by update_slave_info()
* but not added to array. */
MariaDBServer* cand = update_slave_info((*iter)->server_base);
if (cand && cand != m_master)
MariaDBServer* cand = *iter;
if (update_slave_info(cand) && cand != m_master)
{
slaves_out->push_back(cand);
// Check that server is not in the exclusion list while still being a valid choice.
if (server_is_excluded(cand->server_base) && cand->check_replication_settings(WARNINGS_OFF))
if (server_is_excluded(cand) && cand->check_replication_settings(WARNINGS_OFF))
{
valid_but_excluded.push_back(cand);
const char CANNOT_SELECT[] = "Promotion candidate '%s' is excluded from new "
@ -1182,12 +1177,10 @@ MariaDBServer* MariaDBMonitor::select_new_master(ServerArray* slaves_out, json_t
else if (cand->check_replication_settings())
{
// If no new master yet, accept any valid candidate. Otherwise check.
if (current_best == NULL ||
is_candidate_better(current_best_info, cand, m_master_gtid_domain))
if (current_best == NULL || is_candidate_better(current_best, cand, m_master_gtid_domain))
{
// The server has been selected for promotion, for now.
current_best = cand->server_base;
current_best_info = cand;
current_best = cand;
master_vector_index = slaves_out->size() - 1;
}
}
@ -1214,14 +1207,13 @@ MariaDBServer* MariaDBMonitor::select_new_master(ServerArray* slaves_out, json_t
MXS_WARNING(EXCLUDED_ONLY_CAND, excluded_name);
break;
}
else if (is_candidate_better(current_best_info, excluded_info, m_master_gtid_domain))
else if (is_candidate_better(current_best, excluded_info, m_master_gtid_domain))
{
// Print a warning if this server is actually a better candidate than the previous
// best.
const char EXCLUDED_CAND[] = "Server '%s' is superior to current "
"best candidate '%s', but cannot be selected as it's excluded. This may lead to "
// Print a warning if this server is actually a better candidate than the previous best.
const char EXCLUDED_CAND[] = "Server '%s' is superior to current best candidate '%s', "
"but cannot be selected as it's excluded. This may lead to "
"loss of data if '%s' is ahead of other servers.";
MXS_WARNING(EXCLUDED_CAND, excluded_name, current_best->server->unique_name, excluded_name);
MXS_WARNING(EXCLUDED_CAND, excluded_name, current_best->name(), excluded_name);
break;
}
}
@ -1230,22 +1222,20 @@ MariaDBServer* MariaDBMonitor::select_new_master(ServerArray* slaves_out, json_t
{
PRINT_MXS_JSON_ERROR(err_out, "No suitable promotion candidate found.");
}
return current_best ? get_server_info(current_best) : NULL;
return current_best;
}
/**
* Is the server in the excluded list
*
* @param handle Cluster monitor
* @param server Server to test
* @return True if server is in the excluded-list of the monitor.
*/
bool MariaDBMonitor::server_is_excluded(const MXS_MONITORED_SERVER* server)
bool MariaDBMonitor::server_is_excluded(const MariaDBServer* server)
{
size_t n_excluded = m_excluded_servers.size();
for (size_t i = 0; i < n_excluded; i++)
for (auto iter = m_excluded_servers.begin(); iter != m_excluded_servers.end(); iter++)
{
if (m_excluded_servers[i]->server_base == server)
if (*iter == server)
{
return true;
}
@ -1261,16 +1251,16 @@ bool MariaDBMonitor::server_is_excluded(const MXS_MONITORED_SERVER* server)
* @param gtid_domain Which domain to compare
* @return True if candidate is better
*/
bool MariaDBMonitor::is_candidate_better(const MariaDBServer* current_best_info,
const MariaDBServer* candidate_info, uint32_t gtid_domain)
bool MariaDBMonitor::is_candidate_better(const MariaDBServer* current_best, const MariaDBServer* candidate,
uint32_t gtid_domain)
{
uint64_t cand_io = candidate_info->slave_status.gtid_io_pos.get_gtid(gtid_domain).m_sequence;
uint64_t cand_processed = candidate_info->gtid_current_pos.get_gtid(gtid_domain).m_sequence;
uint64_t curr_io = current_best_info->slave_status.gtid_io_pos.get_gtid(gtid_domain).m_sequence;
uint64_t curr_processed = current_best_info->gtid_current_pos.get_gtid(gtid_domain).m_sequence;
uint64_t cand_io = candidate->slave_status.gtid_io_pos.get_gtid(gtid_domain).m_sequence;
uint64_t cand_processed = candidate->gtid_current_pos.get_gtid(gtid_domain).m_sequence;
uint64_t curr_io = current_best->slave_status.gtid_io_pos.get_gtid(gtid_domain).m_sequence;
uint64_t curr_processed = current_best->gtid_current_pos.get_gtid(gtid_domain).m_sequence;
bool cand_updates = candidate_info->rpl_settings.log_slave_updates;
bool curr_updates = current_best_info->rpl_settings.log_slave_updates;
bool cand_updates = candidate->rpl_settings.log_slave_updates;
bool curr_updates = current_best->rpl_settings.log_slave_updates;
bool is_better = false;
// Accept a slave with a later event in relay log.
if (cand_io > curr_io)
@ -1379,16 +1369,14 @@ bool MariaDBMonitor::failover_check(json_t** error_out)
int slaves = 0;
bool error = false;
for (MXS_MONITORED_SERVER* mon_server = m_monitor_base->monitored_servers;
mon_server != NULL;
mon_server = mon_server->next)
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
{
uint64_t status_bits = mon_server->server->status;
MariaDBServer* server = *iter;
uint64_t status_bits = server->server_base->server->status;
uint64_t master_up = (SERVER_MASTER | SERVER_RUNNING);
if ((status_bits & master_up) == master_up)
{
string master_up_msg = string("Master server '") + mon_server->server->unique_name +
"' is running";
string master_up_msg = string("Master server '") + server->name() + "' is running";
if (status_bits & SERVER_MAINT)
{
master_up_msg += ", although in maintenance mode";
@ -1397,9 +1385,9 @@ bool MariaDBMonitor::failover_check(json_t** error_out)
PRINT_MXS_JSON_ERROR(error_out, "%s", master_up_msg.c_str());
error = true;
}
else if (SERVER_IS_SLAVE(mon_server->server))
else if (server->is_slave())
{
if (uses_gtid(mon_server, error_out))
if (uses_gtid(server, error_out))
{
slaves++;
}
@ -1529,13 +1517,12 @@ bool MariaDBMonitor::mon_process_failover(bool* cluster_modified_out)
* @return True if using gtid-replication. False if not, or if server is not a slave or otherwise does
* not have a gtid_IO_Pos.
*/
bool MariaDBMonitor::uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out)
bool MariaDBMonitor::uses_gtid(MariaDBServer* server, json_t** error_out)
{
bool rval = false;
const MariaDBServer* info = get_server_info(mon_server);
if (info->slave_status.gtid_io_pos.empty())
if (server->slave_status.gtid_io_pos.empty())
{
string slave_not_gtid_msg = string("Slave server ") + mon_server->server->unique_name +
string slave_not_gtid_msg = string("Slave server ") + server->name() +
" is not using gtid replication.";
PRINT_MXS_JSON_ERROR(error_out, "%s", slave_not_gtid_msg.c_str());
}
@ -1685,7 +1672,7 @@ bool MariaDBMonitor::switchover_check(SERVER* new_master, SERVER* current_master
bool gtid_ok = true;
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
{
if ((*iter)->is_slave() && !uses_gtid((*iter)->server_base, error_out))
if ((*iter)->is_slave() && !uses_gtid(*iter, error_out))
{
gtid_ok = false;
}

View File

@ -436,10 +436,10 @@ void MariaDBMonitor::main_loop()
if we need to use standalone master. */
if (m_detect_standalone_master)
{
if (standalone_master_required(m_monitor_base->monitored_servers))
if (standalone_master_required())
{
// Other servers have died, set last remaining server as master
if (set_standalone_master(m_monitor_base->monitored_servers))
if (set_standalone_master())
{
// Update the root_master to point to the standalone master
root_master = m_master;
@ -582,21 +582,22 @@ void MariaDBMonitor::update_external_master()
}
}
void MariaDBMonitor::measure_replication_lag(MariaDBServer* root_master_server)
void MariaDBMonitor::measure_replication_lag(MariaDBServer* root_master)
{
MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->server_base : NULL;
ss_dassert(root_master);
MXS_MONITORED_SERVER* mon_root_master = root_master->server_base;
set_master_heartbeat(root_master);
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
{
MXS_MONITORED_SERVER* ptr = (*iter)->server_base;
if ((!SERVER_IN_MAINT(ptr->server)) && SERVER_IS_RUNNING(ptr->server))
MariaDBServer* server = *iter;
MXS_MONITORED_SERVER* ptr = server->server_base;
if ((!SERVER_IN_MAINT(ptr->server)) && server->is_running())
{
if (ptr->server->node_id != root_master->server->node_id &&
(SERVER_IS_SLAVE(ptr->server) ||
SERVER_IS_RELAY_SERVER(ptr->server)) &&
!(*iter)->binlog_relay) // No select lag for Binlog Server
if (ptr->server->node_id != mon_root_master->server->node_id &&
(server->is_slave() || SERVER_IS_RELAY_SERVER(ptr->server)) &&
!server->binlog_relay) // No select lag for Binlog Server
{
set_slave_heartbeat(ptr);
set_slave_heartbeat(server);
}
}
}
@ -731,9 +732,9 @@ static int get_row_count(MXS_MONITORED_SERVER *database, const char* query)
* Write the replication heartbeat into the maxscale_schema.replication_heartbeat table in the current master.
* The inserted value will be seen from all slaves replicating from this master.
*
* @param database The number database server
* @param server The server to write the heartbeat to
*/
void MariaDBMonitor::set_master_heartbeat(MXS_MONITORED_SERVER *database)
void MariaDBMonitor::set_master_heartbeat(MariaDBServer* server)
{
time_t heartbeat;
time_t purge_time;
@ -746,6 +747,7 @@ void MariaDBMonitor::set_master_heartbeat(MXS_MONITORED_SERVER *database)
return;
}
MXS_MONITORED_SERVER* database = server->server_base;
int n_db = get_row_count(database, "SELECT schema_name FROM information_schema.schemata "
"WHERE schema_name = 'maxscale_schema'");
int n_tbl = get_row_count(database, "SELECT table_name FROM information_schema.tables "
@ -845,9 +847,9 @@ void MariaDBMonitor::set_master_heartbeat(MXS_MONITORED_SERVER *database)
* This function gets the replication heartbeat from the maxscale_schema.replication_heartbeat table in
* the current slave and stores the timestamp and replication lag in the slave server struct.
*
* @param database The number database server
* @param server The slave to measure lag at
*/
void MariaDBMonitor::set_slave_heartbeat(MXS_MONITORED_SERVER *database)
void MariaDBMonitor::set_slave_heartbeat(MariaDBServer* server)
{
time_t heartbeat;
char select_heartbeat_query[256] = "";
@ -867,6 +869,7 @@ void MariaDBMonitor::set_slave_heartbeat(MXS_MONITORED_SERVER *database)
"WHERE maxscale_id = %lu AND master_server_id = %li",
m_id, m_master->server_base->server->node_id);
MXS_MONITORED_SERVER* database = server->server_base;
/* if there is a master then send the query to the slave with master_id */
if (m_master != NULL && (mxs_mysql_query(database->con, select_heartbeat_query) == 0
&& (result = mysql_store_result(database->con)) != NULL))

View File

@ -170,14 +170,14 @@ private:
MXS_MONITORED_SERVER* build_mysql51_replication_tree();
void find_graph_cycles();
void update_server_states(MariaDBServer& db_server, MariaDBServer* root_master);
bool standalone_master_required(MXS_MONITORED_SERVER* db);
bool set_standalone_master(MXS_MONITORED_SERVER* db);
bool standalone_master_required();
bool set_standalone_master();
void assign_relay_master(MariaDBServer& serv_info);
void log_master_changes(MariaDBServer* root_master, int* log_no_master);
void update_gtid_domain();
void update_external_master();
void set_master_heartbeat(MXS_MONITORED_SERVER *);
void set_slave_heartbeat(MXS_MONITORED_SERVER *);
void set_master_heartbeat(MariaDBServer*);
void set_slave_heartbeat(MariaDBServer*);
void measure_replication_lag(MariaDBServer* root_master);
void check_maxscale_schema_replication();
MXS_MONITORED_SERVER* getServerByNodeId(long);
@ -191,12 +191,12 @@ private:
bool switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master,
json_t** error_out) const;
bool do_switchover(MariaDBServer** current_master, MariaDBServer** new_master, json_t** err_out);
bool switchover_check_preferred_master(MXS_MONITORED_SERVER* preferred, json_t** err_out);
bool switchover_demote_master(MXS_MONITORED_SERVER* current_master, MariaDBServer* info,
bool switchover_check_preferred_master(MariaDBServer* preferred, json_t** err_out);
bool switchover_demote_master(MariaDBServer* current_master,
json_t** err_out);
bool switchover_wait_slaves_catchup(const ServerArray& slaves, const GtidList& gtid, int total_timeout,
int read_timeout, json_t** err_out);
bool switchover_start_slave(MXS_MONITORED_SERVER* old_master, SERVER* new_master);
json_t** err_out);
bool switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master);
// Failover methods
void handle_auto_failover(bool* failover_performed);
@ -205,7 +205,7 @@ private:
bool mon_process_failover(bool* cluster_modified_out);
bool failover_check(json_t** error_out);
bool do_failover(json_t** err_out);
bool failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, int seconds_remaining, json_t** err_out);
bool failover_wait_relay_log(MariaDBServer* new_master, int seconds_remaining, json_t** err_out);
// Rejoin methods
bool cluster_can_be_joined();
@ -214,21 +214,21 @@ private:
bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, MariaDBServer* master, json_t** output);
bool can_replicate_from(MariaDBServer* slave_cand, MariaDBServer* master);
uint32_t do_rejoin(const ServerArray& joinable_servers);
bool join_cluster(MXS_MONITORED_SERVER* server, const char* change_cmd);
bool join_cluster(MariaDBServer* server, const std::string& change_cmd);
// Methods common to failover/switchover/rejoin
bool uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out);
bool uses_gtid(MariaDBServer* mon_server, json_t** error_out);
MariaDBServer* select_new_master(ServerArray* slaves_out, json_t** err_out);
MariaDBServer* update_slave_info(MXS_MONITORED_SERVER* server);
bool server_is_excluded(const MXS_MONITORED_SERVER* server);
bool is_candidate_better(const MariaDBServer* current_best_info, const MariaDBServer* candidate_info,
bool update_slave_info(MariaDBServer* server);
bool server_is_excluded(const MariaDBServer* server);
bool is_candidate_better(const MariaDBServer* current_best, const MariaDBServer* candidate,
uint32_t gtid_domain);
bool promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out);
bool promote_new_master(MariaDBServer* new_master, json_t** err_out);
int redirect_slaves(MariaDBServer* new_master, const ServerArray& slaves,
ServerArray* redirected_slaves);
bool redirect_one_slave(MXS_MONITORED_SERVER* slave, const char* change_cmd);
bool redirect_one_slave(MariaDBServer* slave, const std::string& change_cmd);
std::string generate_change_master_cmd(const std::string& master_host, int master_port);
bool start_external_replication(MXS_MONITORED_SERVER* new_master, json_t** err_out);
bool start_external_replication(MariaDBServer* new_master, json_t** err_out);
bool wait_cluster_stabilization(MariaDBServer* new_master, const ServerArray& slaves,
int seconds_remaining);
void disable_setting(const char* setting);