Only write to SERVER->status at the end of a monitoring loop

This makes the code clearer and reduces race conditions, as the monitor
could be writing SERVER->status while a router is reading it. Also,
the time during which the SERVER struct is locked drops to a fraction.
This commit is contained in:
Esa Korhonen
2018-05-17 14:27:16 +03:00
parent 715b8a6f1d
commit 3ec449339f
6 changed files with 183 additions and 118 deletions

View File

@ -48,7 +48,7 @@ MXS_MONITORED_SERVER* MariaDBMonitor::get_replication_tree()
* that means SERVER_IS_RUNNING returns 0 * that means SERVER_IS_RUNNING returns 0
* Let's check only for SERVER_IS_DOWN: server is not running * Let's check only for SERVER_IS_DOWN: server is not running
*/ */
if (SERVER_IS_DOWN(ptr->server)) if (get_server_info(ptr)->is_down())
{ {
ptr = ptr->next; ptr = ptr->next;
continue; continue;
@ -127,8 +127,7 @@ MXS_MONITORED_SERVER* MariaDBMonitor::get_replication_tree()
} }
MariaDBServer* info = get_server_info(master_cand); MariaDBServer* info = get_server_info(master_cand);
if (info->is_running())
if (SERVER_IS_RUNNING(master_cand->server))
{ {
/** Only set the Master status if read_only is disabled */ /** Only set the Master status if read_only is disabled */
monitor_set_pending_status(master_cand, monitor_set_pending_status(master_cand,
@ -158,7 +157,7 @@ MXS_MONITORED_SERVER* MariaDBMonitor::get_replication_tree()
if (m_master != NULL) if (m_master != NULL)
{ {
/* If the root master is in MAINT, return NULL */ /* If the root master is in MAINT, return NULL */
if (SERVER_IN_MAINT(m_master->m_server_base->server)) if (m_master->is_in_maintenance())
{ {
return NULL; return NULL;
} }
@ -210,7 +209,8 @@ MXS_MONITORED_SERVER* MariaDBMonitor::getSlaveOfNodeId(long node_id, slave_down_
while (ptr) while (ptr)
{ {
current = ptr->server; current = ptr->server;
if (current->master_id == node_id && (slave_down_setting == ACCEPT_DOWN || !SERVER_IS_DOWN(current))) if (current->master_id == node_id &&
(slave_down_setting == ACCEPT_DOWN || !get_server_info(ptr)->is_down()))
{ {
return ptr; return ptr;
} }
@ -411,7 +411,7 @@ void MariaDBMonitor::find_graph_cycles()
} }
} }
else if (m_detect_stale_master && cycle == 0 && else if (m_detect_stale_master && cycle == 0 &&
graph[i].db->server->status & SERVER_MASTER && graph[i].db->mon_prev_status & SERVER_MASTER &&
(graph[i].db->pending_status & SERVER_MASTER) == 0) (graph[i].db->pending_status & SERVER_MASTER) == 0)
{ {
/** /**
@ -716,7 +716,6 @@ bool MariaDBMonitor::set_standalone_master()
m_warn_set_standalone_master = false; m_warn_set_standalone_master = false;
} }
server_clear_set_status(mon_server->server, SERVER_SLAVE, SERVER_MASTER | SERVER_STALE_STATUS);
monitor_set_pending_status(mon_server, SERVER_MASTER | SERVER_STALE_STATUS); monitor_set_pending_status(mon_server, SERVER_MASTER | SERVER_STALE_STATUS);
monitor_clear_pending_status(mon_server, SERVER_SLAVE); monitor_clear_pending_status(mon_server, SERVER_SLAVE);
m_master = server; m_master = server;
@ -724,8 +723,7 @@ bool MariaDBMonitor::set_standalone_master()
} }
else if (!m_allow_cluster_recovery) else if (!m_allow_cluster_recovery)
{ {
server_set_status_nolock(mon_server->server, SERVER_MAINT); server->set_status(SERVER_MAINT);
monitor_set_pending_status(mon_server, SERVER_MAINT);
} }
} }
@ -745,7 +743,7 @@ MariaDBServer* MariaDBMonitor::find_root_master()
if (num_servers == 1) if (num_servers == 1)
{ {
auto mon_server = m_servers[0]->m_server_base; auto mon_server = m_servers[0]->m_server_base;
if (SERVER_IS_RUNNING(mon_server->server)) if (m_servers[0]->is_running())
{ {
mon_server->server->depth = 0; mon_server->server->depth = 0;
/* status cleanup */ /* status cleanup */
@ -803,7 +801,7 @@ void MariaDBMonitor::assign_relay_master(MariaDBServer& candidate)
void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServer* root_master_server) void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServer* root_master_server)
{ {
MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->m_server_base : NULL; MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->m_server_base : NULL;
if (!SERVER_IN_MAINT(db_server.m_server_base->server)) if (!db_server.is_in_maintenance())
{ {
/** If "detect_stale_master" option is On, let's use the previous master. /** If "detect_stale_master" option is On, let's use the previous master.
* *
@ -819,15 +817,10 @@ void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServe
(strcmp(ptr->server->address, root_master->server->address) == 0 && (strcmp(ptr->server->address, root_master->server->address) == 0 &&
ptr->server->port == root_master->server->port) && ptr->server->port == root_master->server->port) &&
// had master status but is now losing it. // had master status but is now losing it.
(ptr->server->status & SERVER_MASTER) && !(ptr->pending_status & SERVER_MASTER) && (ptr->mon_prev_status & SERVER_MASTER) && !(ptr->pending_status & SERVER_MASTER) &&
!db_server.m_read_only) !db_server.m_read_only)
{ {
/** db_server.set_status(SERVER_STALE_STATUS | SERVER_MASTER);
* In this case server->status will not be updated from pending_status
* Set the STALE bit for this server in server struct
*/
server_set_status_nolock(ptr->server, SERVER_STALE_STATUS | SERVER_MASTER);
monitor_set_pending_status(ptr, SERVER_STALE_STATUS | SERVER_MASTER);
/** Log the message only if the master server didn't have /** Log the message only if the master server didn't have
* the stale master bit set */ * the stale master bit set */
@ -845,7 +838,7 @@ void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServe
unsigned int bits = SERVER_SLAVE | SERVER_RUNNING; unsigned int bits = SERVER_SLAVE | SERVER_RUNNING;
if ((ptr->mon_prev_status & bits) == bits && if ((ptr->mon_prev_status & bits) == bits &&
root_master && SERVER_IS_MASTER(root_master->server)) root_master && SRV_MASTER_STATUS(root_master->pending_status))
{ {
/** Slave with a running master, assign stale slave candidacy */ /** Slave with a running master, assign stale slave candidacy */
if ((ptr->pending_status & bits) == bits) if ((ptr->pending_status & bits) == bits)
@ -864,9 +857,9 @@ void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServe
else if (ptr->mon_prev_status & SERVER_STALE_SLAVE && else if (ptr->mon_prev_status & SERVER_STALE_SLAVE &&
ptr->pending_status & SERVER_RUNNING && ptr->pending_status & SERVER_RUNNING &&
// Master is down // Master is down
(!root_master || !SERVER_IS_MASTER(root_master->server) || (!root_master || !SRV_MASTER_STATUS(root_master->pending_status) ||
// Master just came up // Master just came up
(SERVER_IS_MASTER(root_master->server) && (SRV_MASTER_STATUS(root_master->pending_status) &&
(root_master->mon_prev_status & SERVER_MASTER) == 0))) (root_master->mon_prev_status & SERVER_MASTER) == 0)))
{ {
monitor_set_pending_status(ptr, SERVER_SLAVE); monitor_set_pending_status(ptr, SERVER_SLAVE);
@ -876,7 +869,5 @@ void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServe
monitor_set_pending_status(ptr, SERVER_SLAVE); monitor_set_pending_status(ptr, SERVER_SLAVE);
} }
} }
ptr->server->status = ptr->pending_status;
} }
} }

View File

@ -1203,7 +1203,7 @@ bool MariaDBMonitor::switchover_check_current(const MXS_MONITORED_SERVER* sugges
mon_serv != NULL && extra_master == NULL; mon_serv != NULL && extra_master == NULL;
mon_serv = mon_serv->next) mon_serv = mon_serv->next)
{ {
if (SERVER_IS_MASTER(mon_serv->server)) if (SRV_MASTER_STATUS(mon_serv->pending_status))
{ {
if (mon_serv == suggested_curr_master) if (mon_serv == suggested_curr_master)
{ {
@ -1237,22 +1237,20 @@ bool MariaDBMonitor::switchover_check_current(const MXS_MONITORED_SERVER* sugges
* *
* @return True, if suggested new master is a viable promotion candidate. * @return True, if suggested new master is a viable promotion candidate.
*/ */
bool MariaDBMonitor::switchover_check_new(const MXS_MONITORED_SERVER* monitored_server, json_t** error) bool MariaDBMonitor::switchover_check_new(const MariaDBServer* new_master_cand, json_t** error)
{ {
SERVER* server = monitored_server->server; bool is_master = new_master_cand->is_master();
const char* name = server->name; bool is_slave = new_master_cand->is_slave();
bool is_master = SERVER_IS_MASTER(server);
bool is_slave = SERVER_IS_SLAVE(server);
if (is_master) if (is_master)
{ {
const char IS_MASTER[] = "Specified new master '%s' is already the current master."; const char IS_MASTER[] = "Specified new master '%s' is already the current master.";
PRINT_MXS_JSON_ERROR(error, IS_MASTER, name); PRINT_MXS_JSON_ERROR(error, IS_MASTER, new_master_cand->name());
} }
else if (!is_slave) else if (!is_slave)
{ {
const char NOT_SLAVE[] = "Specified new master '%s' is not a slave."; const char NOT_SLAVE[] = "Specified new master '%s' is not a slave.";
PRINT_MXS_JSON_ERROR(error, NOT_SLAVE, name); PRINT_MXS_JSON_ERROR(error, NOT_SLAVE, new_master_cand->name());
} }
return !is_master && is_slave; return !is_master && is_slave;
@ -1274,7 +1272,7 @@ bool MariaDBMonitor::failover_check(json_t** error_out)
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
{ {
MariaDBServer* server = *iter; MariaDBServer* server = *iter;
uint64_t status_bits = server->m_server_base->server->status; uint64_t status_bits = server->m_server_base->pending_status;
uint64_t master_up = (SERVER_MASTER | SERVER_RUNNING); uint64_t master_up = (SERVER_MASTER | SERVER_RUNNING);
if ((status_bits & master_up) == master_up) if ((status_bits & master_up) == master_up)
{ {
@ -1509,13 +1507,17 @@ bool MariaDBMonitor::switchover_check(SERVER* new_master, SERVER* current_master
new_master_ok = false; new_master_ok = false;
PRINT_MXS_JSON_ERROR(error_out, NO_SERVER, new_master->name, m_monitor_base->name); PRINT_MXS_JSON_ERROR(error_out, NO_SERVER, new_master->name, m_monitor_base->name);
} }
else if (!switchover_check_new(mon_new_master, error_out))
{
new_master_ok = false;
}
else else
{ {
*new_master_out = get_server_info(mon_new_master); MariaDBServer* found_new_master = get_server_info(mon_new_master);
if (switchover_check_new(found_new_master, error_out))
{
*new_master_out = found_new_master;
}
else
{
new_master_ok = false;
}
} }
} }

View File

@ -351,7 +351,22 @@ void MariaDBMonitor::main_loop()
clock_gettime(CLOCK_MONOTONIC_COARSE, &loop_start); clock_gettime(CLOCK_MONOTONIC_COARSE, &loop_start);
lock_monitor_servers(m_monitor_base); lock_monitor_servers(m_monitor_base);
servers_status_pending_to_current(m_monitor_base); /* Read any admin status changes from SERVER->status_pending. */
if (m_monitor_base->server_pending_changes)
{
servers_status_pending_to_current(m_monitor_base);
}
/* Update MXS_MONITORED_SERVER->pending_status. This is where the monitor loop writes it's findings.
* Also, backup current status so that it can be compared to any deduced state. */
for (auto mon_srv = m_monitor_base->monitored_servers; mon_srv; mon_srv = mon_srv->next)
{
auto status = mon_srv->server->status;
mon_srv->pending_status = status;
mon_srv->mon_prev_status = status;
}
/* Avoid reading/writing SERVER->status while servers are not locked. Routers read the state
* all the time. */
release_monitor_servers(m_monitor_base);
// Query all servers for their status. // Query all servers for their status.
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
@ -409,10 +424,8 @@ void MariaDBMonitor::main_loop()
if (root_master && root_master->is_master()) if (root_master && root_master->is_master())
{ {
SERVER* root_master_server = root_master->m_server_base->server;
// Clear slave and stale slave status bits from current master // Clear slave and stale slave status bits from current master
server_clear_status_nolock(root_master_server, SERVER_SLAVE | SERVER_STALE_SLAVE); root_master->clear_status(SERVER_SLAVE | SERVER_STALE_SLAVE);
monitor_clear_pending_status(root_master->m_server_base, SERVER_SLAVE | SERVER_STALE_SLAVE);
/** /**
* Clear external slave status from master if configured to do so. * Clear external slave status from master if configured to do so.
@ -421,59 +434,83 @@ void MariaDBMonitor::main_loop()
*/ */
if (m_ignore_external_masters) if (m_ignore_external_masters)
{ {
monitor_clear_pending_status(root_master->m_server_base, SERVER_SLAVE_OF_EXTERNAL_MASTER); root_master->clear_status(SERVER_SLAVE_OF_EXTERNAL_MASTER);
server_clear_status_nolock(root_master_server, SERVER_SLAVE_OF_EXTERNAL_MASTER);
} }
} }
ss_dassert(root_master == NULL || root_master == m_master); ss_dassert(root_master == NULL || root_master == m_master);
ss_dassert(root_master == NULL || ss_dassert(root_master == NULL ||
((root_master->m_server_base->server->status & (SERVER_SLAVE | SERVER_MASTER)) != ((root_master->m_server_base->pending_status & (SERVER_SLAVE | SERVER_MASTER)) !=
(SERVER_SLAVE | SERVER_MASTER))); (SERVER_SLAVE | SERVER_MASTER)));
/**
* After updating the status of all servers, check if monitor events
* need to be launched.
*/
mon_process_state_changes(m_monitor_base, m_script.c_str(), m_events);
m_cluster_modified = false;
if (m_auto_failover)
{
m_cluster_modified = handle_auto_failover();
}
/* log master detection failure of first master becomes available after failure */
log_master_changes(root_master, &log_no_master);
/* Generate the replication heartbeat event by performing an update */ /* Generate the replication heartbeat event by performing an update */
if (m_detect_replication_lag && root_master && if (m_detect_replication_lag && root_master &&
(root_master->is_master() || SERVER_IS_RELAY_SERVER(root_master->m_server_base->server))) (root_master->is_master() || root_master->is_relay_server()))
{ {
measure_replication_lag(root_master); measure_replication_lag(root_master);
} }
// Do not auto-join servers on this monitor loop if a failover (or any other cluster modification) /* Servers have been queried and states deduced. Lock servers for a short while again while we update
// has been performed, as server states have not been updated yet. It will happen next iteration. * the shared server states and make the changes visible. There is a problem though: the admin could
if (!config_get_global_options()->passive && m_auto_rejoin && * have changed the shared state while the queries were running. In this rare case, the deduced
!m_cluster_modified && cluster_can_be_joined()) * server states no longer apply and may be inconsistent. The most straightforward solution to this
* is to discard the results of the latest monitor loop and start another immediately. Cluster
* modifications are not performed until the next loop. The discarding of results is not perfect as
* some data has already been written. But this data is only used by the monitor itself and will be
* rewritten next loop. TODO: make versions of log_master_changes() and mon_process_state_changes()
* which read from MXS_MONITORED_SERVER->pending_status instead of SERVER->status. Then this locking
* and releasing can be moved after failover etc and the user changes can be applied gracefully. */
lock_monitor_servers(m_monitor_base);
if (m_monitor_base->server_pending_changes)
{ {
// Check if any servers should be autojoined to the cluster and try to join them. MXS_WARNING("MaxAdmin or REST API modified the state of a server monitored by '%s' during a "
handle_auto_rejoin(); "monitor iteration. Discarding the monitor results and retrying.",
m_monitor_base->name);
release_monitor_servers(m_monitor_base);
} }
else
/* Check if any slave servers have read-only off and turn it on if user so wishes. Again, do not
* perform this if cluster has been modified this loop since it may not be clear which server
* should be a slave. */
if (!config_get_global_options()->passive && m_enforce_read_only_slaves && !m_cluster_modified)
{ {
enforce_read_only_on_slaves(); // Update shared status.
} for (auto mon_srv = m_monitor_base->monitored_servers; mon_srv; mon_srv = mon_srv->next)
{
auto new_status = mon_srv->pending_status;
auto srv = mon_srv->server;
srv->status = new_status;
srv->status_pending = new_status;
}
mon_hangup_failed_servers(m_monitor_base); /* Check if monitor events need to be launched. */
servers_status_current_to_pending(m_monitor_base); mon_process_state_changes(m_monitor_base, m_script.c_str(), m_events);
store_server_journal(m_monitor_base, m_master ? m_master->m_server_base : NULL); m_cluster_modified = false;
release_monitor_servers(m_monitor_base); if (m_auto_failover)
{
m_cluster_modified = handle_auto_failover();
}
/* log master detection failure of first master becomes available after failure */
log_master_changes(root_master, &log_no_master);
// Do not auto-join servers on this monitor loop if a failover (or any other cluster modification)
// has been performed, as server states have not been updated yet. It will happen next iteration.
if (!config_get_global_options()->passive && m_auto_rejoin && !m_cluster_modified &&
cluster_can_be_joined())
{
// Check if any servers should be autojoined to the cluster and try to join them.
handle_auto_rejoin();
}
/* Check if any slave servers have read-only off and turn it on if user so wishes. Again, do not
* perform this if cluster has been modified this loop since it may not be clear which server
* should be a slave. */
if (!config_get_global_options()->passive && m_enforce_read_only_slaves && !m_cluster_modified)
{
enforce_read_only_on_slaves();
}
mon_hangup_failed_servers(m_monitor_base);
store_server_journal(m_monitor_base, m_master ? m_master->m_server_base : NULL);
release_monitor_servers(m_monitor_base);
}
// Check how much the monitor should sleep to get one full monitor interval. // Check how much the monitor should sleep to get one full monitor interval.
timespec loop_end; timespec loop_end;
@ -555,10 +592,10 @@ void MariaDBMonitor::measure_replication_lag(MariaDBServer* root_master)
{ {
MariaDBServer* server = *iter; MariaDBServer* server = *iter;
MXS_MONITORED_SERVER* ptr = server->m_server_base; MXS_MONITORED_SERVER* ptr = server->m_server_base;
if ((!SERVER_IN_MAINT(ptr->server)) && server->is_running()) if ((!server->is_in_maintenance()) && server->is_running())
{ {
if (ptr->server->node_id != mon_root_master->server->node_id && if (ptr->server->node_id != mon_root_master->server->node_id &&
(server->is_slave() || SERVER_IS_RELAY_SERVER(ptr->server)) && (server->is_slave() || server->is_relay_server()) &&
(server->m_version == MariaDBServer::version::MARIADB_MYSQL_55 || (server->m_version == MariaDBServer::version::MARIADB_MYSQL_55 ||
server->m_version == MariaDBServer::version::MARIADB_100)) // No select lag for Binlog Server server->m_version == MariaDBServer::version::MARIADB_100)) // No select lag for Binlog Server
{ {
@ -571,14 +608,13 @@ void MariaDBMonitor::measure_replication_lag(MariaDBServer* root_master)
void MariaDBMonitor::log_master_changes(MariaDBServer* root_master_server, int* log_no_master) void MariaDBMonitor::log_master_changes(MariaDBServer* root_master_server, int* log_no_master)
{ {
MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->m_server_base : NULL; MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->m_server_base : NULL;
if (root_master && if (root_master && mon_status_changed(root_master) &&
mon_status_changed(root_master) && !(root_master->pending_status & SERVER_STALE_STATUS))
!(root_master->server->status & SERVER_STALE_STATUS))
{ {
if (root_master->pending_status & (SERVER_MASTER) && SERVER_IS_RUNNING(root_master->server)) if ((root_master->pending_status & SERVER_MASTER) && root_master_server->is_running())
{ {
if (!(root_master->mon_prev_status & SERVER_STALE_STATUS) && if (!(root_master->mon_prev_status & SERVER_STALE_STATUS) &&
!(root_master->server->status & SERVER_MAINT)) !(root_master->pending_status & SERVER_MAINT))
{ {
MXS_NOTICE("A Master Server is now available: %s:%i", MXS_NOTICE("A Master Server is now available: %s:%i",
root_master->server->address, root_master->server->address,

View File

@ -201,7 +201,7 @@ private:
bool switchover_check(SERVER* new_master, SERVER* current_master, bool switchover_check(SERVER* new_master, SERVER* current_master,
MariaDBServer** new_master_out, MariaDBServer** current_master_out, MariaDBServer** new_master_out, MariaDBServer** current_master_out,
json_t** error_out); json_t** error_out);
bool switchover_check_new(const MXS_MONITORED_SERVER* monitored_server, json_t** error); bool switchover_check_new(const MariaDBServer* new_master_cand, json_t** error);
bool switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master, bool switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master,
json_t** error_out) const; json_t** error_out) const;
bool do_switchover(MariaDBServer** current_master, MariaDBServer** new_master, json_t** err_out); bool do_switchover(MariaDBServer** current_master, MariaDBServer** new_master, json_t** err_out);

View File

@ -391,22 +391,38 @@ bool MariaDBServer::wait_until_gtid(const GtidList& target, int timeout, json_t*
bool MariaDBServer::is_master() const bool MariaDBServer::is_master() const
{ {
return SERVER_IS_MASTER(m_server_base->server); // Similar to macro SERVER_IS_MASTER
return SRV_MASTER_STATUS(m_server_base->pending_status);
} }
bool MariaDBServer::is_slave() const bool MariaDBServer::is_slave() const
{ {
return SERVER_IS_SLAVE(m_server_base->server); // Similar to macro SERVER_IS_SLAVE
return (m_server_base->pending_status & (SERVER_RUNNING | SERVER_SLAVE | SERVER_MAINT)) ==
(SERVER_RUNNING | SERVER_SLAVE);
} }
bool MariaDBServer::is_running() const bool MariaDBServer::is_running() const
{ {
return SERVER_IS_RUNNING(m_server_base->server); // Similar to macro SERVER_IS_RUNNING
return (m_server_base->pending_status & (SERVER_RUNNING | SERVER_MAINT)) == SERVER_RUNNING;
} }
bool MariaDBServer::is_down() const bool MariaDBServer::is_down() const
{ {
return SERVER_IS_DOWN(m_server_base->server); // Similar to macro SERVER_IS_DOWN
return (m_server_base->pending_status & SERVER_RUNNING) == 0;
}
bool MariaDBServer::is_in_maintenance() const
{
return m_server_base->pending_status & SERVER_MAINT;
}
bool MariaDBServer::is_relay_server() const
{
return (m_server_base->pending_status & (SERVER_RUNNING | SERVER_MASTER | SERVER_SLAVE | SERVER_MAINT)) ==
(SERVER_RUNNING | SERVER_MASTER | SERVER_SLAVE);
} }
const char* MariaDBServer::name() const const char* MariaDBServer::name() const
@ -666,21 +682,15 @@ bool MariaDBServer::run_sql_from_file(const string& path, json_t** error_out)
void MariaDBServer::update_server(MXS_MONITOR* base_monitor) void MariaDBServer::update_server(MXS_MONITOR* base_monitor)
{ {
/* Backup current status so that it can be compared to the new status. Also, we should be careful when
* modifying 'server->status' as routers can read it at any moment. It's best to write to
* 'mon_server->pending_status' until the status is final and then write it to 'server->status'. */
auto current_status = m_server_base->server->status;
m_server_base->mon_prev_status = current_status;
m_server_base->pending_status = current_status;
/* Monitor current node if not in maintenance. */ /* Monitor current node if not in maintenance. */
if (!SERVER_IN_MAINT(m_server_base->server)) bool in_maintenance = m_server_base->pending_status & SERVER_MAINT;
if (!in_maintenance)
{ {
monitor_server(base_monitor); monitor_server(base_monitor);
} }
/** Increase or reset the error count of the server. */ /** Increase or reset the error count of the server. */
m_server_base->mon_err_count = (is_down()) ? m_server_base->mon_err_count + 1 : 0; bool is_running = m_server_base->pending_status & SERVER_RUNNING;
m_server_base->mon_err_count = (is_running || in_maintenance) ? 0 : m_server_base->mon_err_count + 1;
} }
/** /**
@ -693,15 +703,11 @@ void MariaDBServer::monitor_server(MXS_MONITOR* base_monitor)
MXS_MONITORED_SERVER* mon_srv = m_server_base; MXS_MONITORED_SERVER* mon_srv = m_server_base;
mxs_connect_result_t rval = mon_ping_or_connect_to_db(base_monitor, mon_srv); mxs_connect_result_t rval = mon_ping_or_connect_to_db(base_monitor, mon_srv);
SERVER* srv = mon_srv->server;
MYSQL* conn = mon_srv->con; // mon_ping_or_connect_to_db() may have reallocated the MYSQL struct. MYSQL* conn = mon_srv->con; // mon_ping_or_connect_to_db() may have reallocated the MYSQL struct.
if (mon_connection_is_ok(rval)) if (mon_connection_is_ok(rval))
{ {
server_clear_status_nolock(srv, SERVER_AUTH_ERROR); clear_status(SERVER_AUTH_ERROR);
monitor_clear_pending_status(mon_srv, SERVER_AUTH_ERROR); set_status(SERVER_RUNNING);
/* Store current status in both server and monitor server pending struct */
server_set_status_nolock(srv, SERVER_RUNNING);
monitor_set_pending_status(mon_srv, SERVER_RUNNING);
if (rval == MONITOR_CONN_NEWCONN_OK) if (rval == MONITOR_CONN_NEWCONN_OK)
{ {
@ -712,22 +718,19 @@ void MariaDBServer::monitor_server(MXS_MONITOR* base_monitor)
{ {
/* The current server is not running. Clear all but the stale master bit as it is used to detect /* The current server is not running. Clear all but the stale master bit as it is used to detect
* masters that went down but came up. */ * masters that went down but came up. */
unsigned int all_bits = ~SERVER_STALE_STATUS; clear_status(~SERVER_STALE_STATUS);
server_clear_status_nolock(srv, all_bits); auto conn_errno = mysql_errno(conn);
monitor_clear_pending_status(mon_srv, all_bits); if (conn_errno == ER_ACCESS_DENIED_ERROR || conn_errno == ER_ACCESS_DENIED_NO_PASSWORD_ERROR)
if (mysql_errno(conn) == ER_ACCESS_DENIED_ERROR)
{ {
server_set_status_nolock(srv, SERVER_AUTH_ERROR); set_status(SERVER_AUTH_ERROR);
monitor_set_pending_status(mon_srv, SERVER_AUTH_ERROR);
} }
/* Log connect failure only once */ /* Log connect failure only once, that is, if server was RUNNING or MAINTENANCE during last
if (mon_status_changed(mon_srv) && mon_print_fail_status(mon_srv)) * iteration. */
if (m_server_base->mon_prev_status & (SERVER_RUNNING | SERVER_MAINT))
{ {
mon_log_connect_error(mon_srv, rval); mon_log_connect_error(mon_srv, rval);
} }
return; return;
} }
@ -777,8 +780,7 @@ void MariaDBServer::monitor_server(MXS_MONITOR* base_monitor)
bool MariaDBServer::update_slave_status(string* errmsg_out) bool MariaDBServer::update_slave_status(string* errmsg_out)
{ {
/** Clear old states */ /** Clear old states */
monitor_clear_pending_status(m_server_base, SERVER_SLAVE | SERVER_MASTER | SERVER_RELAY_MASTER | clear_status(SERVER_SLAVE | SERVER_MASTER | SERVER_RELAY_MASTER | SERVER_SLAVE_OF_EXTERNAL_MASTER);
SERVER_SLAVE_OF_EXTERNAL_MASTER);
bool rval = false; bool rval = false;
if (do_show_slave_status(errmsg_out)) if (do_show_slave_status(errmsg_out))
@ -787,7 +789,7 @@ bool MariaDBServer::update_slave_status(string* errmsg_out)
/* If all configured slaves are running set this node as slave */ /* If all configured slaves are running set this node as slave */
if (m_n_slaves_running > 0 && m_n_slaves_running == m_slave_status.size()) if (m_n_slaves_running > 0 && m_n_slaves_running == m_slave_status.size())
{ {
monitor_set_pending_status(m_server_base, SERVER_SLAVE); set_status(SERVER_SLAVE);
} }
/** Store master_id of current node. */ /** Store master_id of current node. */
@ -840,6 +842,16 @@ void MariaDBServer::update_server_info()
} }
} }
void MariaDBServer::clear_status(uint64_t bits)
{
monitor_clear_pending_status(m_server_base, bits);
}
void MariaDBServer::set_status(uint64_t bits)
{
monitor_set_pending_status(m_server_base, bits);
}
string SlaveStatus::to_string() const string SlaveStatus::to_string() const
{ {
using std::setw; using std::setw;

View File

@ -205,6 +205,16 @@ public:
*/ */
bool is_down() const; bool is_down() const;
/**
* Convenience method for SERVER_IN_MAINT
*/
bool is_in_maintenance() const;
/**
* Convenience method for SERVER_IS_RELAY_SERVER
*/
bool is_relay_server() const;
/** /**
* Returns the server name. * Returns the server name.
* *
@ -299,6 +309,20 @@ public:
*/ */
void update_server(MXS_MONITOR* base_monitor); void update_server(MXS_MONITOR* base_monitor);
/**
* Clear server pending status flags.
*
* @param bits Which flags to clear
*/
void clear_status(uint64_t bits);
/**
* Set server pending status flags.
*
* @param bits Which flags to set
*/
void set_status(uint64_t bits);
private: private:
void monitor_server(MXS_MONITOR* base_monitor); void monitor_server(MXS_MONITOR* base_monitor);
bool update_slave_status(std::string* errmsg_out = NULL); bool update_slave_status(std::string* errmsg_out = NULL);