Only write to SERVER->status at the end of a monitoring loop
This makes the code clearer and reduces race conditions, as the monitor could be writing SERVER->status while a router is reading it. Also, the time during which the SERVER struct is locked drops to a fraction.
This commit is contained in:
@ -48,7 +48,7 @@ MXS_MONITORED_SERVER* MariaDBMonitor::get_replication_tree()
|
|||||||
* that means SERVER_IS_RUNNING returns 0
|
* that means SERVER_IS_RUNNING returns 0
|
||||||
* Let's check only for SERVER_IS_DOWN: server is not running
|
* Let's check only for SERVER_IS_DOWN: server is not running
|
||||||
*/
|
*/
|
||||||
if (SERVER_IS_DOWN(ptr->server))
|
if (get_server_info(ptr)->is_down())
|
||||||
{
|
{
|
||||||
ptr = ptr->next;
|
ptr = ptr->next;
|
||||||
continue;
|
continue;
|
||||||
@ -127,8 +127,7 @@ MXS_MONITORED_SERVER* MariaDBMonitor::get_replication_tree()
|
|||||||
}
|
}
|
||||||
|
|
||||||
MariaDBServer* info = get_server_info(master_cand);
|
MariaDBServer* info = get_server_info(master_cand);
|
||||||
|
if (info->is_running())
|
||||||
if (SERVER_IS_RUNNING(master_cand->server))
|
|
||||||
{
|
{
|
||||||
/** Only set the Master status if read_only is disabled */
|
/** Only set the Master status if read_only is disabled */
|
||||||
monitor_set_pending_status(master_cand,
|
monitor_set_pending_status(master_cand,
|
||||||
@ -158,7 +157,7 @@ MXS_MONITORED_SERVER* MariaDBMonitor::get_replication_tree()
|
|||||||
if (m_master != NULL)
|
if (m_master != NULL)
|
||||||
{
|
{
|
||||||
/* If the root master is in MAINT, return NULL */
|
/* If the root master is in MAINT, return NULL */
|
||||||
if (SERVER_IN_MAINT(m_master->m_server_base->server))
|
if (m_master->is_in_maintenance())
|
||||||
{
|
{
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -210,7 +209,8 @@ MXS_MONITORED_SERVER* MariaDBMonitor::getSlaveOfNodeId(long node_id, slave_down_
|
|||||||
while (ptr)
|
while (ptr)
|
||||||
{
|
{
|
||||||
current = ptr->server;
|
current = ptr->server;
|
||||||
if (current->master_id == node_id && (slave_down_setting == ACCEPT_DOWN || !SERVER_IS_DOWN(current)))
|
if (current->master_id == node_id &&
|
||||||
|
(slave_down_setting == ACCEPT_DOWN || !get_server_info(ptr)->is_down()))
|
||||||
{
|
{
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
@ -411,7 +411,7 @@ void MariaDBMonitor::find_graph_cycles()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (m_detect_stale_master && cycle == 0 &&
|
else if (m_detect_stale_master && cycle == 0 &&
|
||||||
graph[i].db->server->status & SERVER_MASTER &&
|
graph[i].db->mon_prev_status & SERVER_MASTER &&
|
||||||
(graph[i].db->pending_status & SERVER_MASTER) == 0)
|
(graph[i].db->pending_status & SERVER_MASTER) == 0)
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
@ -716,7 +716,6 @@ bool MariaDBMonitor::set_standalone_master()
|
|||||||
m_warn_set_standalone_master = false;
|
m_warn_set_standalone_master = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
server_clear_set_status(mon_server->server, SERVER_SLAVE, SERVER_MASTER | SERVER_STALE_STATUS);
|
|
||||||
monitor_set_pending_status(mon_server, SERVER_MASTER | SERVER_STALE_STATUS);
|
monitor_set_pending_status(mon_server, SERVER_MASTER | SERVER_STALE_STATUS);
|
||||||
monitor_clear_pending_status(mon_server, SERVER_SLAVE);
|
monitor_clear_pending_status(mon_server, SERVER_SLAVE);
|
||||||
m_master = server;
|
m_master = server;
|
||||||
@ -724,8 +723,7 @@ bool MariaDBMonitor::set_standalone_master()
|
|||||||
}
|
}
|
||||||
else if (!m_allow_cluster_recovery)
|
else if (!m_allow_cluster_recovery)
|
||||||
{
|
{
|
||||||
server_set_status_nolock(mon_server->server, SERVER_MAINT);
|
server->set_status(SERVER_MAINT);
|
||||||
monitor_set_pending_status(mon_server, SERVER_MAINT);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -745,7 +743,7 @@ MariaDBServer* MariaDBMonitor::find_root_master()
|
|||||||
if (num_servers == 1)
|
if (num_servers == 1)
|
||||||
{
|
{
|
||||||
auto mon_server = m_servers[0]->m_server_base;
|
auto mon_server = m_servers[0]->m_server_base;
|
||||||
if (SERVER_IS_RUNNING(mon_server->server))
|
if (m_servers[0]->is_running())
|
||||||
{
|
{
|
||||||
mon_server->server->depth = 0;
|
mon_server->server->depth = 0;
|
||||||
/* status cleanup */
|
/* status cleanup */
|
||||||
@ -803,7 +801,7 @@ void MariaDBMonitor::assign_relay_master(MariaDBServer& candidate)
|
|||||||
void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServer* root_master_server)
|
void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServer* root_master_server)
|
||||||
{
|
{
|
||||||
MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->m_server_base : NULL;
|
MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->m_server_base : NULL;
|
||||||
if (!SERVER_IN_MAINT(db_server.m_server_base->server))
|
if (!db_server.is_in_maintenance())
|
||||||
{
|
{
|
||||||
/** If "detect_stale_master" option is On, let's use the previous master.
|
/** If "detect_stale_master" option is On, let's use the previous master.
|
||||||
*
|
*
|
||||||
@ -819,15 +817,10 @@ void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServe
|
|||||||
(strcmp(ptr->server->address, root_master->server->address) == 0 &&
|
(strcmp(ptr->server->address, root_master->server->address) == 0 &&
|
||||||
ptr->server->port == root_master->server->port) &&
|
ptr->server->port == root_master->server->port) &&
|
||||||
// had master status but is now losing it.
|
// had master status but is now losing it.
|
||||||
(ptr->server->status & SERVER_MASTER) && !(ptr->pending_status & SERVER_MASTER) &&
|
(ptr->mon_prev_status & SERVER_MASTER) && !(ptr->pending_status & SERVER_MASTER) &&
|
||||||
!db_server.m_read_only)
|
!db_server.m_read_only)
|
||||||
{
|
{
|
||||||
/**
|
db_server.set_status(SERVER_STALE_STATUS | SERVER_MASTER);
|
||||||
* In this case server->status will not be updated from pending_status
|
|
||||||
* Set the STALE bit for this server in server struct
|
|
||||||
*/
|
|
||||||
server_set_status_nolock(ptr->server, SERVER_STALE_STATUS | SERVER_MASTER);
|
|
||||||
monitor_set_pending_status(ptr, SERVER_STALE_STATUS | SERVER_MASTER);
|
|
||||||
|
|
||||||
/** Log the message only if the master server didn't have
|
/** Log the message only if the master server didn't have
|
||||||
* the stale master bit set */
|
* the stale master bit set */
|
||||||
@ -845,7 +838,7 @@ void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServe
|
|||||||
unsigned int bits = SERVER_SLAVE | SERVER_RUNNING;
|
unsigned int bits = SERVER_SLAVE | SERVER_RUNNING;
|
||||||
|
|
||||||
if ((ptr->mon_prev_status & bits) == bits &&
|
if ((ptr->mon_prev_status & bits) == bits &&
|
||||||
root_master && SERVER_IS_MASTER(root_master->server))
|
root_master && SRV_MASTER_STATUS(root_master->pending_status))
|
||||||
{
|
{
|
||||||
/** Slave with a running master, assign stale slave candidacy */
|
/** Slave with a running master, assign stale slave candidacy */
|
||||||
if ((ptr->pending_status & bits) == bits)
|
if ((ptr->pending_status & bits) == bits)
|
||||||
@ -864,9 +857,9 @@ void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServe
|
|||||||
else if (ptr->mon_prev_status & SERVER_STALE_SLAVE &&
|
else if (ptr->mon_prev_status & SERVER_STALE_SLAVE &&
|
||||||
ptr->pending_status & SERVER_RUNNING &&
|
ptr->pending_status & SERVER_RUNNING &&
|
||||||
// Master is down
|
// Master is down
|
||||||
(!root_master || !SERVER_IS_MASTER(root_master->server) ||
|
(!root_master || !SRV_MASTER_STATUS(root_master->pending_status) ||
|
||||||
// Master just came up
|
// Master just came up
|
||||||
(SERVER_IS_MASTER(root_master->server) &&
|
(SRV_MASTER_STATUS(root_master->pending_status) &&
|
||||||
(root_master->mon_prev_status & SERVER_MASTER) == 0)))
|
(root_master->mon_prev_status & SERVER_MASTER) == 0)))
|
||||||
{
|
{
|
||||||
monitor_set_pending_status(ptr, SERVER_SLAVE);
|
monitor_set_pending_status(ptr, SERVER_SLAVE);
|
||||||
@ -876,7 +869,5 @@ void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServe
|
|||||||
monitor_set_pending_status(ptr, SERVER_SLAVE);
|
monitor_set_pending_status(ptr, SERVER_SLAVE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ptr->server->status = ptr->pending_status;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1203,7 +1203,7 @@ bool MariaDBMonitor::switchover_check_current(const MXS_MONITORED_SERVER* sugges
|
|||||||
mon_serv != NULL && extra_master == NULL;
|
mon_serv != NULL && extra_master == NULL;
|
||||||
mon_serv = mon_serv->next)
|
mon_serv = mon_serv->next)
|
||||||
{
|
{
|
||||||
if (SERVER_IS_MASTER(mon_serv->server))
|
if (SRV_MASTER_STATUS(mon_serv->pending_status))
|
||||||
{
|
{
|
||||||
if (mon_serv == suggested_curr_master)
|
if (mon_serv == suggested_curr_master)
|
||||||
{
|
{
|
||||||
@ -1237,22 +1237,20 @@ bool MariaDBMonitor::switchover_check_current(const MXS_MONITORED_SERVER* sugges
|
|||||||
*
|
*
|
||||||
* @return True, if suggested new master is a viable promotion candidate.
|
* @return True, if suggested new master is a viable promotion candidate.
|
||||||
*/
|
*/
|
||||||
bool MariaDBMonitor::switchover_check_new(const MXS_MONITORED_SERVER* monitored_server, json_t** error)
|
bool MariaDBMonitor::switchover_check_new(const MariaDBServer* new_master_cand, json_t** error)
|
||||||
{
|
{
|
||||||
SERVER* server = monitored_server->server;
|
bool is_master = new_master_cand->is_master();
|
||||||
const char* name = server->name;
|
bool is_slave = new_master_cand->is_slave();
|
||||||
bool is_master = SERVER_IS_MASTER(server);
|
|
||||||
bool is_slave = SERVER_IS_SLAVE(server);
|
|
||||||
|
|
||||||
if (is_master)
|
if (is_master)
|
||||||
{
|
{
|
||||||
const char IS_MASTER[] = "Specified new master '%s' is already the current master.";
|
const char IS_MASTER[] = "Specified new master '%s' is already the current master.";
|
||||||
PRINT_MXS_JSON_ERROR(error, IS_MASTER, name);
|
PRINT_MXS_JSON_ERROR(error, IS_MASTER, new_master_cand->name());
|
||||||
}
|
}
|
||||||
else if (!is_slave)
|
else if (!is_slave)
|
||||||
{
|
{
|
||||||
const char NOT_SLAVE[] = "Specified new master '%s' is not a slave.";
|
const char NOT_SLAVE[] = "Specified new master '%s' is not a slave.";
|
||||||
PRINT_MXS_JSON_ERROR(error, NOT_SLAVE, name);
|
PRINT_MXS_JSON_ERROR(error, NOT_SLAVE, new_master_cand->name());
|
||||||
}
|
}
|
||||||
|
|
||||||
return !is_master && is_slave;
|
return !is_master && is_slave;
|
||||||
@ -1274,7 +1272,7 @@ bool MariaDBMonitor::failover_check(json_t** error_out)
|
|||||||
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
|
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
|
||||||
{
|
{
|
||||||
MariaDBServer* server = *iter;
|
MariaDBServer* server = *iter;
|
||||||
uint64_t status_bits = server->m_server_base->server->status;
|
uint64_t status_bits = server->m_server_base->pending_status;
|
||||||
uint64_t master_up = (SERVER_MASTER | SERVER_RUNNING);
|
uint64_t master_up = (SERVER_MASTER | SERVER_RUNNING);
|
||||||
if ((status_bits & master_up) == master_up)
|
if ((status_bits & master_up) == master_up)
|
||||||
{
|
{
|
||||||
@ -1509,13 +1507,17 @@ bool MariaDBMonitor::switchover_check(SERVER* new_master, SERVER* current_master
|
|||||||
new_master_ok = false;
|
new_master_ok = false;
|
||||||
PRINT_MXS_JSON_ERROR(error_out, NO_SERVER, new_master->name, m_monitor_base->name);
|
PRINT_MXS_JSON_ERROR(error_out, NO_SERVER, new_master->name, m_monitor_base->name);
|
||||||
}
|
}
|
||||||
else if (!switchover_check_new(mon_new_master, error_out))
|
|
||||||
{
|
|
||||||
new_master_ok = false;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
*new_master_out = get_server_info(mon_new_master);
|
MariaDBServer* found_new_master = get_server_info(mon_new_master);
|
||||||
|
if (switchover_check_new(found_new_master, error_out))
|
||||||
|
{
|
||||||
|
*new_master_out = found_new_master;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
new_master_ok = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -351,7 +351,22 @@ void MariaDBMonitor::main_loop()
|
|||||||
clock_gettime(CLOCK_MONOTONIC_COARSE, &loop_start);
|
clock_gettime(CLOCK_MONOTONIC_COARSE, &loop_start);
|
||||||
|
|
||||||
lock_monitor_servers(m_monitor_base);
|
lock_monitor_servers(m_monitor_base);
|
||||||
servers_status_pending_to_current(m_monitor_base);
|
/* Read any admin status changes from SERVER->status_pending. */
|
||||||
|
if (m_monitor_base->server_pending_changes)
|
||||||
|
{
|
||||||
|
servers_status_pending_to_current(m_monitor_base);
|
||||||
|
}
|
||||||
|
/* Update MXS_MONITORED_SERVER->pending_status. This is where the monitor loop writes it's findings.
|
||||||
|
* Also, backup current status so that it can be compared to any deduced state. */
|
||||||
|
for (auto mon_srv = m_monitor_base->monitored_servers; mon_srv; mon_srv = mon_srv->next)
|
||||||
|
{
|
||||||
|
auto status = mon_srv->server->status;
|
||||||
|
mon_srv->pending_status = status;
|
||||||
|
mon_srv->mon_prev_status = status;
|
||||||
|
}
|
||||||
|
/* Avoid reading/writing SERVER->status while servers are not locked. Routers read the state
|
||||||
|
* all the time. */
|
||||||
|
release_monitor_servers(m_monitor_base);
|
||||||
|
|
||||||
// Query all servers for their status.
|
// Query all servers for their status.
|
||||||
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
|
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
|
||||||
@ -409,10 +424,8 @@ void MariaDBMonitor::main_loop()
|
|||||||
|
|
||||||
if (root_master && root_master->is_master())
|
if (root_master && root_master->is_master())
|
||||||
{
|
{
|
||||||
SERVER* root_master_server = root_master->m_server_base->server;
|
|
||||||
// Clear slave and stale slave status bits from current master
|
// Clear slave and stale slave status bits from current master
|
||||||
server_clear_status_nolock(root_master_server, SERVER_SLAVE | SERVER_STALE_SLAVE);
|
root_master->clear_status(SERVER_SLAVE | SERVER_STALE_SLAVE);
|
||||||
monitor_clear_pending_status(root_master->m_server_base, SERVER_SLAVE | SERVER_STALE_SLAVE);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Clear external slave status from master if configured to do so.
|
* Clear external slave status from master if configured to do so.
|
||||||
@ -421,59 +434,83 @@ void MariaDBMonitor::main_loop()
|
|||||||
*/
|
*/
|
||||||
if (m_ignore_external_masters)
|
if (m_ignore_external_masters)
|
||||||
{
|
{
|
||||||
monitor_clear_pending_status(root_master->m_server_base, SERVER_SLAVE_OF_EXTERNAL_MASTER);
|
root_master->clear_status(SERVER_SLAVE_OF_EXTERNAL_MASTER);
|
||||||
server_clear_status_nolock(root_master_server, SERVER_SLAVE_OF_EXTERNAL_MASTER);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ss_dassert(root_master == NULL || root_master == m_master);
|
ss_dassert(root_master == NULL || root_master == m_master);
|
||||||
ss_dassert(root_master == NULL ||
|
ss_dassert(root_master == NULL ||
|
||||||
((root_master->m_server_base->server->status & (SERVER_SLAVE | SERVER_MASTER)) !=
|
((root_master->m_server_base->pending_status & (SERVER_SLAVE | SERVER_MASTER)) !=
|
||||||
(SERVER_SLAVE | SERVER_MASTER)));
|
(SERVER_SLAVE | SERVER_MASTER)));
|
||||||
|
|
||||||
/**
|
|
||||||
* After updating the status of all servers, check if monitor events
|
|
||||||
* need to be launched.
|
|
||||||
*/
|
|
||||||
mon_process_state_changes(m_monitor_base, m_script.c_str(), m_events);
|
|
||||||
m_cluster_modified = false;
|
|
||||||
|
|
||||||
if (m_auto_failover)
|
|
||||||
{
|
|
||||||
m_cluster_modified = handle_auto_failover();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* log master detection failure of first master becomes available after failure */
|
|
||||||
log_master_changes(root_master, &log_no_master);
|
|
||||||
|
|
||||||
/* Generate the replication heartbeat event by performing an update */
|
/* Generate the replication heartbeat event by performing an update */
|
||||||
if (m_detect_replication_lag && root_master &&
|
if (m_detect_replication_lag && root_master &&
|
||||||
(root_master->is_master() || SERVER_IS_RELAY_SERVER(root_master->m_server_base->server)))
|
(root_master->is_master() || root_master->is_relay_server()))
|
||||||
{
|
{
|
||||||
measure_replication_lag(root_master);
|
measure_replication_lag(root_master);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do not auto-join servers on this monitor loop if a failover (or any other cluster modification)
|
/* Servers have been queried and states deduced. Lock servers for a short while again while we update
|
||||||
// has been performed, as server states have not been updated yet. It will happen next iteration.
|
* the shared server states and make the changes visible. There is a problem though: the admin could
|
||||||
if (!config_get_global_options()->passive && m_auto_rejoin &&
|
* have changed the shared state while the queries were running. In this rare case, the deduced
|
||||||
!m_cluster_modified && cluster_can_be_joined())
|
* server states no longer apply and may be inconsistent. The most straightforward solution to this
|
||||||
|
* is to discard the results of the latest monitor loop and start another immediately. Cluster
|
||||||
|
* modifications are not performed until the next loop. The discarding of results is not perfect as
|
||||||
|
* some data has already been written. But this data is only used by the monitor itself and will be
|
||||||
|
* rewritten next loop. TODO: make versions of log_master_changes() and mon_process_state_changes()
|
||||||
|
* which read from MXS_MONITORED_SERVER->pending_status instead of SERVER->status. Then this locking
|
||||||
|
* and releasing can be moved after failover etc and the user changes can be applied gracefully. */
|
||||||
|
lock_monitor_servers(m_monitor_base);
|
||||||
|
if (m_monitor_base->server_pending_changes)
|
||||||
{
|
{
|
||||||
// Check if any servers should be autojoined to the cluster and try to join them.
|
MXS_WARNING("MaxAdmin or REST API modified the state of a server monitored by '%s' during a "
|
||||||
handle_auto_rejoin();
|
"monitor iteration. Discarding the monitor results and retrying.",
|
||||||
|
m_monitor_base->name);
|
||||||
|
release_monitor_servers(m_monitor_base);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
/* Check if any slave servers have read-only off and turn it on if user so wishes. Again, do not
|
|
||||||
* perform this if cluster has been modified this loop since it may not be clear which server
|
|
||||||
* should be a slave. */
|
|
||||||
if (!config_get_global_options()->passive && m_enforce_read_only_slaves && !m_cluster_modified)
|
|
||||||
{
|
{
|
||||||
enforce_read_only_on_slaves();
|
// Update shared status.
|
||||||
}
|
for (auto mon_srv = m_monitor_base->monitored_servers; mon_srv; mon_srv = mon_srv->next)
|
||||||
|
{
|
||||||
|
auto new_status = mon_srv->pending_status;
|
||||||
|
auto srv = mon_srv->server;
|
||||||
|
srv->status = new_status;
|
||||||
|
srv->status_pending = new_status;
|
||||||
|
}
|
||||||
|
|
||||||
mon_hangup_failed_servers(m_monitor_base);
|
/* Check if monitor events need to be launched. */
|
||||||
servers_status_current_to_pending(m_monitor_base);
|
mon_process_state_changes(m_monitor_base, m_script.c_str(), m_events);
|
||||||
store_server_journal(m_monitor_base, m_master ? m_master->m_server_base : NULL);
|
m_cluster_modified = false;
|
||||||
release_monitor_servers(m_monitor_base);
|
if (m_auto_failover)
|
||||||
|
{
|
||||||
|
m_cluster_modified = handle_auto_failover();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* log master detection failure of first master becomes available after failure */
|
||||||
|
log_master_changes(root_master, &log_no_master);
|
||||||
|
|
||||||
|
// Do not auto-join servers on this monitor loop if a failover (or any other cluster modification)
|
||||||
|
// has been performed, as server states have not been updated yet. It will happen next iteration.
|
||||||
|
if (!config_get_global_options()->passive && m_auto_rejoin && !m_cluster_modified &&
|
||||||
|
cluster_can_be_joined())
|
||||||
|
{
|
||||||
|
// Check if any servers should be autojoined to the cluster and try to join them.
|
||||||
|
handle_auto_rejoin();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if any slave servers have read-only off and turn it on if user so wishes. Again, do not
|
||||||
|
* perform this if cluster has been modified this loop since it may not be clear which server
|
||||||
|
* should be a slave. */
|
||||||
|
if (!config_get_global_options()->passive && m_enforce_read_only_slaves && !m_cluster_modified)
|
||||||
|
{
|
||||||
|
enforce_read_only_on_slaves();
|
||||||
|
}
|
||||||
|
|
||||||
|
mon_hangup_failed_servers(m_monitor_base);
|
||||||
|
store_server_journal(m_monitor_base, m_master ? m_master->m_server_base : NULL);
|
||||||
|
release_monitor_servers(m_monitor_base);
|
||||||
|
}
|
||||||
|
|
||||||
// Check how much the monitor should sleep to get one full monitor interval.
|
// Check how much the monitor should sleep to get one full monitor interval.
|
||||||
timespec loop_end;
|
timespec loop_end;
|
||||||
@ -555,10 +592,10 @@ void MariaDBMonitor::measure_replication_lag(MariaDBServer* root_master)
|
|||||||
{
|
{
|
||||||
MariaDBServer* server = *iter;
|
MariaDBServer* server = *iter;
|
||||||
MXS_MONITORED_SERVER* ptr = server->m_server_base;
|
MXS_MONITORED_SERVER* ptr = server->m_server_base;
|
||||||
if ((!SERVER_IN_MAINT(ptr->server)) && server->is_running())
|
if ((!server->is_in_maintenance()) && server->is_running())
|
||||||
{
|
{
|
||||||
if (ptr->server->node_id != mon_root_master->server->node_id &&
|
if (ptr->server->node_id != mon_root_master->server->node_id &&
|
||||||
(server->is_slave() || SERVER_IS_RELAY_SERVER(ptr->server)) &&
|
(server->is_slave() || server->is_relay_server()) &&
|
||||||
(server->m_version == MariaDBServer::version::MARIADB_MYSQL_55 ||
|
(server->m_version == MariaDBServer::version::MARIADB_MYSQL_55 ||
|
||||||
server->m_version == MariaDBServer::version::MARIADB_100)) // No select lag for Binlog Server
|
server->m_version == MariaDBServer::version::MARIADB_100)) // No select lag for Binlog Server
|
||||||
{
|
{
|
||||||
@ -571,14 +608,13 @@ void MariaDBMonitor::measure_replication_lag(MariaDBServer* root_master)
|
|||||||
void MariaDBMonitor::log_master_changes(MariaDBServer* root_master_server, int* log_no_master)
|
void MariaDBMonitor::log_master_changes(MariaDBServer* root_master_server, int* log_no_master)
|
||||||
{
|
{
|
||||||
MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->m_server_base : NULL;
|
MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->m_server_base : NULL;
|
||||||
if (root_master &&
|
if (root_master && mon_status_changed(root_master) &&
|
||||||
mon_status_changed(root_master) &&
|
!(root_master->pending_status & SERVER_STALE_STATUS))
|
||||||
!(root_master->server->status & SERVER_STALE_STATUS))
|
|
||||||
{
|
{
|
||||||
if (root_master->pending_status & (SERVER_MASTER) && SERVER_IS_RUNNING(root_master->server))
|
if ((root_master->pending_status & SERVER_MASTER) && root_master_server->is_running())
|
||||||
{
|
{
|
||||||
if (!(root_master->mon_prev_status & SERVER_STALE_STATUS) &&
|
if (!(root_master->mon_prev_status & SERVER_STALE_STATUS) &&
|
||||||
!(root_master->server->status & SERVER_MAINT))
|
!(root_master->pending_status & SERVER_MAINT))
|
||||||
{
|
{
|
||||||
MXS_NOTICE("A Master Server is now available: %s:%i",
|
MXS_NOTICE("A Master Server is now available: %s:%i",
|
||||||
root_master->server->address,
|
root_master->server->address,
|
||||||
|
@ -201,7 +201,7 @@ private:
|
|||||||
bool switchover_check(SERVER* new_master, SERVER* current_master,
|
bool switchover_check(SERVER* new_master, SERVER* current_master,
|
||||||
MariaDBServer** new_master_out, MariaDBServer** current_master_out,
|
MariaDBServer** new_master_out, MariaDBServer** current_master_out,
|
||||||
json_t** error_out);
|
json_t** error_out);
|
||||||
bool switchover_check_new(const MXS_MONITORED_SERVER* monitored_server, json_t** error);
|
bool switchover_check_new(const MariaDBServer* new_master_cand, json_t** error);
|
||||||
bool switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master,
|
bool switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master,
|
||||||
json_t** error_out) const;
|
json_t** error_out) const;
|
||||||
bool do_switchover(MariaDBServer** current_master, MariaDBServer** new_master, json_t** err_out);
|
bool do_switchover(MariaDBServer** current_master, MariaDBServer** new_master, json_t** err_out);
|
||||||
|
@ -391,22 +391,38 @@ bool MariaDBServer::wait_until_gtid(const GtidList& target, int timeout, json_t*
|
|||||||
|
|
||||||
bool MariaDBServer::is_master() const
|
bool MariaDBServer::is_master() const
|
||||||
{
|
{
|
||||||
return SERVER_IS_MASTER(m_server_base->server);
|
// Similar to macro SERVER_IS_MASTER
|
||||||
|
return SRV_MASTER_STATUS(m_server_base->pending_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBServer::is_slave() const
|
bool MariaDBServer::is_slave() const
|
||||||
{
|
{
|
||||||
return SERVER_IS_SLAVE(m_server_base->server);
|
// Similar to macro SERVER_IS_SLAVE
|
||||||
|
return (m_server_base->pending_status & (SERVER_RUNNING | SERVER_SLAVE | SERVER_MAINT)) ==
|
||||||
|
(SERVER_RUNNING | SERVER_SLAVE);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBServer::is_running() const
|
bool MariaDBServer::is_running() const
|
||||||
{
|
{
|
||||||
return SERVER_IS_RUNNING(m_server_base->server);
|
// Similar to macro SERVER_IS_RUNNING
|
||||||
|
return (m_server_base->pending_status & (SERVER_RUNNING | SERVER_MAINT)) == SERVER_RUNNING;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBServer::is_down() const
|
bool MariaDBServer::is_down() const
|
||||||
{
|
{
|
||||||
return SERVER_IS_DOWN(m_server_base->server);
|
// Similar to macro SERVER_IS_DOWN
|
||||||
|
return (m_server_base->pending_status & SERVER_RUNNING) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MariaDBServer::is_in_maintenance() const
|
||||||
|
{
|
||||||
|
return m_server_base->pending_status & SERVER_MAINT;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MariaDBServer::is_relay_server() const
|
||||||
|
{
|
||||||
|
return (m_server_base->pending_status & (SERVER_RUNNING | SERVER_MASTER | SERVER_SLAVE | SERVER_MAINT)) ==
|
||||||
|
(SERVER_RUNNING | SERVER_MASTER | SERVER_SLAVE);
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* MariaDBServer::name() const
|
const char* MariaDBServer::name() const
|
||||||
@ -666,21 +682,15 @@ bool MariaDBServer::run_sql_from_file(const string& path, json_t** error_out)
|
|||||||
|
|
||||||
void MariaDBServer::update_server(MXS_MONITOR* base_monitor)
|
void MariaDBServer::update_server(MXS_MONITOR* base_monitor)
|
||||||
{
|
{
|
||||||
/* Backup current status so that it can be compared to the new status. Also, we should be careful when
|
|
||||||
* modifying 'server->status' as routers can read it at any moment. It's best to write to
|
|
||||||
* 'mon_server->pending_status' until the status is final and then write it to 'server->status'. */
|
|
||||||
auto current_status = m_server_base->server->status;
|
|
||||||
m_server_base->mon_prev_status = current_status;
|
|
||||||
m_server_base->pending_status = current_status;
|
|
||||||
|
|
||||||
/* Monitor current node if not in maintenance. */
|
/* Monitor current node if not in maintenance. */
|
||||||
if (!SERVER_IN_MAINT(m_server_base->server))
|
bool in_maintenance = m_server_base->pending_status & SERVER_MAINT;
|
||||||
|
if (!in_maintenance)
|
||||||
{
|
{
|
||||||
monitor_server(base_monitor);
|
monitor_server(base_monitor);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Increase or reset the error count of the server. */
|
/** Increase or reset the error count of the server. */
|
||||||
m_server_base->mon_err_count = (is_down()) ? m_server_base->mon_err_count + 1 : 0;
|
bool is_running = m_server_base->pending_status & SERVER_RUNNING;
|
||||||
|
m_server_base->mon_err_count = (is_running || in_maintenance) ? 0 : m_server_base->mon_err_count + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -693,15 +703,11 @@ void MariaDBServer::monitor_server(MXS_MONITOR* base_monitor)
|
|||||||
MXS_MONITORED_SERVER* mon_srv = m_server_base;
|
MXS_MONITORED_SERVER* mon_srv = m_server_base;
|
||||||
mxs_connect_result_t rval = mon_ping_or_connect_to_db(base_monitor, mon_srv);
|
mxs_connect_result_t rval = mon_ping_or_connect_to_db(base_monitor, mon_srv);
|
||||||
|
|
||||||
SERVER* srv = mon_srv->server;
|
|
||||||
MYSQL* conn = mon_srv->con; // mon_ping_or_connect_to_db() may have reallocated the MYSQL struct.
|
MYSQL* conn = mon_srv->con; // mon_ping_or_connect_to_db() may have reallocated the MYSQL struct.
|
||||||
if (mon_connection_is_ok(rval))
|
if (mon_connection_is_ok(rval))
|
||||||
{
|
{
|
||||||
server_clear_status_nolock(srv, SERVER_AUTH_ERROR);
|
clear_status(SERVER_AUTH_ERROR);
|
||||||
monitor_clear_pending_status(mon_srv, SERVER_AUTH_ERROR);
|
set_status(SERVER_RUNNING);
|
||||||
/* Store current status in both server and monitor server pending struct */
|
|
||||||
server_set_status_nolock(srv, SERVER_RUNNING);
|
|
||||||
monitor_set_pending_status(mon_srv, SERVER_RUNNING);
|
|
||||||
|
|
||||||
if (rval == MONITOR_CONN_NEWCONN_OK)
|
if (rval == MONITOR_CONN_NEWCONN_OK)
|
||||||
{
|
{
|
||||||
@ -712,22 +718,19 @@ void MariaDBServer::monitor_server(MXS_MONITOR* base_monitor)
|
|||||||
{
|
{
|
||||||
/* The current server is not running. Clear all but the stale master bit as it is used to detect
|
/* The current server is not running. Clear all but the stale master bit as it is used to detect
|
||||||
* masters that went down but came up. */
|
* masters that went down but came up. */
|
||||||
unsigned int all_bits = ~SERVER_STALE_STATUS;
|
clear_status(~SERVER_STALE_STATUS);
|
||||||
server_clear_status_nolock(srv, all_bits);
|
auto conn_errno = mysql_errno(conn);
|
||||||
monitor_clear_pending_status(mon_srv, all_bits);
|
if (conn_errno == ER_ACCESS_DENIED_ERROR || conn_errno == ER_ACCESS_DENIED_NO_PASSWORD_ERROR)
|
||||||
|
|
||||||
if (mysql_errno(conn) == ER_ACCESS_DENIED_ERROR)
|
|
||||||
{
|
{
|
||||||
server_set_status_nolock(srv, SERVER_AUTH_ERROR);
|
set_status(SERVER_AUTH_ERROR);
|
||||||
monitor_set_pending_status(mon_srv, SERVER_AUTH_ERROR);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Log connect failure only once */
|
/* Log connect failure only once, that is, if server was RUNNING or MAINTENANCE during last
|
||||||
if (mon_status_changed(mon_srv) && mon_print_fail_status(mon_srv))
|
* iteration. */
|
||||||
|
if (m_server_base->mon_prev_status & (SERVER_RUNNING | SERVER_MAINT))
|
||||||
{
|
{
|
||||||
mon_log_connect_error(mon_srv, rval);
|
mon_log_connect_error(mon_srv, rval);
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -777,8 +780,7 @@ void MariaDBServer::monitor_server(MXS_MONITOR* base_monitor)
|
|||||||
bool MariaDBServer::update_slave_status(string* errmsg_out)
|
bool MariaDBServer::update_slave_status(string* errmsg_out)
|
||||||
{
|
{
|
||||||
/** Clear old states */
|
/** Clear old states */
|
||||||
monitor_clear_pending_status(m_server_base, SERVER_SLAVE | SERVER_MASTER | SERVER_RELAY_MASTER |
|
clear_status(SERVER_SLAVE | SERVER_MASTER | SERVER_RELAY_MASTER | SERVER_SLAVE_OF_EXTERNAL_MASTER);
|
||||||
SERVER_SLAVE_OF_EXTERNAL_MASTER);
|
|
||||||
|
|
||||||
bool rval = false;
|
bool rval = false;
|
||||||
if (do_show_slave_status(errmsg_out))
|
if (do_show_slave_status(errmsg_out))
|
||||||
@ -787,7 +789,7 @@ bool MariaDBServer::update_slave_status(string* errmsg_out)
|
|||||||
/* If all configured slaves are running set this node as slave */
|
/* If all configured slaves are running set this node as slave */
|
||||||
if (m_n_slaves_running > 0 && m_n_slaves_running == m_slave_status.size())
|
if (m_n_slaves_running > 0 && m_n_slaves_running == m_slave_status.size())
|
||||||
{
|
{
|
||||||
monitor_set_pending_status(m_server_base, SERVER_SLAVE);
|
set_status(SERVER_SLAVE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Store master_id of current node. */
|
/** Store master_id of current node. */
|
||||||
@ -840,6 +842,16 @@ void MariaDBServer::update_server_info()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MariaDBServer::clear_status(uint64_t bits)
|
||||||
|
{
|
||||||
|
monitor_clear_pending_status(m_server_base, bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
void MariaDBServer::set_status(uint64_t bits)
|
||||||
|
{
|
||||||
|
monitor_set_pending_status(m_server_base, bits);
|
||||||
|
}
|
||||||
|
|
||||||
string SlaveStatus::to_string() const
|
string SlaveStatus::to_string() const
|
||||||
{
|
{
|
||||||
using std::setw;
|
using std::setw;
|
||||||
|
@ -205,6 +205,16 @@ public:
|
|||||||
*/
|
*/
|
||||||
bool is_down() const;
|
bool is_down() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience method for SERVER_IN_MAINT
|
||||||
|
*/
|
||||||
|
bool is_in_maintenance() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience method for SERVER_IS_RELAY_SERVER
|
||||||
|
*/
|
||||||
|
bool is_relay_server() const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the server name.
|
* Returns the server name.
|
||||||
*
|
*
|
||||||
@ -299,6 +309,20 @@ public:
|
|||||||
*/
|
*/
|
||||||
void update_server(MXS_MONITOR* base_monitor);
|
void update_server(MXS_MONITOR* base_monitor);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear server pending status flags.
|
||||||
|
*
|
||||||
|
* @param bits Which flags to clear
|
||||||
|
*/
|
||||||
|
void clear_status(uint64_t bits);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set server pending status flags.
|
||||||
|
*
|
||||||
|
* @param bits Which flags to set
|
||||||
|
*/
|
||||||
|
void set_status(uint64_t bits);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void monitor_server(MXS_MONITOR* base_monitor);
|
void monitor_server(MXS_MONITOR* base_monitor);
|
||||||
bool update_slave_status(std::string* errmsg_out = NULL);
|
bool update_slave_status(std::string* errmsg_out = NULL);
|
||||||
|
Reference in New Issue
Block a user