MXS-1775 Monitor loop and monitoring separated

In preparation for moving the monitor loop to MonitorInstance.
This commit is contained in:
Johan Wikman 2018-05-16 15:28:49 +03:00
parent 6fff5a4f23
commit 2396b555f8
10 changed files with 234 additions and 204 deletions

View File

@ -128,16 +128,7 @@ void AuroraMonitor::main()
lock_monitor_servers(m_monitor);
servers_status_pending_to_current(m_monitor);
for (MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; ptr; ptr = ptr->next)
{
update_server_status(m_monitor, ptr);
if (SERVER_IS_DOWN(ptr->server))
{
/** Hang up all DCBs connected to the failed server */
dcb_hangup_foreach(ptr->server);
}
}
tick();
/**
* After updating the status of all servers, check if monitor events
@ -164,6 +155,20 @@ void AuroraMonitor::main()
}
}
void AuroraMonitor::tick()
{
for (MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; ptr; ptr = ptr->next)
{
update_server_status(m_monitor, ptr);
if (SERVER_IS_DOWN(ptr->server))
{
/** Hang up all DCBs connected to the failed server */
dcb_hangup_foreach(ptr->server);
}
}
}
bool AuroraMonitor::has_sufficient_permissions() const
{
return check_monitor_permissions(m_monitor, "SELECT @@aurora_server_id, server_id FROM "

View File

@ -37,6 +37,7 @@ private:
bool has_sufficient_permissions() const;
void configure(const MXS_CONFIG_PARAMETER* params);
void tick();
void main();
};

View File

@ -477,110 +477,10 @@ void GaleraMonitor::main()
nrounds += 1;
int is_cluster = 0;
lock_monitor_servers(m_monitor);
servers_status_pending_to_current(m_monitor);
MXS_MONITORED_SERVER* ptr = m_monitor->monitored_servers;
while (ptr)
{
ptr->mon_prev_status = ptr->server->status;
monitorDatabase(ptr);
/* Log server status change */
if (mon_status_changed(ptr))
{
MXS_DEBUG("Backend server [%s]:%d state : %s",
ptr->server->address,
ptr->server->port,
STRSRVSTATUS(ptr->server));
}
if (SERVER_IS_DOWN(ptr->server))
{
/** Increase this server'e error count */
ptr->mon_err_count += 1;
}
else
{
/** Reset this server's error count */
ptr->mon_err_count = 0;
}
ptr = ptr->next;
}
/* Try to set a Galera cluster based on
* UUID and cluster_size each node reports:
* no multiple clusters UUID are allowed.
*/
set_galera_cluster();
/*
* Let's select a master server:
* it could be the candidate master following MXS_MIN(node_id) rule or
* the server that was master in the previous monitor polling cycle
* Decision depends on master_stickiness value set in configuration
*/
/* get the candidate master, following MXS_MIN(node_id) rule */
MXS_MONITORED_SERVER *candidate_master = get_candidate_master();
m_master = set_cluster_master(m_master, candidate_master, m_disableMasterFailback);
ptr = m_monitor->monitored_servers;
while (ptr)
{
const int repl_bits = (SERVER_SLAVE | SERVER_MASTER | SERVER_MASTER_STICKINESS);
if (SERVER_IS_JOINED(ptr->server) && !m_disableMasterRoleSetting)
{
if (ptr != m_master)
{
/* set the Slave role and clear master stickiness */
server_clear_set_status(ptr->server, repl_bits, SERVER_SLAVE);
}
else
{
if (candidate_master &&
m_master->server->node_id != candidate_master->server->node_id)
{
/* set master role and master stickiness */
server_clear_set_status(ptr->server, repl_bits,
(SERVER_MASTER | SERVER_MASTER_STICKINESS));
}
else
{
/* set master role and clear master stickiness */
server_clear_set_status(ptr->server, repl_bits, SERVER_MASTER);
}
}
is_cluster++;
}
else
{
server_clear_set_status(ptr->server, repl_bits, 0);
}
ptr = ptr->next;
}
if (is_cluster == 0 && m_log_no_members)
{
MXS_ERROR("There are no cluster members");
m_log_no_members = false;
}
else
{
if (is_cluster > 0 && m_log_no_members == 0)
{
MXS_NOTICE("Found cluster members");
m_log_no_members = true;
}
}
tick();
/**
* After updating the status of all servers, check if monitor events
@ -592,19 +492,124 @@ void GaleraMonitor::main()
servers_status_current_to_pending(m_monitor);
/* Set the global var "wsrep_sst_donor"
* with a sorted list of "wsrep_node_name" for slave nodes
*/
if (m_set_donor_nodes)
{
update_sst_donor_nodes(is_cluster);
}
store_server_journal(m_monitor, NULL);
release_monitor_servers(m_monitor);
}
}
void GaleraMonitor::tick()
{
int is_cluster = 0;
MXS_MONITORED_SERVER* ptr = m_monitor->monitored_servers;
while (ptr)
{
ptr->mon_prev_status = ptr->server->status;
monitorDatabase(ptr);
/* Log server status change */
if (mon_status_changed(ptr))
{
MXS_DEBUG("Backend server [%s]:%d state : %s",
ptr->server->address,
ptr->server->port,
STRSRVSTATUS(ptr->server));
}
if (SERVER_IS_DOWN(ptr->server))
{
/** Increase this server'e error count */
ptr->mon_err_count += 1;
}
else
{
/** Reset this server's error count */
ptr->mon_err_count = 0;
}
ptr = ptr->next;
}
/* Try to set a Galera cluster based on
* UUID and cluster_size each node reports:
* no multiple clusters UUID are allowed.
*/
set_galera_cluster();
/*
* Let's select a master server:
* it could be the candidate master following MXS_MIN(node_id) rule or
* the server that was master in the previous monitor polling cycle
* Decision depends on master_stickiness value set in configuration
*/
/* get the candidate master, following MXS_MIN(node_id) rule */
MXS_MONITORED_SERVER *candidate_master = get_candidate_master();
m_master = set_cluster_master(m_master, candidate_master, m_disableMasterFailback);
ptr = m_monitor->monitored_servers;
while (ptr)
{
const int repl_bits = (SERVER_SLAVE | SERVER_MASTER | SERVER_MASTER_STICKINESS);
if (SERVER_IS_JOINED(ptr->server) && !m_disableMasterRoleSetting)
{
if (ptr != m_master)
{
/* set the Slave role and clear master stickiness */
server_clear_set_status(ptr->server, repl_bits, SERVER_SLAVE);
}
else
{
if (candidate_master &&
m_master->server->node_id != candidate_master->server->node_id)
{
/* set master role and master stickiness */
server_clear_set_status(ptr->server, repl_bits,
(SERVER_MASTER | SERVER_MASTER_STICKINESS));
}
else
{
/* set master role and clear master stickiness */
server_clear_set_status(ptr->server, repl_bits, SERVER_MASTER);
}
}
is_cluster++;
}
else
{
server_clear_set_status(ptr->server, repl_bits, 0);
}
ptr = ptr->next;
}
if (is_cluster == 0 && m_log_no_members)
{
MXS_ERROR("There are no cluster members");
m_log_no_members = false;
}
else
{
if (is_cluster > 0 && m_log_no_members == 0)
{
MXS_NOTICE("Found cluster members");
m_log_no_members = true;
}
}
/* Set the global var "wsrep_sst_donor"
* with a sorted list of "wsrep_node_name" for slave nodes
*/
if (m_set_donor_nodes)
{
update_sst_donor_nodes(is_cluster);
}
}
/**
* get candidate master from all nodes
*

View File

@ -93,6 +93,7 @@ private:
bool has_sufficient_permissions() const;
void configure(const MXS_CONFIG_PARAMETER* param);
void tick();
void main();
};

View File

@ -192,12 +192,8 @@ void GRMon::main()
lock_monitor_servers(m_monitor);
servers_status_pending_to_current(m_monitor);
for (MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; ptr; ptr = ptr->next)
{
update_server_status(m_monitor, ptr);
}
tick();
mon_hangup_failed_servers(m_monitor);
/**
* After updating the status of all servers, check if monitor events
* need to be launched.
@ -206,6 +202,7 @@ void GRMon::main()
m_script.empty() ? NULL : m_script.c_str(),
m_events);
mon_hangup_failed_servers(m_monitor);
servers_status_current_to_pending(m_monitor);
store_server_journal(m_monitor, NULL);
release_monitor_servers(m_monitor);
@ -225,6 +222,14 @@ void GRMon::main()
}
}
void GRMon::tick()
{
for (MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; ptr; ptr = ptr->next)
{
update_server_status(m_monitor, ptr);
}
}
/**
* The module entry point routine. It is this routine that
* must populate the structure that is referred to as the

View File

@ -39,6 +39,7 @@ private:
bool has_sufficient_permissions() const;
void configure(const MXS_CONFIG_PARAMETER* params);
void tick();
void main();
};

View File

@ -467,69 +467,7 @@ void MMMonitor::main()
lock_monitor_servers(m_monitor);
servers_status_pending_to_current(m_monitor);
/* start from the first server in the list */
MXS_MONITORED_SERVER* ptr = m_monitor->monitored_servers;
while (ptr)
{
/* copy server status into monitor pending_status */
ptr->pending_status = ptr->server->status;
/* monitor current node */
monitorDatabase(m_monitor, ptr);
if (mon_status_changed(ptr) ||
mon_print_fail_status(ptr))
{
MXS_DEBUG("Backend server [%s]:%d state : %s",
ptr->server->address,
ptr->server->port,
STRSRVSTATUS(ptr->server));
}
if (SERVER_IS_DOWN(ptr->server))
{
/** Increase this server'e error count */
ptr->mon_err_count += 1;
}
else
{
/** Reset this server's error count */
ptr->mon_err_count = 0;
}
ptr = ptr->next;
}
/* Get Master server pointer */
MXS_MONITORED_SERVER *root_master = get_current_master();
/* Update server status from monitor pending status on that server*/
ptr = m_monitor->monitored_servers;
while (ptr)
{
if (!SERVER_IN_MAINT(ptr->server))
{
/* If "detect_stale_master" option is On, let's use the previus master */
if (m_detectStaleMaster && root_master &&
(!strcmp(ptr->server->address, root_master->server->address) &&
ptr->server->port == root_master->server->port) && (ptr->server->status & SERVER_MASTER) &&
!(ptr->pending_status & SERVER_MASTER))
{
/* in this case server->status will not be updated from pending_status */
MXS_NOTICE("root server [%s:%i] is no longer Master, let's "
"use it again even if it could be a stale master, you have "
"been warned!", ptr->server->address, ptr->server->port);
/* Set the STALE bit for this server in server struct */
server_set_status_nolock(ptr->server, SERVER_STALE_STATUS);
}
else
{
ptr->server->status = ptr->pending_status;
}
}
ptr = ptr->next;
}
tick();
/**
* After updating the status of all servers, check if monitor events
@ -544,6 +482,73 @@ void MMMonitor::main()
}
}
void MMMonitor::tick()
{
/* start from the first server in the list */
MXS_MONITORED_SERVER* ptr = m_monitor->monitored_servers;
while (ptr)
{
/* copy server status into monitor pending_status */
ptr->pending_status = ptr->server->status;
/* monitor current node */
monitorDatabase(m_monitor, ptr);
if (mon_status_changed(ptr) ||
mon_print_fail_status(ptr))
{
MXS_DEBUG("Backend server [%s]:%d state : %s",
ptr->server->address,
ptr->server->port,
STRSRVSTATUS(ptr->server));
}
if (SERVER_IS_DOWN(ptr->server))
{
/** Increase this server'e error count */
ptr->mon_err_count += 1;
}
else
{
/** Reset this server's error count */
ptr->mon_err_count = 0;
}
ptr = ptr->next;
}
/* Get Master server pointer */
MXS_MONITORED_SERVER *root_master = get_current_master();
/* Update server status from monitor pending status on that server*/
ptr = m_monitor->monitored_servers;
while (ptr)
{
if (!SERVER_IN_MAINT(ptr->server))
{
/* If "detect_stale_master" option is On, let's use the previus master */
if (m_detectStaleMaster && root_master &&
(!strcmp(ptr->server->address, root_master->server->address) &&
ptr->server->port == root_master->server->port) && (ptr->server->status & SERVER_MASTER) &&
!(ptr->pending_status & SERVER_MASTER))
{
/* in this case server->status will not be updated from pending_status */
MXS_NOTICE("root server [%s:%i] is no longer Master, let's "
"use it again even if it could be a stale master, you have "
"been warned!", ptr->server->address, ptr->server->port);
/* Set the STALE bit for this server in server struct */
server_set_status_nolock(ptr->server, SERVER_STALE_STATUS);
}
else
{
ptr->server->status = ptr->pending_status;
}
}
ptr = ptr->next;
}
}
/**
* Enable/Disable the MySQL Replication Stale Master detection, allowing a previously detected master to still act as a Master.
* This option must be enabled in order to keep the Master when the replication is stopped or removed from slaves.

View File

@ -43,6 +43,7 @@ private:
bool has_sufficient_permissions() const;
void configure(const MXS_CONFIG_PARAMETER* params);
void tick();
void main();
};

View File

@ -272,23 +272,7 @@ void NDBCMonitor::main()
lock_monitor_servers(m_monitor);
servers_status_pending_to_current(m_monitor);
MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers;
while (ptr)
{
ptr->mon_prev_status = ptr->server->status;
monitorDatabase(ptr, m_monitor->user, m_monitor->password, m_monitor);
if (ptr->server->status != ptr->mon_prev_status ||
SERVER_IS_DOWN(ptr->server))
{
MXS_DEBUG("Backend server [%s]:%d state : %s",
ptr->server->address,
ptr->server->port,
STRSRVSTATUS(ptr->server));
}
ptr = ptr->next;
}
tick();
/**
* After updating the status of all servers, check if monitor events
@ -302,3 +286,24 @@ void NDBCMonitor::main()
release_monitor_servers(m_monitor);
}
}
void NDBCMonitor::tick()
{
MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers;
while (ptr)
{
ptr->mon_prev_status = ptr->server->status;
monitorDatabase(ptr, m_monitor->user, m_monitor->password, m_monitor);
if (ptr->server->status != ptr->mon_prev_status ||
SERVER_IS_DOWN(ptr->server))
{
MXS_DEBUG("Backend server [%s]:%d state : %s",
ptr->server->address,
ptr->server->port,
STRSRVSTATUS(ptr->server));
}
ptr = ptr->next;
}
}

View File

@ -40,6 +40,7 @@ private:
bool has_sufficient_permissions() const;
void configure(const MXS_CONFIG_PARAMETER* params);
void tick();
void main();
};