MXS-1775 Monitor loop and monitoring separated
In preparation for moving the monitor loop to MonitorInstance.
This commit is contained in:
parent
6fff5a4f23
commit
2396b555f8
@ -128,16 +128,7 @@ void AuroraMonitor::main()
|
||||
lock_monitor_servers(m_monitor);
|
||||
servers_status_pending_to_current(m_monitor);
|
||||
|
||||
for (MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; ptr; ptr = ptr->next)
|
||||
{
|
||||
update_server_status(m_monitor, ptr);
|
||||
|
||||
if (SERVER_IS_DOWN(ptr->server))
|
||||
{
|
||||
/** Hang up all DCBs connected to the failed server */
|
||||
dcb_hangup_foreach(ptr->server);
|
||||
}
|
||||
}
|
||||
tick();
|
||||
|
||||
/**
|
||||
* After updating the status of all servers, check if monitor events
|
||||
@ -164,6 +155,20 @@ void AuroraMonitor::main()
|
||||
}
|
||||
}
|
||||
|
||||
void AuroraMonitor::tick()
|
||||
{
|
||||
for (MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; ptr; ptr = ptr->next)
|
||||
{
|
||||
update_server_status(m_monitor, ptr);
|
||||
|
||||
if (SERVER_IS_DOWN(ptr->server))
|
||||
{
|
||||
/** Hang up all DCBs connected to the failed server */
|
||||
dcb_hangup_foreach(ptr->server);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool AuroraMonitor::has_sufficient_permissions() const
|
||||
{
|
||||
return check_monitor_permissions(m_monitor, "SELECT @@aurora_server_id, server_id FROM "
|
||||
|
@ -37,6 +37,7 @@ private:
|
||||
|
||||
bool has_sufficient_permissions() const;
|
||||
void configure(const MXS_CONFIG_PARAMETER* params);
|
||||
void tick();
|
||||
|
||||
void main();
|
||||
};
|
||||
|
@ -477,110 +477,10 @@ void GaleraMonitor::main()
|
||||
|
||||
nrounds += 1;
|
||||
|
||||
int is_cluster = 0;
|
||||
|
||||
lock_monitor_servers(m_monitor);
|
||||
servers_status_pending_to_current(m_monitor);
|
||||
|
||||
MXS_MONITORED_SERVER* ptr = m_monitor->monitored_servers;
|
||||
while (ptr)
|
||||
{
|
||||
ptr->mon_prev_status = ptr->server->status;
|
||||
|
||||
monitorDatabase(ptr);
|
||||
|
||||
/* Log server status change */
|
||||
if (mon_status_changed(ptr))
|
||||
{
|
||||
MXS_DEBUG("Backend server [%s]:%d state : %s",
|
||||
ptr->server->address,
|
||||
ptr->server->port,
|
||||
STRSRVSTATUS(ptr->server));
|
||||
}
|
||||
|
||||
if (SERVER_IS_DOWN(ptr->server))
|
||||
{
|
||||
/** Increase this server'e error count */
|
||||
ptr->mon_err_count += 1;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
/** Reset this server's error count */
|
||||
ptr->mon_err_count = 0;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
/* Try to set a Galera cluster based on
|
||||
* UUID and cluster_size each node reports:
|
||||
* no multiple clusters UUID are allowed.
|
||||
*/
|
||||
set_galera_cluster();
|
||||
|
||||
/*
|
||||
* Let's select a master server:
|
||||
* it could be the candidate master following MXS_MIN(node_id) rule or
|
||||
* the server that was master in the previous monitor polling cycle
|
||||
* Decision depends on master_stickiness value set in configuration
|
||||
*/
|
||||
|
||||
/* get the candidate master, following MXS_MIN(node_id) rule */
|
||||
MXS_MONITORED_SERVER *candidate_master = get_candidate_master();
|
||||
|
||||
m_master = set_cluster_master(m_master, candidate_master, m_disableMasterFailback);
|
||||
|
||||
ptr = m_monitor->monitored_servers;
|
||||
|
||||
while (ptr)
|
||||
{
|
||||
const int repl_bits = (SERVER_SLAVE | SERVER_MASTER | SERVER_MASTER_STICKINESS);
|
||||
if (SERVER_IS_JOINED(ptr->server) && !m_disableMasterRoleSetting)
|
||||
{
|
||||
if (ptr != m_master)
|
||||
{
|
||||
/* set the Slave role and clear master stickiness */
|
||||
server_clear_set_status(ptr->server, repl_bits, SERVER_SLAVE);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (candidate_master &&
|
||||
m_master->server->node_id != candidate_master->server->node_id)
|
||||
{
|
||||
/* set master role and master stickiness */
|
||||
server_clear_set_status(ptr->server, repl_bits,
|
||||
(SERVER_MASTER | SERVER_MASTER_STICKINESS));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* set master role and clear master stickiness */
|
||||
server_clear_set_status(ptr->server, repl_bits, SERVER_MASTER);
|
||||
}
|
||||
}
|
||||
|
||||
is_cluster++;
|
||||
}
|
||||
else
|
||||
{
|
||||
server_clear_set_status(ptr->server, repl_bits, 0);
|
||||
}
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if (is_cluster == 0 && m_log_no_members)
|
||||
{
|
||||
MXS_ERROR("There are no cluster members");
|
||||
m_log_no_members = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_cluster > 0 && m_log_no_members == 0)
|
||||
{
|
||||
MXS_NOTICE("Found cluster members");
|
||||
m_log_no_members = true;
|
||||
}
|
||||
}
|
||||
tick();
|
||||
|
||||
/**
|
||||
* After updating the status of all servers, check if monitor events
|
||||
@ -592,19 +492,124 @@ void GaleraMonitor::main()
|
||||
|
||||
servers_status_current_to_pending(m_monitor);
|
||||
|
||||
/* Set the global var "wsrep_sst_donor"
|
||||
* with a sorted list of "wsrep_node_name" for slave nodes
|
||||
*/
|
||||
if (m_set_donor_nodes)
|
||||
{
|
||||
update_sst_donor_nodes(is_cluster);
|
||||
}
|
||||
|
||||
store_server_journal(m_monitor, NULL);
|
||||
release_monitor_servers(m_monitor);
|
||||
}
|
||||
}
|
||||
|
||||
void GaleraMonitor::tick()
|
||||
{
|
||||
int is_cluster = 0;
|
||||
|
||||
MXS_MONITORED_SERVER* ptr = m_monitor->monitored_servers;
|
||||
while (ptr)
|
||||
{
|
||||
ptr->mon_prev_status = ptr->server->status;
|
||||
|
||||
monitorDatabase(ptr);
|
||||
|
||||
/* Log server status change */
|
||||
if (mon_status_changed(ptr))
|
||||
{
|
||||
MXS_DEBUG("Backend server [%s]:%d state : %s",
|
||||
ptr->server->address,
|
||||
ptr->server->port,
|
||||
STRSRVSTATUS(ptr->server));
|
||||
}
|
||||
|
||||
if (SERVER_IS_DOWN(ptr->server))
|
||||
{
|
||||
/** Increase this server'e error count */
|
||||
ptr->mon_err_count += 1;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
/** Reset this server's error count */
|
||||
ptr->mon_err_count = 0;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
/* Try to set a Galera cluster based on
|
||||
* UUID and cluster_size each node reports:
|
||||
* no multiple clusters UUID are allowed.
|
||||
*/
|
||||
set_galera_cluster();
|
||||
|
||||
/*
|
||||
* Let's select a master server:
|
||||
* it could be the candidate master following MXS_MIN(node_id) rule or
|
||||
* the server that was master in the previous monitor polling cycle
|
||||
* Decision depends on master_stickiness value set in configuration
|
||||
*/
|
||||
|
||||
/* get the candidate master, following MXS_MIN(node_id) rule */
|
||||
MXS_MONITORED_SERVER *candidate_master = get_candidate_master();
|
||||
|
||||
m_master = set_cluster_master(m_master, candidate_master, m_disableMasterFailback);
|
||||
|
||||
ptr = m_monitor->monitored_servers;
|
||||
|
||||
while (ptr)
|
||||
{
|
||||
const int repl_bits = (SERVER_SLAVE | SERVER_MASTER | SERVER_MASTER_STICKINESS);
|
||||
if (SERVER_IS_JOINED(ptr->server) && !m_disableMasterRoleSetting)
|
||||
{
|
||||
if (ptr != m_master)
|
||||
{
|
||||
/* set the Slave role and clear master stickiness */
|
||||
server_clear_set_status(ptr->server, repl_bits, SERVER_SLAVE);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (candidate_master &&
|
||||
m_master->server->node_id != candidate_master->server->node_id)
|
||||
{
|
||||
/* set master role and master stickiness */
|
||||
server_clear_set_status(ptr->server, repl_bits,
|
||||
(SERVER_MASTER | SERVER_MASTER_STICKINESS));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* set master role and clear master stickiness */
|
||||
server_clear_set_status(ptr->server, repl_bits, SERVER_MASTER);
|
||||
}
|
||||
}
|
||||
|
||||
is_cluster++;
|
||||
}
|
||||
else
|
||||
{
|
||||
server_clear_set_status(ptr->server, repl_bits, 0);
|
||||
}
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if (is_cluster == 0 && m_log_no_members)
|
||||
{
|
||||
MXS_ERROR("There are no cluster members");
|
||||
m_log_no_members = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_cluster > 0 && m_log_no_members == 0)
|
||||
{
|
||||
MXS_NOTICE("Found cluster members");
|
||||
m_log_no_members = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Set the global var "wsrep_sst_donor"
|
||||
* with a sorted list of "wsrep_node_name" for slave nodes
|
||||
*/
|
||||
if (m_set_donor_nodes)
|
||||
{
|
||||
update_sst_donor_nodes(is_cluster);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* get candidate master from all nodes
|
||||
*
|
||||
|
@ -93,6 +93,7 @@ private:
|
||||
|
||||
bool has_sufficient_permissions() const;
|
||||
void configure(const MXS_CONFIG_PARAMETER* param);
|
||||
void tick();
|
||||
|
||||
void main();
|
||||
};
|
||||
|
@ -192,12 +192,8 @@ void GRMon::main()
|
||||
lock_monitor_servers(m_monitor);
|
||||
servers_status_pending_to_current(m_monitor);
|
||||
|
||||
for (MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; ptr; ptr = ptr->next)
|
||||
{
|
||||
update_server_status(m_monitor, ptr);
|
||||
}
|
||||
tick();
|
||||
|
||||
mon_hangup_failed_servers(m_monitor);
|
||||
/**
|
||||
* After updating the status of all servers, check if monitor events
|
||||
* need to be launched.
|
||||
@ -206,6 +202,7 @@ void GRMon::main()
|
||||
m_script.empty() ? NULL : m_script.c_str(),
|
||||
m_events);
|
||||
|
||||
mon_hangup_failed_servers(m_monitor);
|
||||
servers_status_current_to_pending(m_monitor);
|
||||
store_server_journal(m_monitor, NULL);
|
||||
release_monitor_servers(m_monitor);
|
||||
@ -225,6 +222,14 @@ void GRMon::main()
|
||||
}
|
||||
}
|
||||
|
||||
void GRMon::tick()
|
||||
{
|
||||
for (MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; ptr; ptr = ptr->next)
|
||||
{
|
||||
update_server_status(m_monitor, ptr);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The module entry point routine. It is this routine that
|
||||
* must populate the structure that is referred to as the
|
||||
|
@ -39,6 +39,7 @@ private:
|
||||
|
||||
bool has_sufficient_permissions() const;
|
||||
void configure(const MXS_CONFIG_PARAMETER* params);
|
||||
void tick();
|
||||
|
||||
void main();
|
||||
};
|
||||
|
@ -467,69 +467,7 @@ void MMMonitor::main()
|
||||
lock_monitor_servers(m_monitor);
|
||||
servers_status_pending_to_current(m_monitor);
|
||||
|
||||
/* start from the first server in the list */
|
||||
MXS_MONITORED_SERVER* ptr = m_monitor->monitored_servers;
|
||||
|
||||
while (ptr)
|
||||
{
|
||||
/* copy server status into monitor pending_status */
|
||||
ptr->pending_status = ptr->server->status;
|
||||
|
||||
/* monitor current node */
|
||||
monitorDatabase(m_monitor, ptr);
|
||||
|
||||
if (mon_status_changed(ptr) ||
|
||||
mon_print_fail_status(ptr))
|
||||
{
|
||||
MXS_DEBUG("Backend server [%s]:%d state : %s",
|
||||
ptr->server->address,
|
||||
ptr->server->port,
|
||||
STRSRVSTATUS(ptr->server));
|
||||
}
|
||||
if (SERVER_IS_DOWN(ptr->server))
|
||||
{
|
||||
/** Increase this server'e error count */
|
||||
ptr->mon_err_count += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/** Reset this server's error count */
|
||||
ptr->mon_err_count = 0;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
/* Get Master server pointer */
|
||||
MXS_MONITORED_SERVER *root_master = get_current_master();
|
||||
|
||||
/* Update server status from monitor pending status on that server*/
|
||||
|
||||
ptr = m_monitor->monitored_servers;
|
||||
while (ptr)
|
||||
{
|
||||
if (!SERVER_IN_MAINT(ptr->server))
|
||||
{
|
||||
/* If "detect_stale_master" option is On, let's use the previus master */
|
||||
if (m_detectStaleMaster && root_master &&
|
||||
(!strcmp(ptr->server->address, root_master->server->address) &&
|
||||
ptr->server->port == root_master->server->port) && (ptr->server->status & SERVER_MASTER) &&
|
||||
!(ptr->pending_status & SERVER_MASTER))
|
||||
{
|
||||
/* in this case server->status will not be updated from pending_status */
|
||||
MXS_NOTICE("root server [%s:%i] is no longer Master, let's "
|
||||
"use it again even if it could be a stale master, you have "
|
||||
"been warned!", ptr->server->address, ptr->server->port);
|
||||
/* Set the STALE bit for this server in server struct */
|
||||
server_set_status_nolock(ptr->server, SERVER_STALE_STATUS);
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr->server->status = ptr->pending_status;
|
||||
}
|
||||
}
|
||||
ptr = ptr->next;
|
||||
}
|
||||
tick();
|
||||
|
||||
/**
|
||||
* After updating the status of all servers, check if monitor events
|
||||
@ -544,6 +482,73 @@ void MMMonitor::main()
|
||||
}
|
||||
}
|
||||
|
||||
void MMMonitor::tick()
|
||||
{
|
||||
/* start from the first server in the list */
|
||||
MXS_MONITORED_SERVER* ptr = m_monitor->monitored_servers;
|
||||
|
||||
while (ptr)
|
||||
{
|
||||
/* copy server status into monitor pending_status */
|
||||
ptr->pending_status = ptr->server->status;
|
||||
|
||||
/* monitor current node */
|
||||
monitorDatabase(m_monitor, ptr);
|
||||
|
||||
if (mon_status_changed(ptr) ||
|
||||
mon_print_fail_status(ptr))
|
||||
{
|
||||
MXS_DEBUG("Backend server [%s]:%d state : %s",
|
||||
ptr->server->address,
|
||||
ptr->server->port,
|
||||
STRSRVSTATUS(ptr->server));
|
||||
}
|
||||
if (SERVER_IS_DOWN(ptr->server))
|
||||
{
|
||||
/** Increase this server'e error count */
|
||||
ptr->mon_err_count += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/** Reset this server's error count */
|
||||
ptr->mon_err_count = 0;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
/* Get Master server pointer */
|
||||
MXS_MONITORED_SERVER *root_master = get_current_master();
|
||||
|
||||
/* Update server status from monitor pending status on that server*/
|
||||
|
||||
ptr = m_monitor->monitored_servers;
|
||||
while (ptr)
|
||||
{
|
||||
if (!SERVER_IN_MAINT(ptr->server))
|
||||
{
|
||||
/* If "detect_stale_master" option is On, let's use the previus master */
|
||||
if (m_detectStaleMaster && root_master &&
|
||||
(!strcmp(ptr->server->address, root_master->server->address) &&
|
||||
ptr->server->port == root_master->server->port) && (ptr->server->status & SERVER_MASTER) &&
|
||||
!(ptr->pending_status & SERVER_MASTER))
|
||||
{
|
||||
/* in this case server->status will not be updated from pending_status */
|
||||
MXS_NOTICE("root server [%s:%i] is no longer Master, let's "
|
||||
"use it again even if it could be a stale master, you have "
|
||||
"been warned!", ptr->server->address, ptr->server->port);
|
||||
/* Set the STALE bit for this server in server struct */
|
||||
server_set_status_nolock(ptr->server, SERVER_STALE_STATUS);
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr->server->status = ptr->pending_status;
|
||||
}
|
||||
}
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable/Disable the MySQL Replication Stale Master detection, allowing a previously detected master to still act as a Master.
|
||||
* This option must be enabled in order to keep the Master when the replication is stopped or removed from slaves.
|
||||
|
@ -43,6 +43,7 @@ private:
|
||||
|
||||
bool has_sufficient_permissions() const;
|
||||
void configure(const MXS_CONFIG_PARAMETER* params);
|
||||
void tick();
|
||||
|
||||
void main();
|
||||
};
|
||||
|
@ -272,23 +272,7 @@ void NDBCMonitor::main()
|
||||
lock_monitor_servers(m_monitor);
|
||||
servers_status_pending_to_current(m_monitor);
|
||||
|
||||
MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers;
|
||||
while (ptr)
|
||||
{
|
||||
ptr->mon_prev_status = ptr->server->status;
|
||||
monitorDatabase(ptr, m_monitor->user, m_monitor->password, m_monitor);
|
||||
|
||||
if (ptr->server->status != ptr->mon_prev_status ||
|
||||
SERVER_IS_DOWN(ptr->server))
|
||||
{
|
||||
MXS_DEBUG("Backend server [%s]:%d state : %s",
|
||||
ptr->server->address,
|
||||
ptr->server->port,
|
||||
STRSRVSTATUS(ptr->server));
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
tick();
|
||||
|
||||
/**
|
||||
* After updating the status of all servers, check if monitor events
|
||||
@ -302,3 +286,24 @@ void NDBCMonitor::main()
|
||||
release_monitor_servers(m_monitor);
|
||||
}
|
||||
}
|
||||
|
||||
void NDBCMonitor::tick()
|
||||
{
|
||||
MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers;
|
||||
while (ptr)
|
||||
{
|
||||
ptr->mon_prev_status = ptr->server->status;
|
||||
monitorDatabase(ptr, m_monitor->user, m_monitor->password, m_monitor);
|
||||
|
||||
if (ptr->server->status != ptr->mon_prev_status ||
|
||||
SERVER_IS_DOWN(ptr->server))
|
||||
{
|
||||
MXS_DEBUG("Backend server [%s]:%d state : %s",
|
||||
ptr->server->address,
|
||||
ptr->server->port,
|
||||
STRSRVSTATUS(ptr->server));
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
|
@ -40,6 +40,7 @@ private:
|
||||
|
||||
bool has_sufficient_permissions() const;
|
||||
void configure(const MXS_CONFIG_PARAMETER* params);
|
||||
void tick();
|
||||
|
||||
void main();
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user