From 2396b555f8acf60315bbd3372141c7657fcdd4b6 Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Wed, 16 May 2018 15:28:49 +0300 Subject: [PATCH] MXS-1775 Monitor loop and monitoring separated In preparation for moving the monitor loop to MonitorInstance. --- server/modules/monitor/auroramon/auroramon.cc | 25 +- server/modules/monitor/auroramon/auroramon.hh | 1 + server/modules/monitor/galeramon/galeramon.cc | 223 +++++++++--------- server/modules/monitor/galeramon/galeramon.hh | 1 + server/modules/monitor/grmon/grmon.cc | 15 +- server/modules/monitor/grmon/grmon.hh | 1 + server/modules/monitor/mmmon/mmmon.cc | 131 +++++----- server/modules/monitor/mmmon/mmmon.hh | 1 + .../monitor/ndbclustermon/ndbclustermon.cc | 39 +-- .../monitor/ndbclustermon/ndbclustermon.hh | 1 + 10 files changed, 234 insertions(+), 204 deletions(-) diff --git a/server/modules/monitor/auroramon/auroramon.cc b/server/modules/monitor/auroramon/auroramon.cc index 633640305..b31663f95 100644 --- a/server/modules/monitor/auroramon/auroramon.cc +++ b/server/modules/monitor/auroramon/auroramon.cc @@ -128,16 +128,7 @@ void AuroraMonitor::main() lock_monitor_servers(m_monitor); servers_status_pending_to_current(m_monitor); - for (MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; ptr; ptr = ptr->next) - { - update_server_status(m_monitor, ptr); - - if (SERVER_IS_DOWN(ptr->server)) - { - /** Hang up all DCBs connected to the failed server */ - dcb_hangup_foreach(ptr->server); - } - } + tick(); /** * After updating the status of all servers, check if monitor events @@ -164,6 +155,20 @@ void AuroraMonitor::main() } } +void AuroraMonitor::tick() +{ + for (MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; ptr; ptr = ptr->next) + { + update_server_status(m_monitor, ptr); + + if (SERVER_IS_DOWN(ptr->server)) + { + /** Hang up all DCBs connected to the failed server */ + dcb_hangup_foreach(ptr->server); + } + } +} + bool AuroraMonitor::has_sufficient_permissions() const { return check_monitor_permissions(m_monitor, "SELECT @@aurora_server_id, server_id FROM " diff --git a/server/modules/monitor/auroramon/auroramon.hh b/server/modules/monitor/auroramon/auroramon.hh index a7d24ca4f..ea99d6c69 100644 --- a/server/modules/monitor/auroramon/auroramon.hh +++ b/server/modules/monitor/auroramon/auroramon.hh @@ -37,6 +37,7 @@ private: bool has_sufficient_permissions() const; void configure(const MXS_CONFIG_PARAMETER* params); + void tick(); void main(); }; diff --git a/server/modules/monitor/galeramon/galeramon.cc b/server/modules/monitor/galeramon/galeramon.cc index 9367e6290..c974c836e 100644 --- a/server/modules/monitor/galeramon/galeramon.cc +++ b/server/modules/monitor/galeramon/galeramon.cc @@ -477,110 +477,10 @@ void GaleraMonitor::main() nrounds += 1; - int is_cluster = 0; - lock_monitor_servers(m_monitor); servers_status_pending_to_current(m_monitor); - MXS_MONITORED_SERVER* ptr = m_monitor->monitored_servers; - while (ptr) - { - ptr->mon_prev_status = ptr->server->status; - - monitorDatabase(ptr); - - /* Log server status change */ - if (mon_status_changed(ptr)) - { - MXS_DEBUG("Backend server [%s]:%d state : %s", - ptr->server->address, - ptr->server->port, - STRSRVSTATUS(ptr->server)); - } - - if (SERVER_IS_DOWN(ptr->server)) - { - /** Increase this server'e error count */ - ptr->mon_err_count += 1; - - } - else - { - /** Reset this server's error count */ - ptr->mon_err_count = 0; - } - - ptr = ptr->next; - } - - /* Try to set a Galera cluster based on - * UUID and cluster_size each node reports: - * no multiple clusters UUID are allowed. - */ - set_galera_cluster(); - - /* - * Let's select a master server: - * it could be the candidate master following MXS_MIN(node_id) rule or - * the server that was master in the previous monitor polling cycle - * Decision depends on master_stickiness value set in configuration - */ - - /* get the candidate master, following MXS_MIN(node_id) rule */ - MXS_MONITORED_SERVER *candidate_master = get_candidate_master(); - - m_master = set_cluster_master(m_master, candidate_master, m_disableMasterFailback); - - ptr = m_monitor->monitored_servers; - - while (ptr) - { - const int repl_bits = (SERVER_SLAVE | SERVER_MASTER | SERVER_MASTER_STICKINESS); - if (SERVER_IS_JOINED(ptr->server) && !m_disableMasterRoleSetting) - { - if (ptr != m_master) - { - /* set the Slave role and clear master stickiness */ - server_clear_set_status(ptr->server, repl_bits, SERVER_SLAVE); - } - else - { - if (candidate_master && - m_master->server->node_id != candidate_master->server->node_id) - { - /* set master role and master stickiness */ - server_clear_set_status(ptr->server, repl_bits, - (SERVER_MASTER | SERVER_MASTER_STICKINESS)); - } - else - { - /* set master role and clear master stickiness */ - server_clear_set_status(ptr->server, repl_bits, SERVER_MASTER); - } - } - - is_cluster++; - } - else - { - server_clear_set_status(ptr->server, repl_bits, 0); - } - ptr = ptr->next; - } - - if (is_cluster == 0 && m_log_no_members) - { - MXS_ERROR("There are no cluster members"); - m_log_no_members = false; - } - else - { - if (is_cluster > 0 && m_log_no_members == 0) - { - MXS_NOTICE("Found cluster members"); - m_log_no_members = true; - } - } + tick(); /** * After updating the status of all servers, check if monitor events @@ -592,19 +492,124 @@ void GaleraMonitor::main() servers_status_current_to_pending(m_monitor); - /* Set the global var "wsrep_sst_donor" - * with a sorted list of "wsrep_node_name" for slave nodes - */ - if (m_set_donor_nodes) - { - update_sst_donor_nodes(is_cluster); - } - store_server_journal(m_monitor, NULL); release_monitor_servers(m_monitor); } } +void GaleraMonitor::tick() +{ + int is_cluster = 0; + + MXS_MONITORED_SERVER* ptr = m_monitor->monitored_servers; + while (ptr) + { + ptr->mon_prev_status = ptr->server->status; + + monitorDatabase(ptr); + + /* Log server status change */ + if (mon_status_changed(ptr)) + { + MXS_DEBUG("Backend server [%s]:%d state : %s", + ptr->server->address, + ptr->server->port, + STRSRVSTATUS(ptr->server)); + } + + if (SERVER_IS_DOWN(ptr->server)) + { + /** Increase this server'e error count */ + ptr->mon_err_count += 1; + + } + else + { + /** Reset this server's error count */ + ptr->mon_err_count = 0; + } + + ptr = ptr->next; + } + + /* Try to set a Galera cluster based on + * UUID and cluster_size each node reports: + * no multiple clusters UUID are allowed. + */ + set_galera_cluster(); + + /* + * Let's select a master server: + * it could be the candidate master following MXS_MIN(node_id) rule or + * the server that was master in the previous monitor polling cycle + * Decision depends on master_stickiness value set in configuration + */ + + /* get the candidate master, following MXS_MIN(node_id) rule */ + MXS_MONITORED_SERVER *candidate_master = get_candidate_master(); + + m_master = set_cluster_master(m_master, candidate_master, m_disableMasterFailback); + + ptr = m_monitor->monitored_servers; + + while (ptr) + { + const int repl_bits = (SERVER_SLAVE | SERVER_MASTER | SERVER_MASTER_STICKINESS); + if (SERVER_IS_JOINED(ptr->server) && !m_disableMasterRoleSetting) + { + if (ptr != m_master) + { + /* set the Slave role and clear master stickiness */ + server_clear_set_status(ptr->server, repl_bits, SERVER_SLAVE); + } + else + { + if (candidate_master && + m_master->server->node_id != candidate_master->server->node_id) + { + /* set master role and master stickiness */ + server_clear_set_status(ptr->server, repl_bits, + (SERVER_MASTER | SERVER_MASTER_STICKINESS)); + } + else + { + /* set master role and clear master stickiness */ + server_clear_set_status(ptr->server, repl_bits, SERVER_MASTER); + } + } + + is_cluster++; + } + else + { + server_clear_set_status(ptr->server, repl_bits, 0); + } + ptr = ptr->next; + } + + if (is_cluster == 0 && m_log_no_members) + { + MXS_ERROR("There are no cluster members"); + m_log_no_members = false; + } + else + { + if (is_cluster > 0 && m_log_no_members == 0) + { + MXS_NOTICE("Found cluster members"); + m_log_no_members = true; + } + } + + /* Set the global var "wsrep_sst_donor" + * with a sorted list of "wsrep_node_name" for slave nodes + */ + if (m_set_donor_nodes) + { + update_sst_donor_nodes(is_cluster); + } +} + /** * get candidate master from all nodes * diff --git a/server/modules/monitor/galeramon/galeramon.hh b/server/modules/monitor/galeramon/galeramon.hh index 87c1c2072..553e65971 100644 --- a/server/modules/monitor/galeramon/galeramon.hh +++ b/server/modules/monitor/galeramon/galeramon.hh @@ -93,6 +93,7 @@ private: bool has_sufficient_permissions() const; void configure(const MXS_CONFIG_PARAMETER* param); + void tick(); void main(); }; diff --git a/server/modules/monitor/grmon/grmon.cc b/server/modules/monitor/grmon/grmon.cc index 3183df9f8..0ab91b13f 100644 --- a/server/modules/monitor/grmon/grmon.cc +++ b/server/modules/monitor/grmon/grmon.cc @@ -192,12 +192,8 @@ void GRMon::main() lock_monitor_servers(m_monitor); servers_status_pending_to_current(m_monitor); - for (MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; ptr; ptr = ptr->next) - { - update_server_status(m_monitor, ptr); - } + tick(); - mon_hangup_failed_servers(m_monitor); /** * After updating the status of all servers, check if monitor events * need to be launched. @@ -206,6 +202,7 @@ void GRMon::main() m_script.empty() ? NULL : m_script.c_str(), m_events); + mon_hangup_failed_servers(m_monitor); servers_status_current_to_pending(m_monitor); store_server_journal(m_monitor, NULL); release_monitor_servers(m_monitor); @@ -225,6 +222,14 @@ void GRMon::main() } } +void GRMon::tick() +{ + for (MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; ptr; ptr = ptr->next) + { + update_server_status(m_monitor, ptr); + } +} + /** * The module entry point routine. It is this routine that * must populate the structure that is referred to as the diff --git a/server/modules/monitor/grmon/grmon.hh b/server/modules/monitor/grmon/grmon.hh index ac5c793bb..2210b9387 100644 --- a/server/modules/monitor/grmon/grmon.hh +++ b/server/modules/monitor/grmon/grmon.hh @@ -39,6 +39,7 @@ private: bool has_sufficient_permissions() const; void configure(const MXS_CONFIG_PARAMETER* params); + void tick(); void main(); }; diff --git a/server/modules/monitor/mmmon/mmmon.cc b/server/modules/monitor/mmmon/mmmon.cc index 47c2e8844..eeddc569d 100644 --- a/server/modules/monitor/mmmon/mmmon.cc +++ b/server/modules/monitor/mmmon/mmmon.cc @@ -467,69 +467,7 @@ void MMMonitor::main() lock_monitor_servers(m_monitor); servers_status_pending_to_current(m_monitor); - /* start from the first server in the list */ - MXS_MONITORED_SERVER* ptr = m_monitor->monitored_servers; - - while (ptr) - { - /* copy server status into monitor pending_status */ - ptr->pending_status = ptr->server->status; - - /* monitor current node */ - monitorDatabase(m_monitor, ptr); - - if (mon_status_changed(ptr) || - mon_print_fail_status(ptr)) - { - MXS_DEBUG("Backend server [%s]:%d state : %s", - ptr->server->address, - ptr->server->port, - STRSRVSTATUS(ptr->server)); - } - if (SERVER_IS_DOWN(ptr->server)) - { - /** Increase this server'e error count */ - ptr->mon_err_count += 1; - } - else - { - /** Reset this server's error count */ - ptr->mon_err_count = 0; - } - - ptr = ptr->next; - } - - /* Get Master server pointer */ - MXS_MONITORED_SERVER *root_master = get_current_master(); - - /* Update server status from monitor pending status on that server*/ - - ptr = m_monitor->monitored_servers; - while (ptr) - { - if (!SERVER_IN_MAINT(ptr->server)) - { - /* If "detect_stale_master" option is On, let's use the previus master */ - if (m_detectStaleMaster && root_master && - (!strcmp(ptr->server->address, root_master->server->address) && - ptr->server->port == root_master->server->port) && (ptr->server->status & SERVER_MASTER) && - !(ptr->pending_status & SERVER_MASTER)) - { - /* in this case server->status will not be updated from pending_status */ - MXS_NOTICE("root server [%s:%i] is no longer Master, let's " - "use it again even if it could be a stale master, you have " - "been warned!", ptr->server->address, ptr->server->port); - /* Set the STALE bit for this server in server struct */ - server_set_status_nolock(ptr->server, SERVER_STALE_STATUS); - } - else - { - ptr->server->status = ptr->pending_status; - } - } - ptr = ptr->next; - } + tick(); /** * After updating the status of all servers, check if monitor events @@ -544,6 +482,73 @@ void MMMonitor::main() } } +void MMMonitor::tick() +{ + /* start from the first server in the list */ + MXS_MONITORED_SERVER* ptr = m_monitor->monitored_servers; + + while (ptr) + { + /* copy server status into monitor pending_status */ + ptr->pending_status = ptr->server->status; + + /* monitor current node */ + monitorDatabase(m_monitor, ptr); + + if (mon_status_changed(ptr) || + mon_print_fail_status(ptr)) + { + MXS_DEBUG("Backend server [%s]:%d state : %s", + ptr->server->address, + ptr->server->port, + STRSRVSTATUS(ptr->server)); + } + if (SERVER_IS_DOWN(ptr->server)) + { + /** Increase this server'e error count */ + ptr->mon_err_count += 1; + } + else + { + /** Reset this server's error count */ + ptr->mon_err_count = 0; + } + + ptr = ptr->next; + } + + /* Get Master server pointer */ + MXS_MONITORED_SERVER *root_master = get_current_master(); + + /* Update server status from monitor pending status on that server*/ + + ptr = m_monitor->monitored_servers; + while (ptr) + { + if (!SERVER_IN_MAINT(ptr->server)) + { + /* If "detect_stale_master" option is On, let's use the previus master */ + if (m_detectStaleMaster && root_master && + (!strcmp(ptr->server->address, root_master->server->address) && + ptr->server->port == root_master->server->port) && (ptr->server->status & SERVER_MASTER) && + !(ptr->pending_status & SERVER_MASTER)) + { + /* in this case server->status will not be updated from pending_status */ + MXS_NOTICE("root server [%s:%i] is no longer Master, let's " + "use it again even if it could be a stale master, you have " + "been warned!", ptr->server->address, ptr->server->port); + /* Set the STALE bit for this server in server struct */ + server_set_status_nolock(ptr->server, SERVER_STALE_STATUS); + } + else + { + ptr->server->status = ptr->pending_status; + } + } + ptr = ptr->next; + } +} + /** * Enable/Disable the MySQL Replication Stale Master dectection, allowing a previouvsly detected master to still act as a Master. * This option must be enabled in order to keep the Master when the replication is stopped or removed from slaves. diff --git a/server/modules/monitor/mmmon/mmmon.hh b/server/modules/monitor/mmmon/mmmon.hh index 9bec27bfd..d8fc4630e 100644 --- a/server/modules/monitor/mmmon/mmmon.hh +++ b/server/modules/monitor/mmmon/mmmon.hh @@ -43,6 +43,7 @@ private: bool has_sufficient_permissions() const; void configure(const MXS_CONFIG_PARAMETER* params); + void tick(); void main(); }; diff --git a/server/modules/monitor/ndbclustermon/ndbclustermon.cc b/server/modules/monitor/ndbclustermon/ndbclustermon.cc index 0a0cfadc3..cc0c330aa 100644 --- a/server/modules/monitor/ndbclustermon/ndbclustermon.cc +++ b/server/modules/monitor/ndbclustermon/ndbclustermon.cc @@ -272,23 +272,7 @@ void NDBCMonitor::main() lock_monitor_servers(m_monitor); servers_status_pending_to_current(m_monitor); - MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; - while (ptr) - { - ptr->mon_prev_status = ptr->server->status; - monitorDatabase(ptr, m_monitor->user, m_monitor->password, m_monitor); - - if (ptr->server->status != ptr->mon_prev_status || - SERVER_IS_DOWN(ptr->server)) - { - MXS_DEBUG("Backend server [%s]:%d state : %s", - ptr->server->address, - ptr->server->port, - STRSRVSTATUS(ptr->server)); - } - - ptr = ptr->next; - } + tick(); /** * After updating the status of all servers, check if monitor events @@ -302,3 +286,24 @@ void NDBCMonitor::main() release_monitor_servers(m_monitor); } } + +void NDBCMonitor::tick() +{ + MXS_MONITORED_SERVER *ptr = m_monitor->monitored_servers; + while (ptr) + { + ptr->mon_prev_status = ptr->server->status; + monitorDatabase(ptr, m_monitor->user, m_monitor->password, m_monitor); + + if (ptr->server->status != ptr->mon_prev_status || + SERVER_IS_DOWN(ptr->server)) + { + MXS_DEBUG("Backend server [%s]:%d state : %s", + ptr->server->address, + ptr->server->port, + STRSRVSTATUS(ptr->server)); + } + + ptr = ptr->next; + } +} diff --git a/server/modules/monitor/ndbclustermon/ndbclustermon.hh b/server/modules/monitor/ndbclustermon/ndbclustermon.hh index 0a9b502a6..92b006ea9 100644 --- a/server/modules/monitor/ndbclustermon/ndbclustermon.hh +++ b/server/modules/monitor/ndbclustermon/ndbclustermon.hh @@ -40,6 +40,7 @@ private: bool has_sufficient_permissions() const; void configure(const MXS_CONFIG_PARAMETER* params); + void tick(); void main(); };