diff --git a/Documentation/Monitors/Clustrix-Monitor.md b/Documentation/Monitors/Clustrix-Monitor.md index 31476f02b..9d67a1081 100644 --- a/Documentation/Monitors/Clustrix-Monitor.md +++ b/Documentation/Monitors/Clustrix-Monitor.md @@ -60,3 +60,12 @@ be lowered as that may have an adverse effect on the Cluster itself. ``` cluster_monitor_interval=120000 ``` + +### `health_check_threshold` + +Defines how many consecutive times the health check may fail before the monitor +considers a particular node to be down. The default value is 2. + +``` +health_check_threshold=3 +``` diff --git a/server/modules/monitor/clustrixmon/clustrixmon.cc b/server/modules/monitor/clustrixmon/clustrixmon.cc index e971401b2..fdaa98188 100644 --- a/server/modules/monitor/clustrixmon/clustrixmon.cc +++ b/server/modules/monitor/clustrixmon/clustrixmon.cc @@ -46,6 +46,11 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE() MXS_MODULE_PARAM_COUNT, DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE }, + { + HEALTH_CHECK_THRESHOLD_NAME, + MXS_MODULE_PARAM_COUNT, + DEFAULT_HEALTH_CHECK_THRESHOLD_ZVALUE + }, {MXS_END_MODULE_PARAMS} } }; diff --git a/server/modules/monitor/clustrixmon/clustrixmon.hh b/server/modules/monitor/clustrixmon/clustrixmon.hh index 63d8bb1ec..0ca39406f 100644 --- a/server/modules/monitor/clustrixmon/clustrixmon.hh +++ b/server/modules/monitor/clustrixmon/clustrixmon.hh @@ -20,3 +20,7 @@ #define CLUSTER_MONITOR_INTERVAL_NAME "cluster_monitor_interval" const long DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE = 60000; #define DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE "60000" + +#define HEALTH_CHECK_THRESHOLD_NAME "health_check_threshold" +const long DEFAULT_HEALTH_CHECK_THRESHOLD_VALUE = 2; +#define DEFAULT_HEALTH_CHECK_THRESHOLD_ZVALUE "2" diff --git a/server/modules/monitor/clustrixmon/clustrixmonitor.cc b/server/modules/monitor/clustrixmon/clustrixmonitor.cc index 4fe8ab400..ee35a257b 100644 --- a/server/modules/monitor/clustrixmon/clustrixmonitor.cc +++ b/server/modules/monitor/clustrixmon/clustrixmonitor.cc @@
-49,8 +49,10 @@ bool ClustrixMonitor::configure(const MXS_CONFIG_PARAMETER* pParams) } m_health_urls.clear(); + m_node_infos.clear(); m_config.set_cluster_monitor_interval(config_get_integer(pParams, CLUSTER_MONITOR_INTERVAL_NAME)); + m_config.set_health_check_threshold(config_get_integer(pParams, HEALTH_CHECK_THRESHOLD_NAME)); refresh_cluster_nodes(); @@ -179,8 +181,9 @@ void ClustrixMonitor::fetch_cluster_nodes_from(MXS_MONITORED_SERVER& ms) string ip = row[1]; int mysql_port = row[2] ? atoi(row[2]) : DEFAULT_MYSQL_PORT; int health_port = row[3] ? atoi(row[3]) : DEFAULT_HEALTH_PORT; + int health_check_threshold = m_config.health_check_threshold(); - node_infos.emplace_back(id, ip, mysql_port, health_port); + node_infos.emplace_back(id, ip, mysql_port, health_port, health_check_threshold); string health_url = "http://" + ip + ":" + std::to_string(health_port); health_urls.push_back(health_url); diff --git a/server/modules/monitor/clustrixmon/clustrixmonitor.hh b/server/modules/monitor/clustrixmon/clustrixmonitor.hh index 8faa968c4..aa6291556 100644 --- a/server/modules/monitor/clustrixmon/clustrixmonitor.hh +++ b/server/modules/monitor/clustrixmon/clustrixmonitor.hh @@ -27,6 +27,7 @@ public: public: Config() : m_cluster_monitor_interval(DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE) + , m_health_check_threshold(DEFAULT_HEALTH_CHECK_THRESHOLD_VALUE) { }; @@ -40,8 +41,19 @@ public: m_cluster_monitor_interval = l; } + long health_check_threshold() const + { + return m_health_check_threshold; + } + + void set_health_check_threshold(long l) + { + m_health_check_threshold = l; + } + private: long m_cluster_monitor_interval; + long m_health_check_threshold; }; ~ClustrixMonitor(); diff --git a/server/modules/monitor/clustrixmon/clustrixnodeinfo.hh b/server/modules/monitor/clustrixmon/clustrixnodeinfo.hh index 681f35ac2..4baaf4ff5 100644 --- a/server/modules/monitor/clustrixmon/clustrixnodeinfo.hh +++ b/server/modules/monitor/clustrixmon/clustrixnodeinfo.hh @@ -23,11 +23,14 @@ 
public: ClustrixNodeInfo(int id, const std::string& ip, int mysql_port, - int health_port) + int health_port, + int health_check_threshold) : m_id(id) , m_ip(ip) , m_mysql_port(mysql_port) , m_health_port(health_port) + , m_health_check_threshold(health_check_threshold > 0 ? health_check_threshold : 1) /* at least 1: with 0 the node could never be reported as running */ + , m_nRunning(m_health_check_threshold) { } @@ -53,12 +56,22 @@ public: bool is_running() const { - return m_is_running; + return m_nRunning > 0; /* running while failed checks have not used up the threshold */ } void set_running(bool running) { - m_is_running = running; + if (running) + { + m_nRunning = m_health_check_threshold; /* a successful check restores the full count */ + } + else + { + if (m_nRunning > 0) + { + --m_nRunning; /* one failed check; reaching 0 means the node is down */ + } + } } std::string to_string() const @@ -78,7 +91,8 @@ private: std::string m_ip; int m_mysql_port; int m_health_port; - bool m_is_running { true }; // Assume running, until proven otherwise. + int m_health_check_threshold; + int m_nRunning; }; inline std::ostream& operator << (std::ostream& out, const ClustrixNodeInfo& x)