MXS-2219 Add health check threshold

Make it configurable how many times a node may fail to respond
on the health check port before it is considered to be down.
This commit is contained in:
Johan Wikman
2019-01-14 15:10:20 +02:00
parent 880842e55d
commit 89c059411d
6 changed files with 52 additions and 5 deletions

View File

@ -60,3 +60,12 @@ be lowered as that may have an adverse effect on the Cluster itself.
``` ```
cluster_monitor_interval=120000 cluster_monitor_interval=120000
``` ```
### `health_check_threshold`
Defines how many consecutive times the health check of a node may fail
before the monitor considers that node to be down. A successful health
check resets the count. The default value is 2.
```
health_check_threshold=3
```

View File

@ -46,6 +46,11 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE()
MXS_MODULE_PARAM_COUNT, MXS_MODULE_PARAM_COUNT,
DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE
}, },
{
HEALTH_CHECK_THRESHOLD_NAME,
MXS_MODULE_PARAM_COUNT,
DEFAULT_HEALTH_CHECK_THRESHOLD_ZVALUE
},
{MXS_END_MODULE_PARAMS} {MXS_END_MODULE_PARAMS}
} }
}; };

View File

@ -20,3 +20,7 @@
#define CLUSTER_MONITOR_INTERVAL_NAME "cluster_monitor_interval" #define CLUSTER_MONITOR_INTERVAL_NAME "cluster_monitor_interval"
const long DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE = 60000; const long DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE = 60000;
#define DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE "60000" #define DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE "60000"
// Name of the monitor parameter that sets the health check threshold.
#define HEALTH_CHECK_THRESHOLD_NAME "health_check_threshold"
// Default threshold as a number; used to initialise the monitor's Config.
const long DEFAULT_HEALTH_CHECK_THRESHOLD_VALUE = 2;
// The same default as a string, for the module parameter table.
// Keep it in sync with DEFAULT_HEALTH_CHECK_THRESHOLD_VALUE.
#define DEFAULT_HEALTH_CHECK_THRESHOLD_ZVALUE "2"

View File

@ -49,8 +49,10 @@ bool ClustrixMonitor::configure(const MXS_CONFIG_PARAMETER* pParams)
} }
m_health_urls.clear(); m_health_urls.clear();
m_node_infos.clear();
m_config.set_cluster_monitor_interval(config_get_integer(pParams, CLUSTER_MONITOR_INTERVAL_NAME)); m_config.set_cluster_monitor_interval(config_get_integer(pParams, CLUSTER_MONITOR_INTERVAL_NAME));
m_config.set_health_check_threshold(config_get_integer(pParams, HEALTH_CHECK_THRESHOLD_NAME));
refresh_cluster_nodes(); refresh_cluster_nodes();
@ -179,8 +181,9 @@ void ClustrixMonitor::fetch_cluster_nodes_from(MXS_MONITORED_SERVER& ms)
string ip = row[1]; string ip = row[1];
int mysql_port = row[2] ? atoi(row[2]) : DEFAULT_MYSQL_PORT; int mysql_port = row[2] ? atoi(row[2]) : DEFAULT_MYSQL_PORT;
int health_port = row[3] ? atoi(row[3]) : DEFAULT_HEALTH_PORT; int health_port = row[3] ? atoi(row[3]) : DEFAULT_HEALTH_PORT;
int health_check_threshold = m_config.health_check_threshold();
node_infos.emplace_back(id, ip, mysql_port, health_port); node_infos.emplace_back(id, ip, mysql_port, health_port, health_check_threshold);
string health_url = "http://" + ip + ":" + std::to_string(health_port); string health_url = "http://" + ip + ":" + std::to_string(health_port);
health_urls.push_back(health_url); health_urls.push_back(health_url);

View File

@ -27,6 +27,7 @@ public:
public: public:
Config() Config()
: m_cluster_monitor_interval(DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE) : m_cluster_monitor_interval(DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE)
, m_health_check_threshold(DEFAULT_HEALTH_CHECK_THRESHOLD_VALUE)
{ {
}; };
@ -40,8 +41,19 @@ public:
m_cluster_monitor_interval = l; m_cluster_monitor_interval = l;
} }
// Returns the currently configured health check threshold: the value last
// stored with set_health_check_threshold(), or the default assigned by the
// constructor if it has not been set.
long health_check_threshold() const
{
return m_health_check_threshold;
}
/**
 * Set the health check threshold.
 *
 * @param l  Number of consecutive failed health checks after which a node
 *           is considered to be down. Values smaller than 1 are raised to 1.
 */
void set_health_check_threshold(long l)
{
    // ClustrixNodeInfo starts its countdown from this value and reports a
    // node as running only while the countdown is above zero. A threshold
    // of 0 would therefore make every node permanently down, so the
    // smallest meaningful value is 1 (down on the first failed check).
    m_health_check_threshold = (l < 1) ? 1 : l;
}
private: private:
long m_cluster_monitor_interval; long m_cluster_monitor_interval;
long m_health_check_threshold;
}; };
~ClustrixMonitor(); ~ClustrixMonitor();

View File

@ -23,11 +23,14 @@ public:
ClustrixNodeInfo(int id, ClustrixNodeInfo(int id,
const std::string& ip, const std::string& ip,
int mysql_port, int mysql_port,
int health_port) int health_port,
int health_check_threshold)
: m_id(id) : m_id(id)
, m_ip(ip) , m_ip(ip)
, m_mysql_port(mysql_port) , m_mysql_port(mysql_port)
, m_health_port(health_port) , m_health_port(health_port)
, m_health_check_threshold(health_check_threshold)
, m_nRunning(m_health_check_threshold)
{ {
} }
@ -53,12 +56,22 @@ public:
bool is_running() const bool is_running() const
{ {
return m_is_running; return m_nRunning > 0;
} }
void set_running(bool running) void set_running(bool running)
{ {
m_is_running = running; if (running)
{
m_nRunning = m_health_check_threshold;
}
else
{
if (m_nRunning > 0)
{
--m_nRunning;
}
}
} }
std::string to_string() const std::string to_string() const
@ -78,7 +91,8 @@ private:
std::string m_ip; std::string m_ip;
int m_mysql_port; int m_mysql_port;
int m_health_port; int m_health_port;
bool m_is_running { true }; // Assume running, until proven otherwise. int m_health_check_threshold;
int m_nRunning;
}; };
inline std::ostream& operator << (std::ostream& out, const ClustrixNodeInfo& x) inline std::ostream& operator << (std::ostream& out, const ClustrixNodeInfo& x)