MXS-2219 Add health check threshold

Make it configurable how many times a node may fail to respond
on the health check port before it is considered to be down.
This commit is contained in:
Johan Wikman
2019-01-14 15:10:20 +02:00
parent 880842e55d
commit 89c059411d
6 changed files with 52 additions and 5 deletions

View File

@ -60,3 +60,12 @@ be lowered as that may have an adverse effect on the Cluster itself.
```
cluster_monitor_interval=120000
```
### `health_check_threshold`
Defines how many consecutive times the health check must fail before the
monitor considers a particular node to be down. A successful health check
resets the count. The default value is 2.
```
health_check_threshold=3
```

View File

@ -46,6 +46,11 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE()
MXS_MODULE_PARAM_COUNT,
DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE
},
{
HEALTH_CHECK_THRESHOLD_NAME,
MXS_MODULE_PARAM_COUNT,
DEFAULT_HEALTH_CHECK_THRESHOLD_ZVALUE
},
{MXS_END_MODULE_PARAMS}
}
};

View File

@ -20,3 +20,7 @@
// Name of the configuration parameter that sets the cluster monitor interval.
#define CLUSTER_MONITOR_INTERVAL_NAME "cluster_monitor_interval"
// Default cluster monitor interval, once as a number and once as the string
// that is placed in the module parameter table. The two must be kept in sync.
// NOTE(review): the unit is presumably milliseconds - confirm.
const long DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE = 60000;
#define DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE "60000"
// Name of the configuration parameter that sets the health check threshold.
#define HEALTH_CHECK_THRESHOLD_NAME "health_check_threshold"
// Default health check threshold, once as a number and once as the string
// that is placed in the module parameter table. The two must be kept in sync.
const long DEFAULT_HEALTH_CHECK_THRESHOLD_VALUE = 2;
#define DEFAULT_HEALTH_CHECK_THRESHOLD_ZVALUE "2"

View File

@ -49,8 +49,10 @@ bool ClustrixMonitor::configure(const MXS_CONFIG_PARAMETER* pParams)
}
m_health_urls.clear();
m_node_infos.clear();
m_config.set_cluster_monitor_interval(config_get_integer(pParams, CLUSTER_MONITOR_INTERVAL_NAME));
m_config.set_health_check_threshold(config_get_integer(pParams, HEALTH_CHECK_THRESHOLD_NAME));
refresh_cluster_nodes();
@ -179,8 +181,9 @@ void ClustrixMonitor::fetch_cluster_nodes_from(MXS_MONITORED_SERVER& ms)
string ip = row[1];
int mysql_port = row[2] ? atoi(row[2]) : DEFAULT_MYSQL_PORT;
int health_port = row[3] ? atoi(row[3]) : DEFAULT_HEALTH_PORT;
int health_check_threshold = m_config.health_check_threshold();
node_infos.emplace_back(id, ip, mysql_port, health_port);
node_infos.emplace_back(id, ip, mysql_port, health_port, health_check_threshold);
string health_url = "http://" + ip + ":" + std::to_string(health_port);
health_urls.push_back(health_url);

View File

@ -27,6 +27,7 @@ public:
public:
// Creates a configuration in which both the cluster monitor interval and the
// health check threshold hold their compiled-in default values.
Config()
: m_cluster_monitor_interval(DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE)
, m_health_check_threshold(DEFAULT_HEALTH_CHECK_THRESHOLD_VALUE)
{
};
@ -40,8 +41,19 @@ public:
m_cluster_monitor_interval = l;
}
// Returns the health check threshold currently stored in this configuration.
long health_check_threshold() const
{
return m_health_check_threshold;
}
// Stores a new health check threshold.
//
// @param l  The new threshold. It is stored exactly as given; no range check
//           is applied here.
void set_health_check_threshold(long l)
{
m_health_check_threshold = l;
}
private:
long m_cluster_monitor_interval;
long m_health_check_threshold;
};
~ClustrixMonitor();

View File

@ -23,11 +23,14 @@ public:
// Creates the description of one node.
//
// @param id                      Identifier of the node.
// @param ip                      Address of the node.
// @param mysql_port              MySQL port of the node.
// @param health_port             Health check port of the node.
// @param health_check_threshold  Value the running counter starts from and is
//                                later restored to.
//
// NOTE(review): both "int health_port)" and "int health_port," appear in the
// parameter list below; this looks like the old and the new line of a diff
// shown together - confirm which one belongs to the current code.
ClustrixNodeInfo(int id,
const std::string& ip,
int mysql_port,
int health_port)
int health_port,
int health_check_threshold)
: m_id(id)
, m_ip(ip)
, m_mysql_port(mysql_port)
, m_health_port(health_port)
, m_health_check_threshold(health_check_threshold)
// Initialised from the member, not the parameter; this relies on
// m_health_check_threshold being declared before m_nRunning.
, m_nRunning(m_health_check_threshold)
{
}
@ -53,12 +56,22 @@ public:
// Tells whether the node is currently regarded as running.
//
// NOTE(review): two return statements are present. The first looks like the
// pre-change line of the diff and the second like its replacement, under
// which the node counts as running while m_nRunning is above zero - confirm.
bool is_running() const
{
return m_is_running;
return m_nRunning > 0;
}
// Updates the running state of the node.
//
// @param running  True restores m_nRunning to m_health_check_threshold; false
//                 decrements m_nRunning by one, unless it is already zero.
//
// NOTE(review): presumably called with the outcome of each health check -
// verify against the caller.
// NOTE(review): the assignment to m_is_running looks like the pre-change line
// of the diff shown together with its replacement - confirm.
void set_running(bool running)
{
m_is_running = running;
if (running)
{
// A single success wipes out all failures counted so far.
m_nRunning = m_health_check_threshold;
}
else
{
// Never count below zero, however many failures are reported.
if (m_nRunning > 0)
{
--m_nRunning;
}
}
}
std::string to_string() const
@ -78,7 +91,8 @@ private:
std::string m_ip;
int m_mysql_port;
int m_health_port;
bool m_is_running { true }; // Assume running, until proven otherwise.
int m_health_check_threshold;
int m_nRunning;
};
inline std::ostream& operator << (std::ostream& out, const ClustrixNodeInfo& x)