MXS-2219 Add health check threshold
Make it configurable how many times a node may fail to respond on the health check port before it is considered to be down.
This commit is contained in:
@ -60,3 +60,12 @@ be lowered as that may have an adverse effect on the Cluster itself.
|
||||
```
|
||||
cluster_monitor_interval=120000
|
||||
```
|
||||
|
||||
### `health_check_threshold`
|
||||
|
||||
Defines how many consecutive times the health check may fail before the monitor
|
||||
considers a particular node to be down. The default value is 2.
|
||||
|
||||
```
|
||||
health_check_threshold=3
|
||||
```
|
||||
|
@ -46,6 +46,11 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE()
|
||||
MXS_MODULE_PARAM_COUNT,
|
||||
DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE
|
||||
},
|
||||
{
|
||||
HEALTH_CHECK_THRESHOLD_NAME,
|
||||
MXS_MODULE_PARAM_COUNT,
|
||||
DEFAULT_HEALTH_CHECK_THRESHOLD_ZVALUE
|
||||
},
|
||||
{MXS_END_MODULE_PARAMS}
|
||||
}
|
||||
};
|
||||
|
@ -20,3 +20,7 @@
|
||||
#define CLUSTER_MONITOR_INTERVAL_NAME "cluster_monitor_interval"
|
||||
const long DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE = 60000;
|
||||
#define DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE "60000"
|
||||
|
||||
// Name of the configuration parameter that holds the health check threshold.
#define HEALTH_CHECK_THRESHOLD_NAME "health_check_threshold"
|
||||
// Numeric form of the default threshold.
const long DEFAULT_HEALTH_CHECK_THRESHOLD_VALUE = 2;
|
||||
// String form of the same default; keep it in sync with the numeric value above.
#define DEFAULT_HEALTH_CHECK_THRESHOLD_ZVALUE "2"
|
||||
|
@ -49,8 +49,10 @@ bool ClustrixMonitor::configure(const MXS_CONFIG_PARAMETER* pParams)
|
||||
}
|
||||
|
||||
m_health_urls.clear();
|
||||
m_node_infos.clear();
|
||||
|
||||
m_config.set_cluster_monitor_interval(config_get_integer(pParams, CLUSTER_MONITOR_INTERVAL_NAME));
|
||||
m_config.set_health_check_threshold(config_get_integer(pParams, HEALTH_CHECK_THRESHOLD_NAME));
|
||||
|
||||
refresh_cluster_nodes();
|
||||
|
||||
@ -179,8 +181,9 @@ void ClustrixMonitor::fetch_cluster_nodes_from(MXS_MONITORED_SERVER& ms)
|
||||
string ip = row[1];
|
||||
int mysql_port = row[2] ? atoi(row[2]) : DEFAULT_MYSQL_PORT;
|
||||
int health_port = row[3] ? atoi(row[3]) : DEFAULT_HEALTH_PORT;
|
||||
int health_check_threshold = m_config.health_check_threshold();
|
||||
|
||||
node_infos.emplace_back(id, ip, mysql_port, health_port);
|
||||
node_infos.emplace_back(id, ip, mysql_port, health_port, health_check_threshold);
|
||||
|
||||
string health_url = "http://" + ip + ":" + std::to_string(health_port);
|
||||
health_urls.push_back(health_url);
|
||||
|
@ -27,6 +27,7 @@ public:
|
||||
public:
|
||||
// Default constructor: sets the cluster monitor interval and the health
// check threshold to their compiled-in default values.
Config()
|
||||
: m_cluster_monitor_interval(DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE)
|
||||
, m_health_check_threshold(DEFAULT_HEALTH_CHECK_THRESHOLD_VALUE)
|
||||
{
|
||||
};
|
||||
|
||||
@ -40,8 +41,19 @@ public:
|
||||
m_cluster_monitor_interval = l;
|
||||
}
|
||||
|
||||
// Returns the health check threshold currently stored in this configuration.
long health_check_threshold() const
|
||||
{
|
||||
return m_health_check_threshold;
|
||||
}
|
||||
|
||||
// Stores l as the health check threshold. The value is taken as is; no
// range validation is performed here.
void set_health_check_threshold(long l)
|
||||
{
|
||||
m_health_check_threshold = l;
|
||||
}
|
||||
|
||||
private:
|
||||
long m_cluster_monitor_interval;
|
||||
long m_health_check_threshold;
|
||||
};
|
||||
|
||||
~ClustrixMonitor();
|
||||
|
@ -23,11 +23,14 @@ public:
|
||||
// Constructs the node info from the node id, its ip, its MySQL port, its
// health check port and the health check threshold.
//
// m_nRunning starts out equal to the threshold. It is initialized from the
// member m_health_check_threshold (not the parameter), which relies on
// m_health_check_threshold being declared before m_nRunning in the class.
//
// NOTE(review): this hunk lists "int health_port" twice; presumably the
// first occurrence (ending in ")") is the line the commit removed and the
// second (ending in ",") is its replacement - confirm against the header.
ClustrixNodeInfo(int id,
|
||||
const std::string& ip,
|
||||
int mysql_port,
|
||||
int health_port)
|
||||
int health_port,
|
||||
int health_check_threshold)
|
||||
: m_id(id)
|
||||
, m_ip(ip)
|
||||
, m_mysql_port(mysql_port)
|
||||
, m_health_port(health_port)
|
||||
, m_health_check_threshold(health_check_threshold)
|
||||
, m_nRunning(m_health_check_threshold)
|
||||
{
|
||||
}
|
||||
|
||||
@ -53,12 +56,22 @@ public:
|
||||
|
||||
// Reports whether the node is regarded as running.
//
// NOTE(review): this hunk shows two return statements; presumably
// "return m_is_running;" is the line the commit removed and
// "return m_nRunning > 0;" is its replacement, i.e. the node counts as
// running while the counter is still positive - confirm against the header.
bool is_running() const
|
||||
{
|
||||
return m_is_running;
|
||||
return m_nRunning > 0;
|
||||
}
|
||||
|
||||
// Records the outcome of a health check.
//
// When running is true, m_nRunning is reset to m_health_check_threshold.
// When running is false, m_nRunning is decremented, but never below zero.
//
// NOTE(review): "m_is_running = running;" is presumably the line the commit
// removed, shown here next to its replacement - confirm against the header.
void set_running(bool running)
|
||||
{
|
||||
m_is_running = running;
|
||||
if (running)
|
||||
{
|
||||
m_nRunning = m_health_check_threshold;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_nRunning > 0)
|
||||
{
|
||||
--m_nRunning;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string to_string() const
|
||||
@ -78,7 +91,8 @@ private:
|
||||
std::string m_ip;
|
||||
int m_mysql_port;
|
||||
int m_health_port;
|
||||
bool m_is_running { true }; // Assume running, until proven otherwise.
|
||||
int m_health_check_threshold;
|
||||
int m_nRunning;
|
||||
};
|
||||
|
||||
inline std::ostream& operator << (std::ostream& out, const ClustrixNodeInfo& x)
|
||||
|
Reference in New Issue
Block a user