MXS-2219 Add health check threshold
Make it configurable how many times a node may fail to respond on the health check port before it is considered to be down.
This commit is contained in:
@ -60,3 +60,12 @@ be lowered as that may have an adverse effect on the Cluster itself.
|
|||||||
```
|
```
|
||||||
cluster_monitor_interval=120000
|
cluster_monitor_interval=120000
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### `health_check_threshold`
|
||||||
|
|
||||||
|
Defines how many consecutive times the health check must fail before the monitor
|
||||||
|
considers a particular node to be down. A successful health check resets the count. The default value is 2.
|
||||||
|
|
||||||
|
```
|
||||||
|
health_check_threshold=3
|
||||||
|
```
|
||||||
|
@ -46,6 +46,11 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE()
|
|||||||
MXS_MODULE_PARAM_COUNT,
|
MXS_MODULE_PARAM_COUNT,
|
||||||
DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE
|
DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
HEALTH_CHECK_THRESHOLD_NAME,
|
||||||
|
MXS_MODULE_PARAM_COUNT,
|
||||||
|
DEFAULT_HEALTH_CHECK_THRESHOLD_ZVALUE
|
||||||
|
},
|
||||||
{MXS_END_MODULE_PARAMS}
|
{MXS_END_MODULE_PARAMS}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -20,3 +20,7 @@
|
|||||||
#define CLUSTER_MONITOR_INTERVAL_NAME "cluster_monitor_interval"
|
#define CLUSTER_MONITOR_INTERVAL_NAME "cluster_monitor_interval"
|
||||||
const long DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE = 60000;
|
const long DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE = 60000;
|
||||||
#define DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE "60000"
|
#define DEFAULT_CLUSTER_MONITOR_INTERVAL_ZVALUE "60000"
|
||||||
|
|
||||||
|
#define HEALTH_CHECK_THRESHOLD_NAME "health_check_threshold"
|
||||||
|
const long DEFAULT_HEALTH_CHECK_THRESHOLD_VALUE = 2;
|
||||||
|
#define DEFAULT_HEALTH_CHECK_THRESHOLD_ZVALUE "2"
|
||||||
|
@ -49,8 +49,10 @@ bool ClustrixMonitor::configure(const MXS_CONFIG_PARAMETER* pParams)
|
|||||||
}
|
}
|
||||||
|
|
||||||
m_health_urls.clear();
|
m_health_urls.clear();
|
||||||
|
m_node_infos.clear();
|
||||||
|
|
||||||
m_config.set_cluster_monitor_interval(config_get_integer(pParams, CLUSTER_MONITOR_INTERVAL_NAME));
|
m_config.set_cluster_monitor_interval(config_get_integer(pParams, CLUSTER_MONITOR_INTERVAL_NAME));
|
||||||
|
m_config.set_health_check_threshold(config_get_integer(pParams, HEALTH_CHECK_THRESHOLD_NAME));
|
||||||
|
|
||||||
refresh_cluster_nodes();
|
refresh_cluster_nodes();
|
||||||
|
|
||||||
@ -179,8 +181,9 @@ void ClustrixMonitor::fetch_cluster_nodes_from(MXS_MONITORED_SERVER& ms)
|
|||||||
string ip = row[1];
|
string ip = row[1];
|
||||||
int mysql_port = row[2] ? atoi(row[2]) : DEFAULT_MYSQL_PORT;
|
int mysql_port = row[2] ? atoi(row[2]) : DEFAULT_MYSQL_PORT;
|
||||||
int health_port = row[3] ? atoi(row[3]) : DEFAULT_HEALTH_PORT;
|
int health_port = row[3] ? atoi(row[3]) : DEFAULT_HEALTH_PORT;
|
||||||
|
int health_check_threshold = m_config.health_check_threshold();
|
||||||
|
|
||||||
node_infos.emplace_back(id, ip, mysql_port, health_port);
|
node_infos.emplace_back(id, ip, mysql_port, health_port, health_check_threshold);
|
||||||
|
|
||||||
string health_url = "http://" + ip + ":" + std::to_string(health_port);
|
string health_url = "http://" + ip + ":" + std::to_string(health_port);
|
||||||
health_urls.push_back(health_url);
|
health_urls.push_back(health_url);
|
||||||
|
@ -27,6 +27,7 @@ public:
|
|||||||
public:
|
public:
|
||||||
Config()
|
Config()
|
||||||
: m_cluster_monitor_interval(DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE)
|
: m_cluster_monitor_interval(DEFAULT_CLUSTER_MONITOR_INTERVAL_VALUE)
|
||||||
|
, m_health_check_threshold(DEFAULT_HEALTH_CHECK_THRESHOLD_VALUE)
|
||||||
{
|
{
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -40,8 +41,19 @@ public:
|
|||||||
m_cluster_monitor_interval = l;
|
m_cluster_monitor_interval = l;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
long health_check_threshold() const
|
||||||
|
{
|
||||||
|
return m_health_check_threshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_health_check_threshold(long l)
|
||||||
|
{
|
||||||
|
m_health_check_threshold = l;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
long m_cluster_monitor_interval;
|
long m_cluster_monitor_interval;
|
||||||
|
long m_health_check_threshold;
|
||||||
};
|
};
|
||||||
|
|
||||||
~ClustrixMonitor();
|
~ClustrixMonitor();
|
||||||
|
@ -23,11 +23,14 @@ public:
|
|||||||
ClustrixNodeInfo(int id,
|
ClustrixNodeInfo(int id,
|
||||||
const std::string& ip,
|
const std::string& ip,
|
||||||
int mysql_port,
|
int mysql_port,
|
||||||
int health_port)
|
int health_port,
|
||||||
|
int health_check_threshold)
|
||||||
: m_id(id)
|
: m_id(id)
|
||||||
, m_ip(ip)
|
, m_ip(ip)
|
||||||
, m_mysql_port(mysql_port)
|
, m_mysql_port(mysql_port)
|
||||||
, m_health_port(health_port)
|
, m_health_port(health_port)
|
||||||
|
, m_health_check_threshold(health_check_threshold)
|
||||||
|
, m_nRunning(m_health_check_threshold)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -53,12 +56,22 @@ public:
|
|||||||
|
|
||||||
bool is_running() const
|
bool is_running() const
|
||||||
{
|
{
|
||||||
return m_is_running;
|
return m_nRunning > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_running(bool running)
|
void set_running(bool running)
|
||||||
{
|
{
|
||||||
m_is_running = running;
|
if (running)
|
||||||
|
{
|
||||||
|
m_nRunning = m_health_check_threshold;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (m_nRunning > 0)
|
||||||
|
{
|
||||||
|
--m_nRunning;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string to_string() const
|
std::string to_string() const
|
||||||
@ -78,7 +91,8 @@ private:
|
|||||||
std::string m_ip;
|
std::string m_ip;
|
||||||
int m_mysql_port;
|
int m_mysql_port;
|
||||||
int m_health_port;
|
int m_health_port;
|
||||||
bool m_is_running { true }; // Assume running, until proven otherwise.
|
int m_health_check_threshold;
|
||||||
|
int m_nRunning;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline std::ostream& operator << (std::ostream& out, const ClustrixNodeInfo& x)
|
inline std::ostream& operator << (std::ostream& out, const ClustrixNodeInfo& x)
|
||||||
|
Reference in New Issue
Block a user