MXS-2275 Check for softfailed nodes

When checking the node info, also include information about wheter
a node is being SOFTFAILed. If it is, turn on the `Being Drained`
bit.

A node is SOFTFAILed with the intention of removing it, so better
not to create new connections to it as they later would be broken
when the node is actually taken down.
This commit is contained in:
Johan Wikman
2019-02-01 09:51:37 +02:00
parent 55b1e031d6
commit b582119d27
2 changed files with 47 additions and 2 deletions

View File

@ -230,7 +230,9 @@ void ClustrixMonitor::refresh_nodes()
if (check_cluster_membership(&memberships))
{
const char ZQUERY[] = "SELECT nodeid, iface_ip, mysql_port, healthmon_port FROM system.nodeinfo";
const char ZQUERY[] =
"SELECT ni.nodeid, ni.iface_ip, ni.mysql_port, ni.healthmon_port, sn.nodeid FROM system.nodeinfo AS ni "
"LEFT JOIN system.softfailed_nodes AS sn ON ni.nodeid = sn.nodeid";
if (mysql_query(m_pHub_con, ZQUERY) == 0)
{
@ -238,7 +240,7 @@ void ClustrixMonitor::refresh_nodes()
if (pResult)
{
mxb_assert(mysql_field_count(m_pHub_con) == 4);
mxb_assert(mysql_field_count(m_pHub_con) == 5);
set<int> nids;
for (const auto& element : m_nodes)
@ -256,6 +258,7 @@ void ClustrixMonitor::refresh_nodes()
string ip = row[1];
int mysql_port = row[2] ? atoi(row[2]) : DEFAULT_MYSQL_PORT;
int health_port = row[3] ? atoi(row[3]) : DEFAULT_HEALTH_PORT;
bool softfailed = row[4] ? true : false;
// '@@' ensures no clash with user created servers.
// Monitor name ensures no clash with other Clustrix monitor instances.
@ -286,6 +289,23 @@ void ClustrixMonitor::refresh_nodes()
node.set_health_port(health_port);
}
bool is_being_drained = node.server()->is_being_drained();
if (softfailed && !is_being_drained)
{
MXS_NOTICE("%s: Node %d (%s) has been SOFTFAILed. Turning ON 'Being Drained'.",
m_name, node.id(), node.server()->address);
node.server()->set_status(SERVER_BEING_DRAINED);
}
else if (!softfailed && is_being_drained)
{
MXS_NOTICE("%s: Node %d (%s) is no longer being SOFTFAILed. Turning OFF 'Being Drained'.",
m_name, node.id(), node.server()->address);
node.server()->clear_status(SERVER_BEING_DRAINED);
}
nids.erase(id);
}
else if (mit != memberships.end())
@ -303,6 +323,11 @@ void ClustrixMonitor::refresh_nodes()
SERVER* pServer = SERVER::find_by_unique_name(name);
mxb_assert(pServer);
if (softfailed)
{
pServer->set_status(SERVER_BEING_DRAINED);
}
const ClustrixMembership& membership = mit->second;
int health_check_threshold = m_config.health_check_threshold();
@ -336,6 +361,8 @@ void ClustrixMonitor::refresh_nodes()
mysql_free_result(pResult);
// Any nodes that were not found are not available, so their
// state must be set accordingly.
for (const auto nid : nids)
{
auto it = m_nodes.find(nid);