Fix master failure tracking

A master failure was assumed to be the only master-related event in
each monitoring loop. If the master was switched by an external actor, the
monitor's failure tracking would go out of sync.
This commit is contained in:
Markus Mäkelä 2017-10-27 14:38:21 +03:00
parent c7c670930c
commit 600509be4a

View File

@ -1740,6 +1740,9 @@ void servers_status_current_to_pending(MXS_MONITOR *monitor)
void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_t events)
{
bool master_down = false;
bool master_up = false;
for (MXS_MONITORED_SERVER *ptr = monitor->monitored_servers; ptr; ptr = ptr->next)
{
if (mon_status_changed(ptr))
@ -1761,11 +1764,11 @@ void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_
if (event == MASTER_DOWN_EVENT)
{
monitor->master_has_failed = true;
master_down = true;
}
else if (event == MASTER_UP_EVENT || event == NEW_MASTER_EVENT)
{
monitor->master_has_failed = false;
master_up = true;
}
if (script && (events & event))
@ -1774,6 +1777,23 @@ void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_
}
}
}
if (master_down != master_up)
{
// We either lost the master or gained a new one
if (master_down)
{
monitor->master_has_failed = true;
}
else if (master_up)
{
monitor->master_has_failed = false;
}
}
else if (master_down && master_up)
{
MXS_INFO("Master switch detected: lost a master and gained a new one");
}
}
static const char* monitor_state_to_string(int state)