Clarify master failure verification

The two previous functions were somewhat overlapping.
This commit is contained in:
Esa Korhonen
2018-01-22 12:53:55 +02:00
parent a25e90643a
commit 257034bf3e

View File

@ -1448,41 +1448,34 @@ static bool do_show_slave_status(MYSQL_MONITOR* mon,
return rval; return rval;
} }
static inline bool master_maybe_dead(MYSQL_MONITOR* handle) /**
* Check if a slave is receiving events from master.
*
* @param handle Cluster monitor
* @return True if at least one slave configured to replicate from the current master has received
* an event within the last master_failure_timeout seconds.
*/
static bool slave_receiving_events(MYSQL_MONITOR* handle)
{ {
return handle->verify_master_failure && handle->master && ss_dassert(handle->master);
SERVER_IS_DOWN(handle->master->server); bool received_event = false;
} long master_id = handle->master->server->node_id;
for (MXS_MONITORED_SERVER* server = handle->monitor->monitored_servers; server; server = server->next)
static bool master_still_alive(MYSQL_MONITOR* handle)
{
bool rval = true;
if (handle->master && SERVER_IS_DOWN(handle->master->server))
{ {
// We have a master and it appears to be dead MySqlServerInfo* info = get_server_info(handle, server);
rval = false;
for (MXS_MONITORED_SERVER* s = handle->monitor->monitored_servers; s; s = s->next) if (info->slave_configured &&
info->slave_status.master_server_id == master_id &&
difftime(time(NULL), info->latest_event) < handle->master_failure_timeout)
{ {
MySqlServerInfo* info = get_server_info(handle, s); /**
* The slave is still connected to the correct master and has received events. This means that
if (info->slave_configured && * while MaxScale can't connect to the master, it's probably still alive.
info->slave_status.master_server_id == handle->master->server->node_id && */
difftime(time(NULL), info->latest_event) < handle->master_failure_timeout) received_event = true;
{ break;
/**
* The slave is still connected to the correct master and has
* received events. This means that the master is not dead, but
* we just can't connect to it.
*/
rval = true;
break;
}
} }
} }
return received_event;
return rval;
} }
static inline void monitor_mysql_db(MYSQL_MONITOR* mon, static inline void monitor_mysql_db(MYSQL_MONITOR* mon,
@ -2383,7 +2376,9 @@ monitorMain(void *arg)
handle->auto_failover = false; handle->auto_failover = false;
disable_setting(handle, CN_AUTO_FAILOVER); disable_setting(handle, CN_AUTO_FAILOVER);
} }
else if (master_maybe_dead(handle) && master_still_alive(handle)) // If master seems to be down, check if slaves are receiving events.
else if (handle->verify_master_failure && handle->master &&
SERVER_IS_DOWN(handle->master->server) && slave_receiving_events(handle))
{ {
MXS_INFO("Master failure not yet confirmed by slaves, delaying failover."); MXS_INFO("Master failure not yet confirmed by slaves, delaying failover.");
} }