Clarify master failure verification
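The two previous functions, master_maybe_dead() and master_still_alive(), overlapped in what they checked. They are replaced by a single function, slave_receiving_events(), and the "master is down" precondition is now checked by the caller.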
This commit is contained in:
@ -1448,41 +1448,34 @@ static bool do_show_slave_status(MYSQL_MONITOR* mon,
|
|||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool master_maybe_dead(MYSQL_MONITOR* handle)
|
/**
|
||||||
|
* Check if a slave is receiving events from master.
|
||||||
|
*
|
||||||
|
* @param handle Cluster monitor
|
||||||
|
* @return True, if a slave replicating from the current master has received an event within the last master_failure_timeout seconds.
|
||||||
|
*/
|
||||||
|
static bool slave_receiving_events(MYSQL_MONITOR* handle)
|
||||||
{
|
{
|
||||||
return handle->verify_master_failure && handle->master &&
|
ss_dassert(handle->master);
|
||||||
SERVER_IS_DOWN(handle->master->server);
|
bool received_event = false;
|
||||||
}
|
long master_id = handle->master->server->node_id;
|
||||||
|
for (MXS_MONITORED_SERVER* server = handle->monitor->monitored_servers; server; server = server->next)
|
||||||
static bool master_still_alive(MYSQL_MONITOR* handle)
|
|
||||||
{
|
|
||||||
bool rval = true;
|
|
||||||
|
|
||||||
if (handle->master && SERVER_IS_DOWN(handle->master->server))
|
|
||||||
{
|
{
|
||||||
// We have a master and it appears to be dead
|
MySqlServerInfo* info = get_server_info(handle, server);
|
||||||
rval = false;
|
|
||||||
|
|
||||||
for (MXS_MONITORED_SERVER* s = handle->monitor->monitored_servers; s; s = s->next)
|
if (info->slave_configured &&
|
||||||
|
info->slave_status.master_server_id == master_id &&
|
||||||
|
difftime(time(NULL), info->latest_event) < handle->master_failure_timeout)
|
||||||
{
|
{
|
||||||
MySqlServerInfo* info = get_server_info(handle, s);
|
/**
|
||||||
|
* The slave is still connected to the correct master and has received events. This means that
|
||||||
if (info->slave_configured &&
|
* while MaxScale can't connect to the master, it's probably still alive.
|
||||||
info->slave_status.master_server_id == handle->master->server->node_id &&
|
*/
|
||||||
difftime(time(NULL), info->latest_event) < handle->master_failure_timeout)
|
received_event = true;
|
||||||
{
|
break;
|
||||||
/**
|
|
||||||
* The slave is still connected to the correct master and has
|
|
||||||
* received events. This means that the master is not dead, but
|
|
||||||
* we just can't connect to it.
|
|
||||||
*/
|
|
||||||
rval = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return received_event;
|
||||||
return rval;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void monitor_mysql_db(MYSQL_MONITOR* mon,
|
static inline void monitor_mysql_db(MYSQL_MONITOR* mon,
|
||||||
@ -2383,7 +2376,9 @@ monitorMain(void *arg)
|
|||||||
handle->auto_failover = false;
|
handle->auto_failover = false;
|
||||||
disable_setting(handle, CN_AUTO_FAILOVER);
|
disable_setting(handle, CN_AUTO_FAILOVER);
|
||||||
}
|
}
|
||||||
else if (master_maybe_dead(handle) && master_still_alive(handle))
|
// If master seems to be down, check if slaves are receiving events.
|
||||||
|
else if (handle->verify_master_failure && handle->master &&
|
||||||
|
SERVER_IS_DOWN(handle->master->server) && slave_receiving_events(handle))
|
||||||
{
|
{
|
||||||
MXS_INFO("Master failure not yet confirmed by slaves, delaying failover.");
|
MXS_INFO("Master failure not yet confirmed by slaves, delaying failover.");
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user