MXS-1679 Check for existence of master before continuing failover checks
This seems to fix the issue of MaxScale detecting an old master_down event.
This commit is contained in:
		@ -3330,49 +3330,50 @@ void check_maxscale_schema_replication(MXS_MONITOR *monitor)
 | 
			
		||||
bool mon_process_failover(MYSQL_MONITOR* monitor, uint32_t failover_timeout, bool* cluster_modified_out)
 | 
			
		||||
{
 | 
			
		||||
    ss_dassert(*cluster_modified_out == false);
 | 
			
		||||
    if (config_get_global_options()->passive ||
 | 
			
		||||
        (monitor->master && SERVER_IS_MASTER(monitor->master->server)))
 | 
			
		||||
    {
 | 
			
		||||
        return true;
 | 
			
		||||
    }
 | 
			
		||||
    bool rval = true;
 | 
			
		||||
    MXS_CONFIG* cnf = config_get_global_options();
 | 
			
		||||
    MXS_MONITORED_SERVER* failed_master = NULL;
 | 
			
		||||
 | 
			
		||||
    if (!cnf->passive)
 | 
			
		||||
    for (MXS_MONITORED_SERVER *ptr = monitor->monitor->monitored_servers; ptr; ptr = ptr->next)
 | 
			
		||||
    {
 | 
			
		||||
        for (MXS_MONITORED_SERVER *ptr = monitor->monitor->monitored_servers; ptr; ptr = ptr->next)
 | 
			
		||||
        if (ptr->new_event && ptr->server->last_event == MASTER_DOWN_EVENT)
 | 
			
		||||
        {
 | 
			
		||||
            if (ptr->new_event && ptr->server->last_event == MASTER_DOWN_EVENT)
 | 
			
		||||
            if (failed_master)
 | 
			
		||||
            {
 | 
			
		||||
                if (failed_master)
 | 
			
		||||
                {
 | 
			
		||||
                    MXS_ALERT("Multiple failed master servers detected: "
 | 
			
		||||
                              "'%s' is the first master to fail but server "
 | 
			
		||||
                              "'%s' has also triggered a master_down event.",
 | 
			
		||||
                              failed_master->server->unique_name,
 | 
			
		||||
                              ptr->server->unique_name);
 | 
			
		||||
                    return false;
 | 
			
		||||
                }
 | 
			
		||||
                MXS_ALERT("Multiple failed master servers detected: "
 | 
			
		||||
                          "'%s' is the first master to fail but server "
 | 
			
		||||
                          "'%s' has also triggered a master_down event.",
 | 
			
		||||
                          failed_master->server->unique_name,
 | 
			
		||||
                          ptr->server->unique_name);
 | 
			
		||||
                return false;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
                if (ptr->server->active_event)
 | 
			
		||||
            if (ptr->server->active_event)
 | 
			
		||||
            {
 | 
			
		||||
                // MaxScale was active when the event took place
 | 
			
		||||
                failed_master = ptr;
 | 
			
		||||
            }
 | 
			
		||||
            else if (monitor->monitor->master_has_failed)
 | 
			
		||||
            {
 | 
			
		||||
                /**
 | 
			
		||||
                 * If a master_down event was triggered when this MaxScale was
 | 
			
		||||
                 * passive, we need to execute the failover script again if no new
 | 
			
		||||
                 * masters have appeared.
 | 
			
		||||
                 */
 | 
			
		||||
                int64_t timeout = SEC_TO_HB(failover_timeout);
 | 
			
		||||
                int64_t t = hkheartbeat - ptr->server->triggered_at;
 | 
			
		||||
 | 
			
		||||
                if (t > timeout)
 | 
			
		||||
                {
 | 
			
		||||
                    // MaxScale was active when the event took place
 | 
			
		||||
                    MXS_WARNING("Failover of server '%s' did not take place within "
 | 
			
		||||
                                "%u seconds, failover needs to be re-triggered",
 | 
			
		||||
                                ptr->server->unique_name, failover_timeout);
 | 
			
		||||
                    failed_master = ptr;
 | 
			
		||||
                }
 | 
			
		||||
                else if (monitor->monitor->master_has_failed)
 | 
			
		||||
                {
 | 
			
		||||
                    /**
 | 
			
		||||
                     * If a master_down event was triggered when this MaxScale was
 | 
			
		||||
                     * passive, we need to execute the failover script again if no new
 | 
			
		||||
                     * masters have appeared.
 | 
			
		||||
                     */
 | 
			
		||||
                    int64_t timeout = SEC_TO_HB(failover_timeout);
 | 
			
		||||
                    int64_t t = hkheartbeat - ptr->server->triggered_at;
 | 
			
		||||
 | 
			
		||||
                    if (t > timeout)
 | 
			
		||||
                    {
 | 
			
		||||
                        MXS_WARNING("Failover of server '%s' did not take place within "
 | 
			
		||||
                                    "%u seconds, failover needs to be re-triggered",
 | 
			
		||||
                                    ptr->server->unique_name, failover_timeout);
 | 
			
		||||
                        failed_master = ptr;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user