MXS-1679: Check for existence of master before continuing failover checks
This seems to fix the issue where MaxScale detects an old master_down event.
This commit is contained in:
		@ -3330,49 +3330,50 @@ void check_maxscale_schema_replication(MXS_MONITOR *monitor)
 | 
				
			|||||||
bool mon_process_failover(MYSQL_MONITOR* monitor, uint32_t failover_timeout, bool* cluster_modified_out)
 | 
					bool mon_process_failover(MYSQL_MONITOR* monitor, uint32_t failover_timeout, bool* cluster_modified_out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    ss_dassert(*cluster_modified_out == false);
 | 
					    ss_dassert(*cluster_modified_out == false);
 | 
				
			||||||
 | 
					    if (config_get_global_options()->passive ||
 | 
				
			||||||
 | 
					        (monitor->master && SERVER_IS_MASTER(monitor->master->server)))
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        return true;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
    bool rval = true;
 | 
					    bool rval = true;
 | 
				
			||||||
    MXS_CONFIG* cnf = config_get_global_options();
 | 
					 | 
				
			||||||
    MXS_MONITORED_SERVER* failed_master = NULL;
 | 
					    MXS_MONITORED_SERVER* failed_master = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!cnf->passive)
 | 
					    for (MXS_MONITORED_SERVER *ptr = monitor->monitor->monitored_servers; ptr; ptr = ptr->next)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        for (MXS_MONITORED_SERVER *ptr = monitor->monitor->monitored_servers; ptr; ptr = ptr->next)
 | 
					        if (ptr->new_event && ptr->server->last_event == MASTER_DOWN_EVENT)
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            if (ptr->new_event && ptr->server->last_event == MASTER_DOWN_EVENT)
 | 
					            if (failed_master)
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                if (failed_master)
 | 
					                MXS_ALERT("Multiple failed master servers detected: "
 | 
				
			||||||
                {
 | 
					                          "'%s' is the first master to fail but server "
 | 
				
			||||||
                    MXS_ALERT("Multiple failed master servers detected: "
 | 
					                          "'%s' has also triggered a master_down event.",
 | 
				
			||||||
                              "'%s' is the first master to fail but server "
 | 
					                          failed_master->server->unique_name,
 | 
				
			||||||
                              "'%s' has also triggered a master_down event.",
 | 
					                          ptr->server->unique_name);
 | 
				
			||||||
                              failed_master->server->unique_name,
 | 
					                return false;
 | 
				
			||||||
                              ptr->server->unique_name);
 | 
					            }
 | 
				
			||||||
                    return false;
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                if (ptr->server->active_event)
 | 
					            if (ptr->server->active_event)
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                // MaxScale was active when the event took place
 | 
				
			||||||
 | 
					                failed_master = ptr;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            else if (monitor->monitor->master_has_failed)
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                /**
 | 
				
			||||||
 | 
					                 * If a master_down event was triggered when this MaxScale was
 | 
				
			||||||
 | 
					                 * passive, we need to execute the failover script again if no new
 | 
				
			||||||
 | 
					                 * masters have appeared.
 | 
				
			||||||
 | 
					                 */
 | 
				
			||||||
 | 
					                int64_t timeout = SEC_TO_HB(failover_timeout);
 | 
				
			||||||
 | 
					                int64_t t = hkheartbeat - ptr->server->triggered_at;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if (t > timeout)
 | 
				
			||||||
                {
 | 
					                {
 | 
				
			||||||
                    // MaxScale was active when the event took place
 | 
					                    MXS_WARNING("Failover of server '%s' did not take place within "
 | 
				
			||||||
 | 
					                                "%u seconds, failover needs to be re-triggered",
 | 
				
			||||||
 | 
					                                ptr->server->unique_name, failover_timeout);
 | 
				
			||||||
                    failed_master = ptr;
 | 
					                    failed_master = ptr;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                else if (monitor->monitor->master_has_failed)
 | 
					 | 
				
			||||||
                {
 | 
					 | 
				
			||||||
                    /**
 | 
					 | 
				
			||||||
                     * If a master_down event was triggered when this MaxScale was
 | 
					 | 
				
			||||||
                     * passive, we need to execute the failover script again if no new
 | 
					 | 
				
			||||||
                     * masters have appeared.
 | 
					 | 
				
			||||||
                     */
 | 
					 | 
				
			||||||
                    int64_t timeout = SEC_TO_HB(failover_timeout);
 | 
					 | 
				
			||||||
                    int64_t t = hkheartbeat - ptr->server->triggered_at;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    if (t > timeout)
 | 
					 | 
				
			||||||
                    {
 | 
					 | 
				
			||||||
                        MXS_WARNING("Failover of server '%s' did not take place within "
 | 
					 | 
				
			||||||
                                    "%u seconds, failover needs to be re-triggered",
 | 
					 | 
				
			||||||
                                    ptr->server->unique_name, failover_timeout);
 | 
					 | 
				
			||||||
                        failed_master = ptr;
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user