diff --git a/include/maxscale/config.h b/include/maxscale/config.h index d727cf648..ee3868a70 100644 --- a/include/maxscale/config.h +++ b/include/maxscale/config.h @@ -212,7 +212,7 @@ typedef struct unsigned int auth_read_timeout; /**< Read timeout for the user authentication */ unsigned int auth_write_timeout; /**< Write timeout for the user authentication */ bool skip_permission_checks; /**< Skip service and monitor permission checks */ - bool passive; /**< True if MaxScale is in passive mode */ + int32_t passive; /**< True if MaxScale is in passive mode */ int64_t promoted_at; /**< Time when this Maxscale instance was * promoted from a passive to an active */ char qc_name[PATH_MAX]; /**< The name of the query classifier to load */ diff --git a/include/maxscale/monitor.h b/include/maxscale/monitor.h index f8d1de5c0..343f51124 100644 --- a/include/maxscale/monitor.h +++ b/include/maxscale/monitor.h @@ -205,8 +205,7 @@ struct mxs_monitor bool active; /**< True if monitor is active */ time_t journal_max_age; /**< Maximum age of journal file */ uint32_t script_timeout; /**< Timeout in seconds for the monitor scripts */ - int64_t last_master_up; /**< Time when the last master_up event was triggered */ - int64_t last_master_down; /**< Time when the last master_down event was triggered */ + bool master_has_failed; /**< Set to true when the latest event is a master_down event */ struct mxs_monitor *next; /**< Next monitor in the linked list */ }; diff --git a/include/maxscale/server.h b/include/maxscale/server.h index 457541097..10c7dbc88 100644 --- a/include/maxscale/server.h +++ b/include/maxscale/server.h @@ -136,6 +136,7 @@ typedef struct server bool is_active; /**< Server is active and has not been "destroyed" */ bool proxy_protocol; /**< Send proxy-protocol header to backend when connecting client sessions. */ int last_event; /**< The last event that occurred on this server */ + bool active_event; /**< Event observed when MaxScale was active */ int64_t triggered_at; /**< Time when the last event was triggered */ #if defined(SS_DEBUG) skygw_chk_t server_chk_tail; diff --git a/server/core/config_runtime.cc b/server/core/config_runtime.cc index 332ecdd56..623bcceed 100644 --- a/server/core/config_runtime.cc +++ b/server/core/config_runtime.cc @@ -763,10 +763,10 @@ bool runtime_alter_maxscale(const char* name, const char* value) if (cnf.passive && !boolval) { // This MaxScale is being promoted to the active instance - cnf.promoted_at = hkheartbeat; + atomic_store_int64(&cnf.promoted_at, hkheartbeat); } - cnf.passive = boolval; + atomic_store_int32(&cnf.passive, boolval); rval = true; } else diff --git a/server/core/monitor.cc b/server/core/monitor.cc index 850133390..190836a1f 100644 --- a/server/core/monitor.cc +++ b/server/core/monitor.cc @@ -1755,16 +1755,17 @@ void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_ mxs_monitor_event_t event = mon_get_event_type(ptr); ptr->server->last_event = event; ptr->server->triggered_at = hkheartbeat; + ptr->server->active_event = !atomic_load_int32(&config_get_global_options()->passive); ptr->new_event = true; mon_log_state_change(ptr); if (event == MASTER_DOWN_EVENT) { - monitor->last_master_down = hkheartbeat; + monitor->master_has_failed = true; } else if (event == MASTER_UP_EVENT || event == NEW_MASTER_EVENT) { - monitor->last_master_up = hkheartbeat; + monitor->master_has_failed = false; } if (script && (events & event)) diff --git a/server/modules/monitor/mysqlmon/mysql_mon.cc b/server/modules/monitor/mysqlmon/mysql_mon.cc index dbfd4596c..7f4378f4c 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.cc +++ b/server/modules/monitor/mysqlmon/mysql_mon.cc @@ -2892,43 +2892,32 @@ bool mon_process_failover(MYSQL_MONITOR* monitor, const char* failover_script, u for (MXS_MONITORED_SERVER *ptr = monitor->monitor->monitored_servers; ptr; ptr = ptr->next) { - if (mon_status_changed(ptr)) + if (ptr->new_event && !cnf->passive && + ptr->server->last_event == MASTER_DOWN_EVENT) { - if (ptr->server->last_event == MASTER_DOWN_EVENT) + if (failed_master) { - if (!cnf->passive) - { - if (failed_master) - { - MXS_ALERT("Multiple failed master servers detected: " - "'%s' is the first master to fail but server " - "'%s' has also triggered a master_down event.", - failed_master->server->unique_name, - ptr->server->unique_name); - return false; - } - else - { - failed_master = ptr; - } - } + MXS_ALERT("Multiple failed master servers detected: " + "'%s' is the first master to fail but server " + "'%s' has also triggered a master_down event.", + failed_master->server->unique_name, + ptr->server->unique_name); + return false; } - } - else - { - /** - * If a master_down event was triggered when this MaxScale was - * passive, we need to execute the failover script again if no new - * masters have appeared and this MaxScale has been set as active - * since the event took place. - */ - if (!cnf->passive && // This is not a passive MaxScale - ptr->server->last_event == MASTER_DOWN_EVENT && // This is a master that went down - cnf->promoted_at >= ptr->server->triggered_at && // Promoted to active after the event took place - ptr->new_event && // Event has not yet been processed - monitor->monitor->last_master_down > monitor->monitor->last_master_up) // Latest relevant event + if (ptr->server->active_event) { + // MaxScale was active when the event took place + failed_master = ptr; + ptr->new_event = false; + } + else if (monitor->monitor->master_has_failed) + { + /** + * If a master_down event was triggered when this MaxScale was + * passive, we need to execute the failover script again if no new + * masters have appeared. + */ int64_t timeout = SEC_TO_HB(failover_timeout); int64_t t = hkheartbeat - ptr->server->triggered_at;