diff --git a/Documentation/Monitors/MySQL-Monitor.md b/Documentation/Monitors/MySQL-Monitor.md index fe7ff6cf1..050109c2b 100644 --- a/Documentation/Monitors/MySQL-Monitor.md +++ b/Documentation/Monitors/MySQL-Monitor.md @@ -119,6 +119,51 @@ This functionality is similar to the [Multi-Master Monitor](MM-Monitor.md) functionality. The only difference is that the MySQL monitor will also detect traditional Master-Slave topologies. +### `failover` + +Failover mode. This feature takes a boolean parameter and is disabled by default. + +This parameter is intended to be used with simple, two node master-slave pairs +where the failure of the master can be resolved by "promoting" the slave as the +new master. Normally this is done by using an external agent of some sort +(possibly triggered by MaxScale's monitor scripts), like +[MariaDB Replication Manager](https://github.com/tanji/replication-manager) +or [MHA](https://code.google.com/p/mysql-master-ha/). + +The failover mode in mysqlmon is completely passive in the sense that it does +not modify the cluster or any servers in it. It labels a slave server as a +master server when there is only one running server. Before a failover can be +initiated, the following conditions must have been met: + +- The monitor has repeatedly failed to connect to the failed servers +- There is only one running server among the monitored servers +- @@read_only is not enabled on the last running server + +When these conditions are met, the monitor assigns the last remaining server the +master status and puts all other servers into maintenance mode. This is done to +prevent accidental use of the failed servers if they come back online. + +When the failed servers come back up, the maintenance mode needs to be manually +cleared once replication has been set up. + +**Note**: A failover will cause permanent changes in the data of the promoted + server. Only use this feature if you know that the slave servers are capable + of acting as master servers. 
+ +### `failcount` + +Number of failures that must occur on all failed servers before a failover is +initiated. The default value is 5 failures. + +The monitor will attempt to contact all servers once per monitoring cycle. When +_failover_ mode is enabled, all of the failed servers must fail _failcount_ +number of connection attempts before a failover is initiated. + +The formula for calculating the actual number of milliseconds before failover +can start is `monitor_interval * failcount`. This means that to trigger a +failover after 10 seconds of master failure with a _monitor_interval_ of 1000 +milliseconds, the value of _failcount_ must be 10. + ## Example 1 - Monitor script Here is an example shell script which sends an email to an admin when a server goes down. diff --git a/server/core/config.c b/server/core/config.c index baaec3596..df8fb44c7 100644 --- a/server/core/config.c +++ b/server/core/config.c @@ -185,6 +185,8 @@ static char *monitor_params[] = "disable_master_role_setting", "use_priority", "multimaster", + "failover", + "failcount", NULL }; diff --git a/server/modules/monitor/mysqlmon.h b/server/modules/monitor/mysqlmon.h index e2b5f17c5..3eb551ccc 100644 --- a/server/modules/monitor/mysqlmon.h +++ b/server/modules/monitor/mysqlmon.h @@ -50,6 +50,8 @@ * @endverbatim */ +#define MYSQLMON_DEFAULT_FAILCOUNT 5 + /** * The handle for an instance of a MySQL Monitor module */ @@ -72,6 +74,9 @@ typedef struct char* script; /*< Script to call when state changes occur on servers */ bool events[MAX_MONITOR_EVENT]; /*< enabled events */ HASHTABLE *server_info; /**< Contains server specific information */ + bool failover; /**< If simple failover is enabled */ + int failcount; /**< How many monitoring cycles servers must be + down before failover is initiated */ } MYSQL_MONITOR; #endif diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index c45e7b60b..0b289ce74 100644 --- 
a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -273,6 +273,8 @@ startMonitor(MONITOR *monitor, const CONFIG_PARAMETER* params) handle->script = NULL; handle->multimaster = false; handle->mysql51_replication = false; + handle->failover = false; + handle->failcount = MYSQLMON_DEFAULT_FAILCOUNT; memset(handle->events, false, sizeof(handle->events)); spinlock_init(&handle->lock); } @@ -295,6 +297,19 @@ startMonitor(MONITOR *monitor, const CONFIG_PARAMETER* params) { handle->multimaster = config_truth_value(params->value); } + else if (!strcmp(params->name, "failover")) + { + handle->failover = config_truth_value(params->value); + } + else if (!strcmp(params->name, "failcount")) + { + handle->failcount = atoi(params->value); + if (handle->failcount <= 0) + { + MXS_ERROR("[%s] Invalid value for 'failcount': %s", monitor->name, params->value); + error = true; + } + } else if (!strcmp(params->name, "script")) { if (externcmd_can_execute(params->value)) @@ -352,6 +367,7 @@ startMonitor(MONITOR *monitor, const CONFIG_PARAMETER* params) hashtable_free(handle->server_info); MXS_FREE(handle->script); MXS_FREE(handle); + handle = NULL; } else if (thread_start(&handle->thread, monitorMain, monitor) == NULL) { @@ -1021,6 +1037,80 @@ void find_graph_cycles(MYSQL_MONITOR *handle, MONITOR_SERVERS *database, int nse } } +/** + * @brief Check whether failover conditions have been met + * + * This function checks whether all the conditions to trigger a failover have + * been met. For a failover to happen, only one server must be available and + * other servers must have passed the configured tolerance level of failures. 
+ * + * @param handle Monitor instance + * @param db Monitor servers + * + * @return True if failover is required; false otherwise, also when a running server has no stored server information + */ +bool failover_required(MYSQL_MONITOR *handle, MONITOR_SERVERS *db) +{ + int candidates = 0; + + while (db) + { + if (SERVER_IS_RUNNING(db->server)) + { + candidates++; + MYSQL_SERVER_INFO *server_info = hashtable_fetch(handle->server_info, db->server->unique_name); + + if (server_info == NULL || server_info->read_only || candidates > 1) + { + return false; + } + } + else if (db->mon_err_count < handle->failcount) + { + return false; + } + + db = db->next; + } + + return candidates == 1; +} + +/** + * @brief Initiate simple failover + * + * This function does the actual failover by assigning the last remaining server + * the master status and setting all other servers into maintenance mode. By + * setting the servers into maintenance mode, we prevent any possible conflicts + * when the failed servers come back up. + * + * @param handle Monitor instance + * @param db Monitor servers + */ +void do_failover(MYSQL_MONITOR *handle, MONITOR_SERVERS *db) +{ + while (db) + { + if (SERVER_IS_RUNNING(db->server)) + { + if (!SERVER_IS_MASTER(db->server)) + { + MXS_WARNING("Failover initiated, server '%s' is now the master. 
" + "All other servers are set into maintenance mode.", + db->server->unique_name); + } + + monitor_set_pending_status(db, SERVER_MASTER); + monitor_clear_pending_status(db, SERVER_SLAVE); + } + else + { + monitor_set_pending_status(db, SERVER_MAINT); + } + db = db->next; + } +} + /** * The entry point for the monitoring module thread * @@ -1296,6 +1386,17 @@ monitorMain(void *arg) ptr = ptr->next; } + /** Now that all servers have their status correctly set, we can check + if we need to do a failover */ + if (handle->failover) + { + if (failover_required(handle, mon->databases)) + { + /** Other servers have died, initiate a failover to the last remaining server */ + do_failover(handle, mon->databases); + } + } + ptr = mon->databases; monitor_event_t evtype; while (ptr)