diff --git a/Documentation/Monitors/MySQL-Monitor.md b/Documentation/Monitors/MySQL-Monitor.md index 050109c2b..f0be5738c 100644 --- a/Documentation/Monitors/MySQL-Monitor.md +++ b/Documentation/Monitors/MySQL-Monitor.md @@ -164,6 +164,21 @@ can start is `monitor_interval * failcount`. This means that to trigger a failover after 10 seconds of master failure with a _monitor_interval_ of 1000 milliseconds, the value of _failcount_ must be 10. +### `failover_recovery` + +Allow recovery after failover. This feature takes a boolean parameter and is +disabled by default. + +Normally if a failover has been triggered and the last remaining server is +chosen as the master, the monitor will set all of the failed servers into +maintenance mode. When this option is enabled, the failed servers are allowed to +rejoin the cluster. + +This option should be enabled when failover in MaxScale is used in conjunction +with an external agent that resets the slave status for new master servers. One +of these agents is the _replication-manager_ which clears the slave +configuration for each new master and removes the read-only mode. + ## Example 1 - Monitor script Here is an example shell script which sends an email to an admin when a server goes down. 
diff --git a/server/modules/monitor/mysqlmon.h b/server/modules/monitor/mysqlmon.h index d2cfd5f02..eca4b6b83 100644 --- a/server/modules/monitor/mysqlmon.h +++ b/server/modules/monitor/mysqlmon.h @@ -77,6 +77,7 @@ typedef struct bool failover; /**< If simple failover is enabled */ int failcount; /**< How many monitoring cycles servers must be down before failover is initiated */ + bool failover_recovery; /**< Allow servers to rejoin the cluster in failover mode */ bool warn_failover; /**< Log a warning when failover happens */ } MYSQL_MONITOR; diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index 4961ddd27..9caa297f8 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -127,6 +127,7 @@ MXS_MODULE* MXS_CREATE_MODULE() {"multimaster", MXS_MODULE_PARAM_BOOL, "false"}, {"failover", MXS_MODULE_PARAM_BOOL, "false"}, {"failcount", MXS_MODULE_PARAM_COUNT, "5"}, + {"failover_recovery", MXS_MODULE_PARAM_BOOL, "false"}, { "script", MXS_MODULE_PARAM_PATH, @@ -280,6 +281,7 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params) handle->multimaster = config_get_bool(params, "multimaster"); handle->failover = config_get_bool(params, "failover"); handle->failcount = config_get_integer(params, "failcount"); + handle->failover_recovery = config_get_bool(params, "failover_recovery"); handle->mysql51_replication = config_get_bool(params, "mysql51_replication"); handle->script = config_copy_string(params, "script"); handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values); @@ -1006,9 +1008,10 @@ void do_failover(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *db) { if (!SERVER_IS_MASTER(db->server) && handle->warn_failover) { - MXS_WARNING("Failover initiated, server '%s' is now the master. 
" - "All other servers are set into maintenance mode.", - db->server->unique_name); + MXS_WARNING("Failover initiated, server '%s' is now the master.%s", + db->server->unique_name, + handle->failover_recovery ? + "" : " All other servers are set into maintenance mode."); handle->warn_failover = false; } @@ -1016,7 +1019,7 @@ void do_failover(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *db) monitor_set_pending_status(db, SERVER_MASTER); monitor_clear_pending_status(db, SERVER_SLAVE); } - else + else if (!handle->failover_recovery) { server_set_status_nolock(db->server, SERVER_MAINT); monitor_set_pending_status(db, SERVER_MAINT);