Add option for failover recovery in mysqlmon
The `failover_recovery` option allows failed servers to rejoin the cluster. This should make using MaxScale with two node clusters easier. One use case for this is when the replication-manager promotes the last node in the cluster as the master. When this is done, the slave configuration is cleared and the read-only mode is disabled. Since the failover requires that the server is not configured as a slave and that it is not in read-only mode, it is safe to use `failover_recovery` with replication-manager.
This commit is contained in:
parent
61f2d96a58
commit
e7c7caebad
@ -164,6 +164,21 @@ can start is `monitor_interval * failcount`. This means that to trigger a
|
||||
failover after 10 seconds of master failure with a _monitor_interval_ of 1000
|
||||
milliseconds, the value of _failcount_ must be 10.
|
||||
|
||||
### `failover_recovery`
|
||||
|
||||
Allow recovery after failover. This feature takes a boolean parameter and is
|
||||
disabled by default.
|
||||
|
||||
Normally, if a failover has been triggered and the last remaining server is
|
||||
chosen as the master, the monitor will set all of the failed servers into
|
||||
maintenance mode. When this option is enabled, the failed servers are allowed to
|
||||
rejoin the cluster.
|
||||
|
||||
This option should be enabled when failover in MaxScale is used in conjunction
|
||||
with an external agent that resets the slave status for new master servers. One
|
||||
of these agents is the _replication-manager_ which clears the slave
|
||||
configuration for each new master and removes the read-only mode.
|
||||
|
||||
## Example 1 - Monitor script
|
||||
|
||||
Here is an example shell script which sends an email to an admin when a server goes down.
|
||||
|
@ -77,6 +77,7 @@ typedef struct
|
||||
bool failover; /**< If simple failover is enabled */
|
||||
int failcount; /**< How many monitoring cycles servers must be
|
||||
down before failover is initiated */
|
||||
bool failover_recovery; /**< Allow servers to rejoin the cluster in failover mode */
|
||||
bool warn_failover; /**< Log a warning when failover happens */
|
||||
} MYSQL_MONITOR;
|
||||
|
||||
|
@ -127,6 +127,7 @@ MXS_MODULE* MXS_CREATE_MODULE()
|
||||
{"multimaster", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"failover", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"failcount", MXS_MODULE_PARAM_COUNT, "5"},
|
||||
{"failover_recovery", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{
|
||||
"script",
|
||||
MXS_MODULE_PARAM_PATH,
|
||||
@ -280,6 +281,7 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
||||
handle->multimaster = config_get_bool(params, "multimaster");
|
||||
handle->failover = config_get_bool(params, "failover");
|
||||
handle->failcount = config_get_integer(params, "failcount");
|
||||
handle->failover_recovery = config_get_bool(params, "failover_recovery");
|
||||
handle->mysql51_replication = config_get_bool(params, "mysql51_replication");
|
||||
handle->script = config_copy_string(params, "script");
|
||||
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
|
||||
@ -1006,9 +1008,10 @@ void do_failover(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *db)
|
||||
{
|
||||
if (!SERVER_IS_MASTER(db->server) && handle->warn_failover)
|
||||
{
|
||||
MXS_WARNING("Failover initiated, server '%s' is now the master. "
|
||||
"All other servers are set into maintenance mode.",
|
||||
db->server->unique_name);
|
||||
MXS_WARNING("Failover initiated, server '%s' is now the master.%s",
|
||||
db->server->unique_name,
|
||||
handle->failover_recovery ?
|
||||
"" : " All other servers are set into maintenance mode.");
|
||||
handle->warn_failover = false;
|
||||
}
|
||||
|
||||
@ -1016,7 +1019,7 @@ void do_failover(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *db)
|
||||
monitor_set_pending_status(db, SERVER_MASTER);
|
||||
monitor_clear_pending_status(db, SERVER_SLAVE);
|
||||
}
|
||||
else
|
||||
else if (!handle->failover_recovery)
|
||||
{
|
||||
server_set_status_nolock(db->server, SERVER_MAINT);
|
||||
monitor_set_pending_status(db, SERVER_MAINT);
|
||||
|
Loading…
x
Reference in New Issue
Block a user