Add option for failover recovery in mysqlmon
The `failover_recovery` option allows failed servers to rejoin the cluster. This should make using MaxScale with two node clusters easier. One use case for this is when the replication-manager promotes the last node in the cluster as the master. When this is done, the slave configuration is cleared and the read-only mode is disabled. Since the failover requires that the server is not configured as a slave and that it is not in read-only mode, it is safe to use `failover_recovery` with replication-manager.
This commit is contained in:
@ -164,6 +164,21 @@ can start is `monitor_interval * failcount`. This means that to trigger a
|
|||||||
failover after 10 seconds of master failure with a _monitor_interval_ of 1000
|
failover after 10 seconds of master failure with a _monitor_interval_ of 1000
|
||||||
milliseconds, the value of _failcount_ must be 10.
|
milliseconds, the value of _failcount_ must be 10.
|
||||||
|
|
||||||
|
### `failover_recovery`
|
||||||
|
|
||||||
|
Allow recovery after failover. This feature takes a boolean parameter and is
|
||||||
|
disabled by default.
|
||||||
|
|
||||||
|
Normally, if a failover has been triggered and the last remaining server is
|
||||||
|
chosen as the master, the monitor will set all of the failed servers into
|
||||||
|
maintenance mode. When this option is enabled, the failed servers are allowed to
|
||||||
|
rejoin the cluster.
|
||||||
|
|
||||||
|
This option should be enabled when failover in MaxScale is used in conjunction
|
||||||
|
with an external agent that resets the slave status for new master servers. One
|
||||||
|
of these agents is the _replication-manager_ which clears the slave
|
||||||
|
configuration for each new master and removes the read-only mode.
|
||||||
|
|
||||||
## Example 1 - Monitor script
|
## Example 1 - Monitor script
|
||||||
|
|
||||||
Here is an example shell script which sends an email to an admin when a server goes down.
|
Here is an example shell script which sends an email to an admin when a server goes down.
|
||||||
|
@ -77,6 +77,7 @@ typedef struct
|
|||||||
bool failover; /**< If simple failover is enabled */
|
bool failover; /**< If simple failover is enabled */
|
||||||
int failcount; /**< How many monitoring cycles servers must be
|
int failcount; /**< How many monitoring cycles servers must be
|
||||||
down before failover is initiated */
|
down before failover is initiated */
|
||||||
|
bool failover_recovery; /**< Allow servers to rejoin the cluster in failover mode */
|
||||||
bool warn_failover; /**< Log a warning when failover happens */
|
bool warn_failover; /**< Log a warning when failover happens */
|
||||||
} MYSQL_MONITOR;
|
} MYSQL_MONITOR;
|
||||||
|
|
||||||
|
@ -127,6 +127,7 @@ MXS_MODULE* MXS_CREATE_MODULE()
|
|||||||
{"multimaster", MXS_MODULE_PARAM_BOOL, "false"},
|
{"multimaster", MXS_MODULE_PARAM_BOOL, "false"},
|
||||||
{"failover", MXS_MODULE_PARAM_BOOL, "false"},
|
{"failover", MXS_MODULE_PARAM_BOOL, "false"},
|
||||||
{"failcount", MXS_MODULE_PARAM_COUNT, "5"},
|
{"failcount", MXS_MODULE_PARAM_COUNT, "5"},
|
||||||
|
{"failover_recovery", MXS_MODULE_PARAM_BOOL, "false"},
|
||||||
{
|
{
|
||||||
"script",
|
"script",
|
||||||
MXS_MODULE_PARAM_PATH,
|
MXS_MODULE_PARAM_PATH,
|
||||||
@ -280,6 +281,7 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
|||||||
handle->multimaster = config_get_bool(params, "multimaster");
|
handle->multimaster = config_get_bool(params, "multimaster");
|
||||||
handle->failover = config_get_bool(params, "failover");
|
handle->failover = config_get_bool(params, "failover");
|
||||||
handle->failcount = config_get_integer(params, "failcount");
|
handle->failcount = config_get_integer(params, "failcount");
|
||||||
|
handle->failover_recovery = config_get_bool(params, "failover_recovery");
|
||||||
handle->mysql51_replication = config_get_bool(params, "mysql51_replication");
|
handle->mysql51_replication = config_get_bool(params, "mysql51_replication");
|
||||||
handle->script = config_copy_string(params, "script");
|
handle->script = config_copy_string(params, "script");
|
||||||
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
|
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
|
||||||
@ -1006,9 +1008,10 @@ void do_failover(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *db)
|
|||||||
{
|
{
|
||||||
if (!SERVER_IS_MASTER(db->server) && handle->warn_failover)
|
if (!SERVER_IS_MASTER(db->server) && handle->warn_failover)
|
||||||
{
|
{
|
||||||
MXS_WARNING("Failover initiated, server '%s' is now the master. "
|
MXS_WARNING("Failover initiated, server '%s' is now the master.%s",
|
||||||
"All other servers are set into maintenance mode.",
|
db->server->unique_name,
|
||||||
db->server->unique_name);
|
handle->failover_recovery ?
|
||||||
|
"" : " All other servers are set into maintenance mode.");
|
||||||
handle->warn_failover = false;
|
handle->warn_failover = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1016,7 +1019,7 @@ void do_failover(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *db)
|
|||||||
monitor_set_pending_status(db, SERVER_MASTER);
|
monitor_set_pending_status(db, SERVER_MASTER);
|
||||||
monitor_clear_pending_status(db, SERVER_SLAVE);
|
monitor_clear_pending_status(db, SERVER_SLAVE);
|
||||||
}
|
}
|
||||||
else
|
else if (!handle->failover_recovery)
|
||||||
{
|
{
|
||||||
server_set_status_nolock(db->server, SERVER_MAINT);
|
server_set_status_nolock(db->server, SERVER_MAINT);
|
||||||
monitor_set_pending_status(db, SERVER_MAINT);
|
monitor_set_pending_status(db, SERVER_MAINT);
|
||||||
|
Reference in New Issue
Block a user