MXS-1446: Move failover to mysqlmon
Split the state change processing and failover handling into two separate functions and added a call to the failover function into mysqlmon. This prevents unintended behavior when failover is enabled for non-mysqlmon monitors. The parameter itself still needs to be moved into mysqlmon. Moved the failover documentation to the mysqlmon documentation as it is specific to this monitor.
This commit is contained in:
parent
0d6c06f33d
commit
ef115208e6
@ -115,19 +115,6 @@ If the script execution exceeds the configured timeout, it is stopped by sending
|
||||
a SIGTERM signal to it. If the process does not stop, a SIGKILL signal will be
|
||||
sent to it once the execution time is greater than twice the configured timeout.
|
||||
|
||||
### `failover_timeout`
|
||||
|
||||
The timeout for the cluster failover in seconds. The default value is 90
|
||||
seconds.
|
||||
|
||||
If no successful failover takes place within the configured time period, a
|
||||
message is logged and the failover functionality is disabled.
|
||||
|
||||
This parameter also controls how long a MaxScale instance that has transitioned
|
||||
from passive to active will wait for a failover to take place after an apparent
|
||||
loss of a master server. If no new master server is detected within the
|
||||
configured time period, the failover will be initiated again.
|
||||
|
||||
### `events`
|
||||
|
||||
A list of event names which cause the script to be executed. If this option is not defined, all events cause the script to be executed. The value must be a comma-separated list of event names.
|
||||
|
@ -214,6 +214,35 @@ assigned the _Slave_ status which allows them to be used like normal slave
|
||||
servers. When the option is disabled, the servers will only receive the _Slave
|
||||
of External Server_ status and they will not be used.
|
||||
|
||||
### `failover`
|
||||
|
||||
Enable automated master failover. This parameter expects a boolean value and the
|
||||
default value is false.
|
||||
|
||||
When the failover functionality is enabled, traditional MariaDB Master-Slave
|
||||
clusters will automatically elect a new master if the old master goes down. The
|
||||
failover functionality will not take place when MaxScale is configured as a
|
||||
passive instance. For details on how MaxScale behaves in passive mode, see the
|
||||
following documentation of `failover_timeout`.
|
||||
|
||||
If an attempt at failover fails or multiple master servers are detected, an
|
||||
error is logged and the failover functionality is disabled. If this happens, the
|
||||
cluster must be fixed manually and the failover needs to be re-enabled via the
|
||||
REST API or MaxAdmin.
|
||||
|
||||
### `failover_timeout`
|
||||
|
||||
The timeout for the cluster failover in seconds. The default value is 90
|
||||
seconds.
|
||||
|
||||
If no successful failover takes place within the configured time period, a
|
||||
message is logged and the failover functionality is disabled.
|
||||
|
||||
This parameter also controls how long a MaxScale instance that has transitioned
|
||||
from passive to active will wait for a failover to take place after an apparent
|
||||
loss of a master server. If no new master server is detected within the
|
||||
configured time period, the failover will be initiated again.
|
||||
|
||||
## Using the MySQL Monitor With Binlogrouter
|
||||
|
||||
Since MaxScale 2.2 it's possible to detect a replication setup
|
||||
@ -252,5 +281,7 @@ script=mail_to_admin.sh
|
||||
events=master_down,slave_down
|
||||
```
|
||||
|
||||
When a master or a slave server goes down, the script is executed, a mail is sent and the administrator will be immediately notified of any possible problems.
|
||||
This is just a simple example showing what you can do with MaxScale and monitor scripts.
|
||||
When a master or a slave server goes down, the script is executed, a mail is
|
||||
sent and the administrator will be immediately notified of any possible
|
||||
problems. This is just a simple example showing what you can do with MaxScale
|
||||
and monitor scripts.
|
||||
|
@ -289,6 +289,22 @@ void release_monitor_servers(MXS_MONITOR *monitor);
|
||||
*/
|
||||
void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_t events);
|
||||
|
||||
/**
|
||||
* @brief Process possible failover event
|
||||
*
|
||||
* If a master failure has occurred and MaxScale is configured with failover
|
||||
* functionality, this function executes an external failover program to elect
|
||||
* a new master server.
|
||||
*
|
||||
* This function should be called immediately after @c mon_process_state_changes.
|
||||
*
|
||||
* @param monitor Monitor whose cluster is processed
|
||||
*
|
||||
* @todo Currently this only works with flat replication topologies and
|
||||
* needs to be moved inside mysqlmon as it is MariaDB specific code.
|
||||
*/
|
||||
void mon_process_failover(MXS_MONITOR *monitor);
|
||||
|
||||
/**
|
||||
* @brief Hangup connections to failed servers
|
||||
*
|
||||
|
@ -1728,9 +1728,6 @@ void servers_status_current_to_pending(MXS_MONITOR *monitor)
|
||||
|
||||
void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_t events)
|
||||
{
|
||||
MXS_CONFIG* cnf = config_get_global_options();
|
||||
MXS_MONITOR_SERVERS* failed_master = NULL;
|
||||
|
||||
for (MXS_MONITOR_SERVERS *ptr = monitor->databases; ptr; ptr = ptr->next)
|
||||
{
|
||||
if (mon_status_changed(ptr))
|
||||
@ -1752,11 +1749,6 @@ void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_
|
||||
if (event == MASTER_DOWN_EVENT)
|
||||
{
|
||||
monitor->last_master_down = hkheartbeat;
|
||||
|
||||
if (monitor->failover && !cnf->passive)
|
||||
{
|
||||
failed_master = ptr;
|
||||
}
|
||||
}
|
||||
else if (event == MASTER_UP_EVENT || event == NEW_MASTER_EVENT)
|
||||
{
|
||||
@ -1768,6 +1760,40 @@ void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_
|
||||
monitor_launch_script(monitor, ptr, script, monitor->script_timeout);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mon_process_failover(MXS_MONITOR *monitor)
|
||||
{
|
||||
MXS_CONFIG* cnf = config_get_global_options();
|
||||
MXS_MONITOR_SERVERS* failed_master = NULL;
|
||||
|
||||
for (MXS_MONITOR_SERVERS *ptr = monitor->databases; ptr; ptr = ptr->next)
|
||||
{
|
||||
if (mon_status_changed(ptr))
|
||||
{
|
||||
if (ptr->server->last_event == MASTER_DOWN_EVENT)
|
||||
{
|
||||
if (monitor->failover && !cnf->passive)
|
||||
{
|
||||
if (failed_master)
|
||||
{
|
||||
MXS_ALERT("Multiple failed master servers detected: "
|
||||
"'%s' is the first master to fail but server "
|
||||
"'%s' has also triggered a master_down event."
|
||||
"Aborting and disabling failover.",
|
||||
failed_master->server->unique_name,
|
||||
ptr->server->unique_name);
|
||||
monitorSetFailover(monitor, false);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
failed_master = ptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
|
@ -1401,6 +1401,7 @@ monitorMain(void *arg)
|
||||
* need to be launched.
|
||||
*/
|
||||
mon_process_state_changes(mon, handle->script, handle->events);
|
||||
mon_process_failover(mon);
|
||||
|
||||
/* log master detection failure or first master becoming available after failure */
|
||||
if (root_master &&
|
||||
|
Loading…
x
Reference in New Issue
Block a user