Implement simple failover mode into mysqlmon
The mysqlmon simple failover mode allows it to direct write traffic to a secondary node. This enables a very simple failover mode with MaxScale when it is used in a two node master-slave setup.
This commit is contained in:
parent
a4aa03a1fb
commit
c919511ba7
@ -119,6 +119,51 @@ This functionality is similar to the [Multi-Master Monitor](MM-Monitor.md)
|
||||
functionality. The only difference is that the MySQL monitor will also detect
|
||||
traditional Master-Slave topologies.
|
||||
|
||||
### `failover`
|
||||
|
||||
Failover mode. This feature takes a boolean parameter and is disabled by default.
|
||||
|
||||
This parameter is intended to be used with simple, two node master-slave pairs
|
||||
where the failure of the master can be resolved by "promoting" the slave as the
|
||||
new master. Normally this is done by using an external agent of some sort
|
||||
(possibly triggered by MaxScale's monitor scripts), like
|
||||
[MariaDB Replication Manager](https://github.com/tanji/replication-manager)
|
||||
or [MHA](https://code.google.com/p/mysql-master-ha/).
|
||||
|
||||
The failover mode in mysqlmon is completely passive in the sense that it does
|
||||
not modify the cluster or any servers in it. It labels a slave server as a
|
||||
master server when there is only one running server. Before a failover can be
|
||||
initiated, the following conditions must have been met:
|
||||
|
||||
- The monitor has repeatedly failed to connect to the failed servers
|
||||
- There is only one running server among the monitored servers
|
||||
- @@read_only is not enabled on the last running server
|
||||
|
||||
When these conditions are met, the monitor assigns the last remaining server the
|
||||
master status and puts all other servers into maintenance mode. This is done to
|
||||
prevent accidental use of the failed servers if they come back online.
|
||||
|
||||
When the failed servers come back up, the maintenance mode needs to be manually
|
||||
cleared once replication has been set up.
|
||||
|
||||
**Note**: A failover will cause permanent changes in the data of the promoted
|
||||
server. Only use this feature if you know that the slave servers are capable
|
||||
of acting as master servers.
|
||||
|
||||
### `failcount`
|
||||
|
||||
Number of failures that must occur on all failed servers before a failover is
|
||||
initiated. The default value is 5 failures.
|
||||
|
||||
The monitor will attempt to contact all servers once per monitoring cycle. When
|
||||
_failover_ mode is enabled, all of the failed servers must fail _failcount_
|
||||
number of connection attempts before a failover is initiated.
|
||||
|
||||
The formula for calculating the actual number of milliseconds before failover
|
||||
can start is `monitor_interval * failcount`. This means that to trigger a
|
||||
failover after 10 seconds of master failure with a _monitor_interval_ of 1000
|
||||
milliseconds, the value of _failcount_ must be 10.
|
||||
|
||||
## Example 1 - Monitor script
|
||||
|
||||
Here is an example shell script which sends an email to an admin when a server goes down.
|
||||
|
@ -185,6 +185,8 @@ static char *monitor_params[] =
|
||||
"disable_master_role_setting",
|
||||
"use_priority",
|
||||
"multimaster",
|
||||
"failover",
|
||||
"failcount",
|
||||
NULL
|
||||
};
|
||||
|
||||
|
@ -50,6 +50,8 @@
|
||||
* @endverbatim
|
||||
*/
|
||||
|
||||
/** Value assigned to the monitor's failcount when the monitor handle is
 * initialized; the 'failcount' parameter overrides it. */
#define MYSQLMON_DEFAULT_FAILCOUNT 5
|
||||
|
||||
/**
|
||||
* The handle for an instance of a MySQL Monitor module
|
||||
*/
|
||||
@ -72,6 +74,9 @@ typedef struct
|
||||
char* script; /*< Script to call when state changes occur on servers */
|
||||
bool events[MAX_MONITOR_EVENT]; /*< enabled events */
|
||||
HASHTABLE *server_info; /**< Contains server specific information */
|
||||
bool failover; /**< If simple failover is enabled */
|
||||
int failcount; /**< How many monitoring cycles servers must be
|
||||
down before failover is initiated */
|
||||
} MYSQL_MONITOR;
|
||||
|
||||
#endif
|
||||
|
@ -273,6 +273,8 @@ startMonitor(MONITOR *monitor, const CONFIG_PARAMETER* params)
|
||||
handle->script = NULL;
|
||||
handle->multimaster = false;
|
||||
handle->mysql51_replication = false;
|
||||
handle->failover = false;
|
||||
handle->failcount = MYSQLMON_DEFAULT_FAILCOUNT;
|
||||
memset(handle->events, false, sizeof(handle->events));
|
||||
spinlock_init(&handle->lock);
|
||||
}
|
||||
@ -295,6 +297,19 @@ startMonitor(MONITOR *monitor, const CONFIG_PARAMETER* params)
|
||||
{
|
||||
handle->multimaster = config_truth_value(params->value);
|
||||
}
|
||||
else if (!strcmp(params->name, "failover"))
|
||||
{
|
||||
handle->failover = config_truth_value(params->value);
|
||||
}
|
||||
else if (!strcmp(params->name, "failcount"))
|
||||
{
|
||||
handle->failcount = atoi(params->value);
|
||||
if (handle->failcount <= 0)
|
||||
{
|
||||
MXS_ERROR("[%s] Invalid value for 'failcount': %s", monitor->name, params->value);
|
||||
error = true;
|
||||
}
|
||||
}
|
||||
else if (!strcmp(params->name, "script"))
|
||||
{
|
||||
if (externcmd_can_execute(params->value))
|
||||
@ -352,6 +367,7 @@ startMonitor(MONITOR *monitor, const CONFIG_PARAMETER* params)
|
||||
hashtable_free(handle->server_info);
|
||||
MXS_FREE(handle->script);
|
||||
MXS_FREE(handle);
|
||||
handle = NULL;
|
||||
}
|
||||
else if (thread_start(&handle->thread, monitorMain, monitor) == NULL)
|
||||
{
|
||||
@ -1021,6 +1037,80 @@ void find_graph_cycles(MYSQL_MONITOR *handle, MONITOR_SERVERS *database, int nse
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Check whether failover conditions have been met
|
||||
*
|
||||
* This function checks whether all the conditions to trigger a failover have
|
||||
* been met. For a failover to happen, only one server must be available and
|
||||
* other servers must have passed the configured tolerance level of failures.
|
||||
*
|
||||
* @param handle Monitor instance
|
||||
* @param db Monitor servers
|
||||
*
|
||||
* @return True if failover is required
|
||||
*/
|
||||
bool failover_required(MYSQL_MONITOR *handle, MONITOR_SERVERS *db)
|
||||
{
|
||||
int candidates = 0;
|
||||
|
||||
while (db)
|
||||
{
|
||||
if (SERVER_IS_RUNNING(db->server))
|
||||
{
|
||||
candidates++;
|
||||
MYSQL_SERVER_INFO *server_info = hashtable_fetch(handle->server_info, db->server->unique_name);
|
||||
|
||||
if (server_info->read_only || candidates > 1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (db->mon_err_count < handle->failcount)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
db = db->next;
|
||||
}
|
||||
|
||||
return candidates == 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Initiate simple failover
|
||||
*
|
||||
* This function does the actual failover by assigning the last remaining server
|
||||
* the master status and setting all other servers into maintenance mode. By
|
||||
* setting the servers into maintenance mode, we prevent any possible conflicts
|
||||
* when the failed servers come back up.
|
||||
*
|
||||
* @param handle Monitor instance
|
||||
* @param db Monitor servers
|
||||
*/
|
||||
void do_failover(MYSQL_MONITOR *handle, MONITOR_SERVERS *db)
|
||||
{
|
||||
while (db)
|
||||
{
|
||||
if (SERVER_IS_RUNNING(db->server))
|
||||
{
|
||||
if (!SERVER_IS_MASTER(db->server))
|
||||
{
|
||||
MXS_WARNING("Failover initiated, server '%s' is now the master. "
|
||||
"All other servers are set into maintenance mode.",
|
||||
db->server->unique_name);
|
||||
}
|
||||
|
||||
monitor_set_pending_status(db, SERVER_MASTER);
|
||||
monitor_clear_pending_status(db, SERVER_SLAVE);
|
||||
}
|
||||
else
|
||||
{
|
||||
monitor_set_pending_status(db, SERVER_MAINT);
|
||||
}
|
||||
db = db->next;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The entry point for the monitoring module thread
|
||||
*
|
||||
@ -1296,6 +1386,17 @@ monitorMain(void *arg)
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
/** Now that all servers have their status correctly set, we can check
|
||||
if we need to do a failover */
|
||||
if (handle->failover)
|
||||
{
|
||||
if (failover_required(handle, mon->databases))
|
||||
{
|
||||
/** Other servers have died, initiate a failover to the last remaining server */
|
||||
do_failover(handle, mon->databases);
|
||||
}
|
||||
}
|
||||
|
||||
ptr = mon->databases;
|
||||
monitor_event_t evtype;
|
||||
while (ptr)
|
||||
|
Loading…
x
Reference in New Issue
Block a user