Implement simple failover mode into mysqlmon
The mysqlmon simple failover mode allows it to direct write traffic to a secondary node. This enables a very simple failover mode with MaxScale when it is used in a two node master-slave setup.
This commit is contained in:
@ -119,6 +119,51 @@ This functionality is similar to the [Multi-Master Monitor](MM-Monitor.md)
|
|||||||
functionality. The only difference is that the MySQL monitor will also detect
|
functionality. The only difference is that the MySQL monitor will also detect
|
||||||
traditional Master-Slave topologies.
|
traditional Master-Slave topologies.
|
||||||
|
|
||||||
|
### `failover`
|
||||||
|
|
||||||
|
Failover mode. This feature takes a boolean parameter and is disabled by default.
|
||||||
|
|
||||||
|
This parameter is intended to be used with simple, two node master-slave pairs
|
||||||
|
where the failure of the master can be resolved by "promoting" the slave as the
|
||||||
|
new master. Normally this is done by using an external agent of some sort
|
||||||
|
(possibly triggered by MaxScale's monitor scripts), like
|
||||||
|
[MariaDB Replication Manager](https://github.com/tanji/replication-manager)
|
||||||
|
or [MHA](https://code.google.com/p/mysql-master-ha/).
|
||||||
|
|
||||||
|
The failover mode in mysqlmon is completely passive in the sense that it does
|
||||||
|
not modify the cluster or any servers in it. It labels a slave server as a
|
||||||
|
master server when there is only one running server. Before a failover can be
|
||||||
|
initiated, the following conditions must have been met:
|
||||||
|
|
||||||
|
- The monitor has repeatedly failed to connect to the failed servers
|
||||||
|
- There is only one running server among the monitored servers
|
||||||
|
- @@read_only is not enabled on the last running server
|
||||||
|
|
||||||
|
When these conditions are met, the monitor assigns the last remaining server the
|
||||||
|
master status and puts all other servers into maintenance mode. This is done to
|
||||||
|
prevent accidental use of the failed servers if they came back online.
|
||||||
|
|
||||||
|
When the failed servers come back up, the maintenance mode needs to be manually
|
||||||
|
cleared once replication has been set up.
|
||||||
|
|
||||||
|
**Note**: A failover will cause permanent changes in the data of the promoted
|
||||||
|
server. Only use this feature if you know that the slave servers are capable
|
||||||
|
of acting as master servers.
|
||||||
|
|
||||||
|
### `failcount`
|
||||||
|
|
||||||
|
Number of failures that must occur on all failed servers before a failover is
|
||||||
|
initiated. The default value is 5 failures.
|
||||||
|
|
||||||
|
The monitor will attempt to contact all servers once per monitoring cycle. When
|
||||||
|
_failover_ mode is enabled, all of the failed servers must fail _failcount_
|
||||||
|
number of connection attempts before a failover is initiated.
|
||||||
|
|
||||||
|
The formula for calculating the actual number of milliseconds before failover
|
||||||
|
can start is `monitor_interval * failcount`. This means that to trigger a
|
||||||
|
failover after 10 seconds of master failure with a _monitor_interval_ of 1000
|
||||||
|
milliseconds, the value of _failcount_ must be 10.
|
||||||
|
|
||||||
## Example 1 - Monitor script
|
## Example 1 - Monitor script
|
||||||
|
|
||||||
Here is an example shell script which sends an email to an admin when a server goes down.
|
Here is an example shell script which sends an email to an admin when a server goes down.
|
||||||
|
@ -185,6 +185,8 @@ static char *monitor_params[] =
|
|||||||
"disable_master_role_setting",
|
"disable_master_role_setting",
|
||||||
"use_priority",
|
"use_priority",
|
||||||
"multimaster",
|
"multimaster",
|
||||||
|
"failover",
|
||||||
|
"failcount",
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -50,6 +50,8 @@
|
|||||||
* @endverbatim
|
* @endverbatim
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#define MYSQLMON_DEFAULT_FAILCOUNT 5
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The handle for an instance of a MySQL Monitor module
|
* The handle for an instance of a MySQL Monitor module
|
||||||
*/
|
*/
|
||||||
@ -72,6 +74,9 @@ typedef struct
|
|||||||
char* script; /*< Script to call when state changes occur on servers */
|
char* script; /*< Script to call when state changes occur on servers */
|
||||||
bool events[MAX_MONITOR_EVENT]; /*< enabled events */
|
bool events[MAX_MONITOR_EVENT]; /*< enabled events */
|
||||||
HASHTABLE *server_info; /**< Contains server specific information */
|
HASHTABLE *server_info; /**< Contains server specific information */
|
||||||
|
bool failover; /**< If simple failover is enabled */
|
||||||
|
int failcount; /**< How many monitoring cycles servers must be
|
||||||
|
down before failover is initiated */
|
||||||
} MYSQL_MONITOR;
|
} MYSQL_MONITOR;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -273,6 +273,8 @@ startMonitor(MONITOR *monitor, const CONFIG_PARAMETER* params)
|
|||||||
handle->script = NULL;
|
handle->script = NULL;
|
||||||
handle->multimaster = false;
|
handle->multimaster = false;
|
||||||
handle->mysql51_replication = false;
|
handle->mysql51_replication = false;
|
||||||
|
handle->failover = false;
|
||||||
|
handle->failcount = MYSQLMON_DEFAULT_FAILCOUNT;
|
||||||
memset(handle->events, false, sizeof(handle->events));
|
memset(handle->events, false, sizeof(handle->events));
|
||||||
spinlock_init(&handle->lock);
|
spinlock_init(&handle->lock);
|
||||||
}
|
}
|
||||||
@ -295,6 +297,19 @@ startMonitor(MONITOR *monitor, const CONFIG_PARAMETER* params)
|
|||||||
{
|
{
|
||||||
handle->multimaster = config_truth_value(params->value);
|
handle->multimaster = config_truth_value(params->value);
|
||||||
}
|
}
|
||||||
|
else if (!strcmp(params->name, "failover"))
|
||||||
|
{
|
||||||
|
handle->failover = config_truth_value(params->value);
|
||||||
|
}
|
||||||
|
else if (!strcmp(params->name, "failcount"))
|
||||||
|
{
|
||||||
|
handle->failcount = atoi(params->value);
|
||||||
|
if (handle->failcount <= 0)
|
||||||
|
{
|
||||||
|
MXS_ERROR("[%s] Invalid value for 'failcount': %s", monitor->name, params->value);
|
||||||
|
error = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
else if (!strcmp(params->name, "script"))
|
else if (!strcmp(params->name, "script"))
|
||||||
{
|
{
|
||||||
if (externcmd_can_execute(params->value))
|
if (externcmd_can_execute(params->value))
|
||||||
@ -352,6 +367,7 @@ startMonitor(MONITOR *monitor, const CONFIG_PARAMETER* params)
|
|||||||
hashtable_free(handle->server_info);
|
hashtable_free(handle->server_info);
|
||||||
MXS_FREE(handle->script);
|
MXS_FREE(handle->script);
|
||||||
MXS_FREE(handle);
|
MXS_FREE(handle);
|
||||||
|
handle = NULL;
|
||||||
}
|
}
|
||||||
else if (thread_start(&handle->thread, monitorMain, monitor) == NULL)
|
else if (thread_start(&handle->thread, monitorMain, monitor) == NULL)
|
||||||
{
|
{
|
||||||
@ -1021,6 +1037,80 @@ void find_graph_cycles(MYSQL_MONITOR *handle, MONITOR_SERVERS *database, int nse
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Check whether failover conditions have been met
|
||||||
|
*
|
||||||
|
* This function checks whether all the conditions to trigger a failover have
|
||||||
|
* been met. For a failover to happen, only one server must be available and
|
||||||
|
* other servers must have passed the configured tolerance level of failures.
|
||||||
|
*
|
||||||
|
* @param handle Monitor instance
|
||||||
|
* @param db Monitor servers
|
||||||
|
*
|
||||||
|
* @return True if failover is required
|
||||||
|
*/
|
||||||
|
bool failover_required(MYSQL_MONITOR *handle, MONITOR_SERVERS *db)
|
||||||
|
{
|
||||||
|
int candidates = 0;
|
||||||
|
|
||||||
|
while (db)
|
||||||
|
{
|
||||||
|
if (SERVER_IS_RUNNING(db->server))
|
||||||
|
{
|
||||||
|
candidates++;
|
||||||
|
MYSQL_SERVER_INFO *server_info = hashtable_fetch(handle->server_info, db->server->unique_name);
|
||||||
|
|
||||||
|
if (server_info->read_only || candidates > 1)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (db->mon_err_count < handle->failcount)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
db = db->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
return candidates == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initiate simple failover
|
||||||
|
*
|
||||||
|
* This function does the actual failover by assigning the last remaining server
|
||||||
|
* the master status and setting all other servers into maintenance mode. By
|
||||||
|
* setting the servers into maintenance mode, we prevent any possible conflicts
|
||||||
|
* when the failed servers come back up.
|
||||||
|
*
|
||||||
|
* @param handle Monitor instance
|
||||||
|
* @param db Monitor servers
|
||||||
|
*/
|
||||||
|
void do_failover(MYSQL_MONITOR *handle, MONITOR_SERVERS *db)
|
||||||
|
{
|
||||||
|
while (db)
|
||||||
|
{
|
||||||
|
if (SERVER_IS_RUNNING(db->server))
|
||||||
|
{
|
||||||
|
if (!SERVER_IS_MASTER(db->server))
|
||||||
|
{
|
||||||
|
MXS_WARNING("Failover initiated, server '%s' is now the master. "
|
||||||
|
"All other servers are set into maintenance mode.",
|
||||||
|
db->server->unique_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
monitor_set_pending_status(db, SERVER_MASTER);
|
||||||
|
monitor_clear_pending_status(db, SERVER_SLAVE);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
monitor_set_pending_status(db, SERVER_MAINT);
|
||||||
|
}
|
||||||
|
db = db->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The entry point for the monitoring module thread
|
* The entry point for the monitoring module thread
|
||||||
*
|
*
|
||||||
@ -1296,6 +1386,17 @@ monitorMain(void *arg)
|
|||||||
ptr = ptr->next;
|
ptr = ptr->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Now that all servers have their status correctly set, we can check
|
||||||
|
if we need to do a failover */
|
||||||
|
if (handle->failover)
|
||||||
|
{
|
||||||
|
if (failover_required(handle, mon->databases))
|
||||||
|
{
|
||||||
|
/** Other servers have died, initiate a failover to the last remaining server */
|
||||||
|
do_failover(handle, mon->databases);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ptr = mon->databases;
|
ptr = mon->databases;
|
||||||
monitor_event_t evtype;
|
monitor_event_t evtype;
|
||||||
while (ptr)
|
while (ptr)
|
||||||
|
Reference in New Issue
Block a user