Added 'disable_master_failback' monitor option

Added 'disable_master_failback' option in Galera monitor
This commit is contained in:
MassimilianoPinto
2014-10-30 19:03:07 +01:00
parent 9967a45379
commit 6f22975e6c
7 changed files with 64 additions and 14 deletions

View File

@ -38,6 +38,7 @@
* 09/09/14 Massimiliano Pinto Added localhost_match_wildcard_host parameter * 09/09/14 Massimiliano Pinto Added localhost_match_wildcard_host parameter
* 12/09/14 Mark Riddoch Addition of checks on servers list and * 12/09/14 Mark Riddoch Addition of checks on servers list and
* internal router suppression of messages * internal router suppression of messages
* 30/10/14 Massimiliano Pinto Added disable_master_failback parameter
* *
* @endverbatim * @endverbatim
*/ */
@ -765,6 +766,7 @@ int error_count = 0;
unsigned long interval = 0; unsigned long interval = 0;
int replication_heartbeat = 0; int replication_heartbeat = 0;
int detect_stale_master = 0; int detect_stale_master = 0;
int disable_master_failback = 0;
module = config_get_value(obj->parameters, "module"); module = config_get_value(obj->parameters, "module");
servers = config_get_value(obj->parameters, "servers"); servers = config_get_value(obj->parameters, "servers");
@ -782,6 +784,10 @@ int error_count = 0;
detect_stale_master = atoi(config_get_value(obj->parameters, "detect_stale_master")); detect_stale_master = atoi(config_get_value(obj->parameters, "detect_stale_master"));
} }
if (config_get_value(obj->parameters, "disable_master_failback")) {
disable_master_failback = atoi(config_get_value(obj->parameters, "disable_master_failback"));
}
if (module) if (module)
{ {
obj->element = monitor_alloc(obj->object, module); obj->element = monitor_alloc(obj->object, module);
@ -809,6 +815,10 @@ int error_count = 0;
if(detect_stale_master == 1) if(detect_stale_master == 1)
monitorDetectStaleMaster(obj->element, detect_stale_master); monitorDetectStaleMaster(obj->element, detect_stale_master);
/* disable master failback */
if(disable_master_failback == 1)
monitorDisableMasterFailback(obj->element, disable_master_failback);
/* get the servers to monitor */ /* get the servers to monitor */
s = strtok(servers, ","); s = strtok(servers, ",");
while (s) while (s)
@ -1612,6 +1622,7 @@ static char *monitor_params[] =
"monitor_interval", "monitor_interval",
"detect_replication_lag", "detect_replication_lag",
"detect_stale_master", "detect_stale_master",
"disable_master_failback",
NULL NULL
}; };
/** /**

View File

@ -26,6 +26,7 @@
* 08/07/13 Mark Riddoch Initial implementation * 08/07/13 Mark Riddoch Initial implementation
* 23/05/14 Massimiliano Pinto Addition of monitor_interval parameter * 23/05/14 Massimiliano Pinto Addition of monitor_interval parameter
* and monitor id * and monitor id
* 30/10/14 Massimiliano Pinto Addition of disable_master_failback parameter
* *
* @endverbatim * @endverbatim
*/ */
@ -329,3 +330,17 @@ monitorDetectStaleMaster(MONITOR *mon, int enable)
mon->module->detectStaleMaster(mon->handle, enable); mon->module->detectStaleMaster(mon->handle, enable);
} }
} }
/**
 * Forward the master failback setting to the monitor module.
 *
 * Nothing happens when the module does not supply a
 * disableMasterFailback entry point.
 *
 * @param mon		The monitor instance
 * @param disable	The value 1 disables the failback, 0 keeps it
 */
void
monitorDisableMasterFailback(MONITOR *mon, int disable)
{
	/* Modules without this entry point leave the slot set to NULL */
	if (mon->module->disableMasterFailback == NULL)
		return;

	mon->module->disableMasterFailback(mon->handle, disable);
}

View File

@ -33,6 +33,7 @@
* 23/05/14 Massimiliano Pinto Addition of defaultId and setInterval * 23/05/14 Massimiliano Pinto Addition of defaultId and setInterval
* 23/06/14 Massimiliano Pinto Addition of replicationHeartbeat * 23/06/14 Massimiliano Pinto Addition of replicationHeartbeat
* 28/08/14 Massimiliano Pinto Addition of detectStaleMaster * 28/08/14 Massimiliano Pinto Addition of detectStaleMaster
* 30/10/14 Massimiliano Pinto Addition of disableMasterFailback
* *
* @endverbatim * @endverbatim
*/ */
@ -73,6 +74,7 @@ typedef struct {
void (*defaultId)(void *, unsigned long); void (*defaultId)(void *, unsigned long);
void (*replicationHeartbeat)(void *, int); void (*replicationHeartbeat)(void *, int);
void (*detectStaleMaster)(void *, int); void (*detectStaleMaster)(void *, int);
void (*disableMasterFailback)(void *, int);
} MONITOR_OBJECT; } MONITOR_OBJECT;
/** /**
@ -123,4 +125,5 @@ extern void monitorSetId(MONITOR *, unsigned long);
extern void monitorSetInterval (MONITOR *, unsigned long); extern void monitorSetInterval (MONITOR *, unsigned long);
extern void monitorSetReplicationHeartbeat(MONITOR *, int); extern void monitorSetReplicationHeartbeat(MONITOR *, int);
extern void monitorDetectStaleMaster(MONITOR *, int); extern void monitorDetectStaleMaster(MONITOR *, int);
extern void monitorDisableMasterFailback(MONITOR *, int);
#endif #endif

View File

@ -30,7 +30,7 @@
* Interval is printed in diagnostics. * Interval is printed in diagnostics.
* 03/06/14 Mark Riddoch Add support for maintenance mode * 03/06/14 Mark Riddoch Add support for maintenance mode
* 24/06/14 Massimiliano Pinto Added depth level 0 for each node * 24/06/14 Massimiliano Pinto Added depth level 0 for each node
* 30/10/14 Massimiliano Pinto Added stickininess for master selection * 30/10/14 Massimiliano Pinto Added disableMasterFailback feature
* *
* @endverbatim * @endverbatim
*/ */
@ -53,7 +53,7 @@ extern int lm_enabled_logfiles_bitmask;
static void monitorMain(void *); static void monitorMain(void *);
static char *version_str = "V1.2.0"; static char *version_str = "V1.3.0";
MODULE_INFO info = { MODULE_INFO info = {
MODULE_API_MONITOR, MODULE_API_MONITOR,
@ -71,6 +71,7 @@ static void diagnostics(DCB *, void *);
static void setInterval(void *, size_t); static void setInterval(void *, size_t);
static MONITOR_SERVERS *get_candidate_master(MONITOR_SERVERS *); static MONITOR_SERVERS *get_candidate_master(MONITOR_SERVERS *);
static MONITOR_SERVERS *set_cluster_master(MONITOR_SERVERS *, MONITOR_SERVERS *, int); static MONITOR_SERVERS *set_cluster_master(MONITOR_SERVERS *, MONITOR_SERVERS *, int);
static void disableMasterFailback(void *, int);
static MONITOR_OBJECT MyObject = { static MONITOR_OBJECT MyObject = {
startMonitor, startMonitor,
@ -83,6 +84,7 @@ static MONITOR_OBJECT MyObject = {
NULL, NULL,
NULL, NULL,
NULL, NULL,
disableMasterFailback
}; };
/** /**
@ -150,6 +152,7 @@ MYSQL_MONITOR *handle;
handle->defaultPasswd = NULL; handle->defaultPasswd = NULL;
handle->id = MONITOR_DEFAULT_ID; handle->id = MONITOR_DEFAULT_ID;
handle->interval = MONITOR_INTERVAL; handle->interval = MONITOR_INTERVAL;
handle->disableMasterFailback = 0;
handle->master = NULL; handle->master = NULL;
spinlock_init(&handle->lock); spinlock_init(&handle->lock);
} }
@ -438,8 +441,8 @@ monitorMain(void *arg)
MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg; MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
MONITOR_SERVERS *ptr; MONITOR_SERVERS *ptr;
size_t nrounds = 0; size_t nrounds = 0;
MONITOR_SERVERS *candidate_master=NULL; MONITOR_SERVERS *candidate_master = NULL;
int master_stickiness=1; int master_stickiness = handle->disableMasterFailback;
if (mysql_thread_init()) if (mysql_thread_init())
{ {
@ -486,7 +489,6 @@ int master_stickiness=1;
/* clear bits for non member nodes */ /* clear bits for non member nodes */
if ( ! SERVER_IN_MAINT(ptr->server) && (ptr->server->node_id < 0 || ! SERVER_IS_JOINED(ptr->server))) { if ( ! SERVER_IN_MAINT(ptr->server) && (ptr->server->node_id < 0 || ! SERVER_IS_JOINED(ptr->server))) {
ptr->server->depth = -1; ptr->server->depth = -1;
/* clear M/S status */ /* clear M/S status */
@ -525,7 +527,6 @@ int master_stickiness=1;
/* Select the master, based on master_stickiness */ /* Select the master, based on master_stickiness */
handle->master = set_cluster_master(handle->master, candidate_master, master_stickiness); handle->master = set_cluster_master(handle->master, candidate_master, master_stickiness);
ptr = handle->databases; ptr = handle->databases;
while (ptr && handle->master) { while (ptr && handle->master) {
@ -573,16 +574,14 @@ MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
memcpy(&handle->interval, &interval, sizeof(unsigned long)); memcpy(&handle->interval, &interval, sizeof(unsigned long));
} }
/** /**
* get candidate master from all nodes * get candidate master from all nodes
* *
* current available rule: get the server with MIN(node_id) * The current available rule: get the server with MIN(node_id)
* node_id comes from 'wsrep_local_index' variable * node_id comes from 'wsrep_local_index' variable
* *
* @param servers The monitored servers list * @param servers The monitored servers list
* @return The candidate master on success, NULL on failure * @return The candidate master on success, NULL on failure
*
*/ */
static MONITOR_SERVERS *get_candidate_master(MONITOR_SERVERS *servers) { static MONITOR_SERVERS *get_candidate_master(MONITOR_SERVERS *servers) {
MONITOR_SERVERS *ptr = servers; MONITOR_SERVERS *ptr = servers;
@ -618,7 +617,7 @@ static MONITOR_SERVERS *get_candidate_master(MONITOR_SERVERS *servers) {
* The selection is based on the configuration option mapped to master_stickiness * The selection is based on the configuration option mapped to master_stickiness
* The candidate master may change over time due to * The candidate master may change over time due to
* 'wsrep_local_index' value change in the Galera Cluster * 'wsrep_local_index' value change in the Galera Cluster
* Enablig master_stickiness will avoid master change unless a failure is spotted * Enabling master_stickiness will avoid master change unless a failure is spotted
* *
* @param current_master Previous master server * @param current_master Previous master server
* @param candidate_master The candidate master server accordingly to the selection rule * @param candidate_master The candidate master server accordingly to the selection rule
@ -642,3 +641,22 @@ static MONITOR_SERVERS *set_cluster_master(MONITOR_SERVERS *current_master, MONI
return candidate_master; return candidate_master;
} }
} }
/**
 * Enable or disable the Master failback in a Galera Cluster.
 *
 * A node that restarts or rejoins may be handed back its previous
 * 'wsrep_local_index' by the cluster. If that value is the lowest in the
 * cluster the node is selected as Master again, which changes the Master
 * even though nothing has failed.
 * Setting this option to 1 avoids that situation and keeps the current
 * Master (if running) available.
 *
 * @param arg		The handle allocated by startMonitor
 * @param disable	Use 1 to disable the failback, 0 keeps it
 */
static void
disableMasterFailback(void *arg, int disable)
{
	MYSQL_MONITOR	*monitor_handle = arg;

	/* Record the flag in the monitor handle */
	monitor_handle->disableMasterFailback = disable;
}

View File

@ -105,7 +105,8 @@ static MONITOR_OBJECT MyObject = {
setInterval, setInterval,
defaultId, defaultId,
replicationHeartbeat, replicationHeartbeat,
detectStaleMaster detectStaleMaster,
NULL
}; };
/** /**

View File

@ -65,6 +65,7 @@ typedef struct {
unsigned long id; /**< Monitor ID */ unsigned long id; /**< Monitor ID */
int replicationHeartbeat; /**< Monitor flag for MySQL replication heartbeat */ int replicationHeartbeat; /**< Monitor flag for MySQL replication heartbeat */
int detectStaleMaster; /**< Monitor flag for MySQL replication Stale Master detection */ int detectStaleMaster; /**< Monitor flag for MySQL replication Stale Master detection */
int disableMasterFailback; /**< Monitor flag for Galera Cluster Master failback */
MONITOR_SERVERS *master; /**< Master server for MySQL Master/Slave replication */ MONITOR_SERVERS *master; /**< Master server for MySQL Master/Slave replication */
MONITOR_SERVERS *databases; /**< Linked list of servers to monitor */ MONITOR_SERVERS *databases; /**< Linked list of servers to monitor */
} MYSQL_MONITOR; } MYSQL_MONITOR;

View File

@ -73,6 +73,7 @@ static MONITOR_OBJECT MyObject = {
setInterval, setInterval,
NULL, NULL,
NULL, NULL,
NULL,
NULL NULL
}; };