MXS-1446: Move failover parameters into mysqlmon

The `failover` and `failover_timeout` parameters are now declared as a
part of the mysqlmon module. Changed the implementation of the failover
function so that the dependencies on the monitor struct can be removed or
moved into parameters.
This commit is contained in:
Markus Mäkelä 2017-09-28 08:15:28 +03:00
parent ef115208e6
commit d4fd34cecd
7 changed files with 44 additions and 120 deletions

View File

@ -205,8 +205,6 @@ struct mxs_monitor
bool active; /**< True if monitor is active */
time_t journal_max_age; /**< Maximum age of journal file */
uint32_t script_timeout; /**< Timeout in seconds for the monitor scripts */
uint32_t failover_timeout; /**< Timeout in seconds for failover script */
bool failover; /**< Whether failover functionality is enabled */
int64_t last_master_up; /**< Time when the last master_up event was triggered */
int64_t last_master_down; /**< Time when the last master_down event was triggered */
struct mxs_monitor *next; /**< Next monitor in the linked list */
@ -255,8 +253,6 @@ extern const char CN_BACKEND_CONNECT_TIMEOUT[];
extern const char CN_MONITOR_INTERVAL[];
extern const char CN_JOURNAL_MAX_AGE[];
extern const char CN_SCRIPT_TIMEOUT[];
extern const char CN_FAILOVER[];
extern const char CN_FAILOVER_TIMEOUT[];
extern const char CN_SCRIPT[];
extern const char CN_EVENTS[];
@ -298,12 +294,15 @@ void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_
*
* This function should be called immediately after @c mon_process_state_changes.
*
* @param monitor Monitor whose cluster is processed
* @param monitor Monitor whose cluster is processed
* @param failover_timeout Timeout in seconds for the failover
*
* @return True on success, false on error
*
* @todo Currently this only works with flat replication topologies and
* needs to be moved inside mysqlmon as it is MariaDB specific code.
*/
void mon_process_failover(MXS_MONITOR *monitor);
bool mon_process_failover(MXS_MONITOR *monitor, uint32_t failover_timeout);
/**
* @brief Hangup connections to failed servers

View File

@ -244,8 +244,6 @@ const char *config_monitor_params[] =
CN_MONITOR_INTERVAL,
CN_JOURNAL_MAX_AGE,
CN_SCRIPT_TIMEOUT,
CN_FAILOVER,
CN_FAILOVER_TIMEOUT,
CN_BACKEND_CONNECT_TIMEOUT,
CN_BACKEND_READ_TIMEOUT,
CN_BACKEND_WRITE_TIMEOUT,
@ -3189,41 +3187,6 @@ int create_new_monitor(CONFIG_CONTEXT *context, CONFIG_CONTEXT *obj, HASHTABLE*
obj->object, CN_SCRIPT_TIMEOUT, DEFAULT_SCRIPT_TIMEOUT);
}
char *failover = config_get_value(obj->parameters, CN_FAILOVER);
if (failover)
{
int val = config_truth_value(failover);
if (val != -1)
{
monitorSetFailover(monitor, val);
}
else
{
error_count++;
MXS_NOTICE("Invalid '%s' parameter for monitor '%s'",
CN_FAILOVER, obj->object);
}
}
char *failover_timeout = config_get_value(obj->parameters, CN_FAILOVER_TIMEOUT);
if (failover_timeout)
{
char *endptr;
long interval = strtol(failover_timeout, &endptr, 0);
if (*endptr == '\0' && interval > 0)
{
monitorSetFailoverTimeout(monitor, (uint32_t)interval);
}
else
{
error_count++;
MXS_NOTICE("Invalid '%s' parameter for monitor '%s'",
CN_FAILOVER_TIMEOUT, obj->object);
}
}
char *connect_timeout = config_get_value(obj->parameters, CN_BACKEND_CONNECT_TIMEOUT);
if (connect_timeout)
{

View File

@ -522,25 +522,6 @@ bool runtime_alter_monitor(MXS_MONITOR *monitor, const char *key, const char *va
monitorSetScriptTimeout(monitor, ival);
}
}
else if (strcmp(key, CN_FAILOVER_TIMEOUT) == 0)
{
long ival = get_positive_int(value);
if (ival)
{
valid = true;
monitorSetFailoverTimeout(monitor, ival);
}
}
else if (strcmp(key, CN_FAILOVER) == 0)
{
int val = config_truth_value(value);
if (val != -1)
{
valid = true;
monitorSetFailover(monitor, val);
}
}
else
{
/** We're modifying module specific parameters and we need to stop the monitor */

View File

@ -35,9 +35,6 @@ MXS_BEGIN_DECLS
/** Default script execution timeout in seconds */
#define DEFAULT_SCRIPT_TIMEOUT 90
/** Default failover script timeout */
#define DEFAULT_FAILOVER_TIMEOUT 90
/**
* Monitor network timeout types
*/
@ -77,8 +74,6 @@ void monitorSetInterval (MXS_MONITOR *, unsigned long);
bool monitorSetNetworkTimeout(MXS_MONITOR *, int, int);
void monitorSetJournalMaxAge(MXS_MONITOR *mon, time_t value);
void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value);
void monitorSetFailover(MXS_MONITOR *mon, bool value);
void monitorSetFailoverTimeout(MXS_MONITOR *mon, uint32_t value);
/**
* @brief Serialize a monitor to a file

View File

@ -71,8 +71,6 @@ const char CN_BACKEND_CONNECT_TIMEOUT[] = "backend_connect_timeout";
const char CN_MONITOR_INTERVAL[] = "monitor_interval";
const char CN_JOURNAL_MAX_AGE[] = "journal_max_age";
const char CN_SCRIPT_TIMEOUT[] = "script_timeout";
const char CN_FAILOVER[] = "failover";
const char CN_FAILOVER_TIMEOUT[] = "failover_timeout";
const char CN_SCRIPT[] = "script";
const char CN_EVENTS[] = "events";
@ -138,8 +136,6 @@ MXS_MONITOR* monitor_alloc(const char *name, const char *module)
mon->script_timeout = DEFAULT_SCRIPT_TIMEOUT;
mon->parameters = NULL;
mon->server_pending_changes = false;
mon->failover = false;
mon->failover_timeout = DEFAULT_FAILOVER_TIMEOUT;
spinlock_init(&mon->lock);
spinlock_acquire(&monLock);
mon->next = allMonitors;
@ -663,16 +659,6 @@ void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value)
mon->script_timeout = value;
}
void monitorSetFailover(MXS_MONITOR *mon, bool value)
{
mon->failover = value;
}
void monitorSetFailoverTimeout(MXS_MONITOR *mon, uint32_t value)
{
mon->failover_timeout = value;
}
/**
* Set Monitor timeouts for connect/read/write
*
@ -1560,8 +1546,6 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena
dprintf(file, "%s=%d\n", CN_BACKEND_CONNECT_ATTEMPTS, monitor->connect_attempts);
dprintf(file, "%s=%ld\n", CN_JOURNAL_MAX_AGE, monitor->journal_max_age);
dprintf(file, "%s=%d\n", CN_SCRIPT_TIMEOUT, monitor->script_timeout);
dprintf(file, "%s=%s\n", CN_FAILOVER, monitor->failover ? "true" : "false");
dprintf(file, "%s=%d\n", CN_FAILOVER_TIMEOUT, monitor->failover_timeout);
if (monitor->databases)
{
@ -1591,8 +1575,6 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena
CN_BACKEND_CONNECT_ATTEMPTS,
CN_JOURNAL_MAX_AGE,
CN_SCRIPT_TIMEOUT,
CN_FAILOVER,
CN_FAILOVER_TIMEOUT,
CN_SERVERS
};
@ -1763,8 +1745,9 @@ void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_
}
}
void mon_process_failover(MXS_MONITOR *monitor)
bool mon_process_failover(MXS_MONITOR *monitor, uint32_t failover_timeout)
{
bool rval = true;
MXS_CONFIG* cnf = config_get_global_options();
MXS_MONITOR_SERVERS* failed_master = NULL;
@ -1774,18 +1757,16 @@ void mon_process_failover(MXS_MONITOR *monitor)
{
if (ptr->server->last_event == MASTER_DOWN_EVENT)
{
if (monitor->failover && !cnf->passive)
if (!cnf->passive)
{
if (failed_master)
{
MXS_ALERT("Multiple failed master servers detected: "
"'%s' is the first master to fail but server "
"'%s' has also triggered a master_down event."
"Aborting and disabling failover.",
"'%s' has also triggered a master_down event.",
failed_master->server->unique_name,
ptr->server->unique_name);
monitorSetFailover(monitor, false);
return;
return false;
}
else
{
@ -1803,21 +1784,20 @@ void mon_process_failover(MXS_MONITOR *monitor)
* since the event took place.
*/
if (monitor->failover && // Failover is enabled
!cnf->passive && // This is not a passive MaxScale
if (!cnf->passive && // This is not a passive MaxScale
ptr->server->last_event == MASTER_DOWN_EVENT && // This is a master that went down
cnf->promoted_at >= ptr->server->triggered_at && // Promoted to active after the event took place
ptr->new_event && // Event has not yet been processed
monitor->last_master_down > monitor->last_master_up) // Latest relevant event
{
int64_t timeout = SEC_TO_HB(monitor->failover_timeout);
int64_t timeout = SEC_TO_HB(failover_timeout);
int64_t t = hkheartbeat - ptr->server->triggered_at;
if (t > timeout)
{
MXS_WARNING("Failover of server '%s' did not take place within "
"%u seconds, failover needs to be re-triggered",
ptr->server->unique_name, monitor->failover_timeout);
ptr->server->unique_name, failover_timeout);
failed_master = ptr;
ptr->new_event = false;
}
@ -1837,15 +1817,13 @@ void mon_process_failover(MXS_MONITOR *monitor)
"SLAVELIST=$SLAVELIST SYNCEDLIST=$SYNCEDLIST";
if (monitor_launch_script(monitor, failed_master, failover_cmd,
monitor->failover_timeout))
failover_timeout))
{
MXS_ALERT("Failed to perform failover, disabling failover functionality. "
"To enable failover functionalty, manually set 'failover' "
"to 'true' for monitor '%s' via MaxAdmin or the REST API.",
monitor->name);
monitorSetFailover(monitor, false);
rval = false;
}
}
return rval;
}
static const char* monitor_state_to_string(int state)
@ -1883,8 +1861,6 @@ json_t* monitor_parameters_to_json(const MXS_MONITOR* monitor)
json_object_set_new(rval, CN_BACKEND_CONNECT_ATTEMPTS, json_integer(monitor->connect_attempts));
json_object_set_new(rval, CN_JOURNAL_MAX_AGE, json_integer(monitor->journal_max_age));
json_object_set_new(rval, CN_SCRIPT_TIMEOUT, json_integer(monitor->script_timeout));
json_object_set_new(rval, CN_FAILOVER, json_boolean(monitor->failover));
json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(monitor->script_timeout));
/** Add custom module parameters */
const MXS_MODULE* mod = get_module(monitor->module_name, MODULE_MONITOR);

View File

@ -16,22 +16,6 @@
/**
* @file mysqlmon.h - The MySQL monitor
*
* @verbatim
* Revision History
*
* Date Who Description
* 08/07/13 Mark Riddoch Initial implementation
* 26/05/14 Massimiliano Pinto Default values for MONITOR_INTERVAL
* 28/05/14 Massimiliano Pinto Addition of new fields in MYSQL_MONITOR struct
* 24/06/14 Massimiliano Pinto Addition of master field in MYSQL_MONITOR struct and MONITOR_MAX_NUM_SLAVES
* 28/08/14 Massimiliano Pinto Addition of detectStaleMaster
* 30/10/14 Massimiliano Pinto Addition of disableMasterFailback
* 07/11/14 Massimiliano Pinto Addition of NetworkTimeout: connect, read, write
* 20/04/15 Guillaume Lefranc Addition of availableWhenDonor
* 22/04/15 Martin Brampton Addition of disableMasterRoleSetting
* 07/05/15 Markus Makela Addition of command execution on Master server failure
* @endverbatim
*/
#include <maxscale/cdefs.h>
@ -79,6 +63,8 @@ typedef struct
bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */
bool warn_failover; /**< Log a warning when failover happens */
bool allow_external_slaves; /**< Whether to allow usage of external slave servers */
bool failover; /**< If master failover is enabled */
uint32_t failover_timeout; /**< Timeout in seconds for the master failover */
MXS_MONITOR* monitor;
} MYSQL_MONITOR;

View File

@ -60,6 +60,12 @@ void check_maxscale_schema_replication(MXS_MONITOR *monitor);
static bool report_version_err = true;
static const char* hb_table_name = "maxscale_schema.replication_heartbeat";
static const char CN_FAILOVER[] = "failover";
static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout";
/** Default failover timeout in seconds, as a string because it is used as a module parameter default */
#define DEFAULT_FAILOVER_TIMEOUT "90"
/**
* The module entry point routine. It is this routine that
* must populate the structure that is referred to as the
@ -116,6 +122,8 @@ MXS_MODULE* MXS_CREATE_MODULE()
MXS_MODULE_OPT_NONE,
mxs_monitor_event_enum_values
},
{CN_FAILOVER, MXS_MODULE_PARAM_BOOL, "false"},
{CN_FAILOVER_TIMEOUT, MXS_MODULE_PARAM_COUNT, DEFAULT_FAILOVER_TIMEOUT},
{MXS_END_MODULE_PARAMS}
}
};
@ -262,6 +270,8 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
handle->script = config_copy_string(params, "script");
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
handle->allow_external_slaves = config_get_bool(params, "allow_external_slaves");
handle->failover = config_get_bool(params, CN_FAILOVER);
handle->failover_timeout = config_get_integer(params, CN_FAILOVER_TIMEOUT);
bool error = false;
@ -319,6 +329,8 @@ static void diagnostics(DCB *dcb, const MXS_MONITOR *mon)
{
const MYSQL_MONITOR *handle = (const MYSQL_MONITOR *)mon->handle;
dcb_printf(dcb, "Failover:\t%s\n", handle->failover ? "Enabled" : "Disabled");
dcb_printf(dcb, "Failover Timeout:\t%u\n", handle->failover_timeout);
dcb_printf(dcb, "MaxScale MonitorId:\t%lu\n", handle->id);
dcb_printf(dcb, "Replication lag:\t%s\n", (handle->replicationHeartbeat == 1) ? "enabled" : "disabled");
dcb_printf(dcb, "Detect Stale Master:\t%s\n", (handle->detectStaleMaster == 1) ? "enabled" : "disabled");
@ -365,6 +377,8 @@ static json_t* diagnostics_json(const MXS_MONITOR *mon)
json_object_set_new(rval, "failcount", json_integer(handle->failcount));
json_object_set_new(rval, "allow_cluster_recovery", json_boolean(handle->allow_cluster_recovery));
json_object_set_new(rval, "mysql51_replication", json_boolean(handle->mysql51_replication));
json_object_set_new(rval, CN_FAILOVER, json_boolean(handle->failover));
json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(handle->failover_timeout));
if (handle->script)
{
@ -1401,7 +1415,17 @@ monitorMain(void *arg)
* need to be launched.
*/
mon_process_state_changes(mon, handle->script, handle->events);
mon_process_failover(mon);
if (handle->failover)
{
if (!mon_process_failover(mon, handle->failover_timeout))
{
MXS_ALERT("Failed to perform failover, disabling failover functionality. "
"To enable failover functionality, manually set 'failover' to "
"'true' for monitor '%s' via MaxAdmin or the REST API.", mon->name);
handle->failover = false;
}
}
/* Log a master detection failure, or the first master becoming available after a failure */
if (root_master &&