MXS-1446: Make failover_timeout configurable

The time that MaxScale waits for a failover is now configurable.
This commit is contained in:
Markus Mäkelä 2017-09-26 14:13:24 +03:00
parent 4c6e7a0dbc
commit 316f792242
7 changed files with 61 additions and 2 deletions

View File

@ -115,6 +115,19 @@ If the script execution exceeds the configured timeout, it is stopped by sending
a SIGTERM signal to it. If the process does not stop, a SIGKILL signal will be
sent to it once the execution time is greater than twice the configured timeout.
### `failover_timeout`
The timeout for the cluster failover in seconds. The default value is 90
seconds.
If no successful failover takes place within the configured time period, a
message is logged and the failover functionality is disabled.
This parameter also controls how long a MaxScale instance that has transitioned
from passive to active will wait for a failover to take place after an apparent
loss of a master server. If no new master server is detected within the
configured time period, the failover will be initiated again.
### `events`
A list of event names which cause the script to be executed. If this option is not defined, all events cause the script to be executed. The list must contain a comma separated list of event names.

View File

@ -205,7 +205,7 @@ struct mxs_monitor
bool active; /**< True if monitor is active */
time_t journal_max_age; /**< Maximum age of journal file */
uint32_t script_timeout; /**< Timeout in seconds for the monitor scripts */
int32_t failover_timeout; /**< Timeout in seconds for failover script */
uint32_t failover_timeout; /**< Timeout in seconds for failover script */
int64_t last_master_up; /**< Time when the last master_up event was triggered */
int64_t last_master_down; /**< Time when the last master_down event was triggered */
struct mxs_monitor *next; /**< Next monitor in the linked list */
@ -254,6 +254,7 @@ extern const char CN_BACKEND_CONNECT_TIMEOUT[];
extern const char CN_MONITOR_INTERVAL[];
extern const char CN_JOURNAL_MAX_AGE[];
extern const char CN_SCRIPT_TIMEOUT[];
extern const char CN_FAILOVER_TIMEOUT[];
extern const char CN_SCRIPT[];
extern const char CN_EVENTS[];

View File

@ -244,6 +244,7 @@ const char *config_monitor_params[] =
CN_MONITOR_INTERVAL,
CN_JOURNAL_MAX_AGE,
CN_SCRIPT_TIMEOUT,
CN_FAILOVER_TIMEOUT,
CN_BACKEND_CONNECT_TIMEOUT,
CN_BACKEND_READ_TIMEOUT,
CN_BACKEND_WRITE_TIMEOUT,
@ -3187,6 +3188,24 @@ int create_new_monitor(CONFIG_CONTEXT *context, CONFIG_CONTEXT *obj, HASHTABLE*
obj->object, CN_SCRIPT_TIMEOUT, DEFAULT_SCRIPT_TIMEOUT);
}
char *failover_timeout = config_get_value(obj->parameters, CN_FAILOVER_TIMEOUT);
if (failover_timeout)
{
    char *endptr;
    long interval = strtol(failover_timeout, &endptr, 0);
    /** The setter takes a uint32_t: a value that does not survive the
     * conversion would be silently truncated, so treat it as invalid. */
    uint32_t timeout = (uint32_t)interval;
    if (*endptr == '\0' && interval > 0 && (long)timeout == interval)
    {
        monitorSetFailoverTimeout(monitor, timeout);
    }
    else
    {
        error_count++;
        MXS_NOTICE("Invalid '%s' parameter for monitor '%s'",
                   CN_FAILOVER_TIMEOUT, obj->object);
    }
}
char *connect_timeout = config_get_value(obj->parameters, CN_BACKEND_CONNECT_TIMEOUT);
if (connect_timeout)
{

View File

@ -522,6 +522,15 @@ bool runtime_alter_monitor(MXS_MONITOR *monitor, const char *key, const char *va
monitorSetScriptTimeout(monitor, ival);
}
}
else if (strcmp(key, CN_FAILOVER_TIMEOUT) == 0)
{
long ival = get_positive_int(value);
if (ival)
{
valid = true;
monitorSetFailoverTimeout(monitor, ival);
}
}
else
{
/** We're modifying module specific parameters and we need to stop the monitor */

View File

@ -77,6 +77,7 @@ void monitorSetInterval (MXS_MONITOR *, unsigned long);
bool monitorSetNetworkTimeout(MXS_MONITOR *, int, int);
void monitorSetJournalMaxAge(MXS_MONITOR *mon, time_t value);
void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value);
void monitorSetFailoverTimeout(MXS_MONITOR *mon, uint32_t value);
/**
* @brief Serialize a monitor to a file

View File

@ -71,6 +71,7 @@ const char CN_BACKEND_CONNECT_TIMEOUT[] = "backend_connect_timeout";
const char CN_MONITOR_INTERVAL[] = "monitor_interval";
const char CN_JOURNAL_MAX_AGE[] = "journal_max_age";
const char CN_SCRIPT_TIMEOUT[] = "script_timeout";
const char CN_FAILOVER_TIMEOUT[] = "failover_timeout";
const char CN_SCRIPT[] = "script";
const char CN_EVENTS[] = "events";
@ -666,6 +667,17 @@ void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value)
mon->script_timeout = value;
}
/**
* Set the failover timeout of a monitor
*
* Stores the given value in the monitor's failover_timeout field.
*
* @param mon The monitor instance
* @param value The failover timeout in seconds
*/
void monitorSetFailoverTimeout(MXS_MONITOR *mon, uint32_t value)
{
mon->failover_timeout = value;
}
/**
* Set Monitor timeouts for connect/read/write
*
@ -1546,6 +1558,7 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena
dprintf(file, "%s=%d\n", CN_BACKEND_CONNECT_ATTEMPTS, monitor->connect_attempts);
dprintf(file, "%s=%ld\n", CN_JOURNAL_MAX_AGE, monitor->journal_max_age);
/** Both timeouts are uint32_t: print them as unsigned so that large values
 * are not written out as negative numbers. */
dprintf(file, "%s=%u\n", CN_SCRIPT_TIMEOUT, (unsigned int)monitor->script_timeout);
dprintf(file, "%s=%u\n", CN_FAILOVER_TIMEOUT, (unsigned int)monitor->failover_timeout);
if (monitor->databases)
{
@ -1575,6 +1588,7 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena
CN_BACKEND_CONNECT_ATTEMPTS,
CN_JOURNAL_MAX_AGE,
CN_SCRIPT_TIMEOUT,
CN_FAILOVER_TIMEOUT,
CN_SERVERS
};
@ -1811,6 +1825,7 @@ json_t* monitor_parameters_to_json(const MXS_MONITOR* monitor)
json_object_set_new(rval, CN_BACKEND_CONNECT_ATTEMPTS, json_integer(monitor->connect_attempts));
json_object_set_new(rval, CN_JOURNAL_MAX_AGE, json_integer(monitor->journal_max_age));
json_object_set_new(rval, CN_SCRIPT_TIMEOUT, json_integer(monitor->script_timeout));
json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(monitor->failover_timeout));
/** Add custom module parameters */
const MXS_MODULE* mod = get_module(monitor->module_name, MODULE_MONITOR);

View File

@ -1587,12 +1587,13 @@ struct subcommand alteroptions[] =
"user Username used when connecting to servers\n"
"password Password used when connecting to servers\n"
"monitor_interval Monitoring interval in milliseconds\n"
"backend_connect_timeout Server coneection timeout in seconds\n"
"backend_connect_timeout Server connection timeout in seconds\n"
"backend_write_timeout Server write timeout in seconds\n"
"backend_read_timeout Server read timeout in seconds\n"
"backend_connect_attempts Number of re-connection attempts\n"
"journal_max_age Maximum age of server state journal\n"
"script_timeout Timeout in seconds for monitor scripts\n"
"failover_timeout Failover timeout in seconds\n"
"\n"
"This will alter an existing parameter of a monitor. To remove parameters,\n"
"pass an empty value for a key e.g. 'maxadmin alter monitor my-monitor my-key='\n"