MXS-1446: Make failover_timeout configurable

The time that MaxScale waits for a failover is now configurable.
This commit is contained in:
Markus Mäkelä
2017-09-26 14:13:24 +03:00
parent 4c6e7a0dbc
commit 316f792242
7 changed files with 61 additions and 2 deletions

View File

@ -115,6 +115,19 @@ If the script execution exceeds the configured timeout, it is stopped by sending
a SIGTERM signal to it. If the process does not stop, a SIGKILL signal will be a SIGTERM signal to it. If the process does not stop, a SIGKILL signal will be
sent to it once the execution time is greater than twice the configured timeout. sent to it once the execution time is greater than twice the configured timeout.
### `failover_timeout`
The timeout for the cluster failover in seconds. The default value is 90
seconds.
If no successful failover takes place within the configured time period, a
message is logged and the failover functionality is disabled.
This parameter also controls how long a MaxScale instance that has transitioned
from passive to active will wait for a failover to take place after an apparent
loss of a master server. If no new master server is detected within the
configured time period, the failover will be initiated again.
### `events` ### `events`
A list of event names which cause the script to be executed. If this option is not defined, all events cause the script to be executed. The list must contain a comma separated list of event names. A list of event names which cause the script to be executed. If this option is not defined, all events cause the script to be executed. The list must contain a comma separated list of event names.

View File

@ -205,7 +205,7 @@ struct mxs_monitor
bool active; /**< True if monitor is active */ bool active; /**< True if monitor is active */
time_t journal_max_age; /**< Maximum age of journal file */ time_t journal_max_age; /**< Maximum age of journal file */
uint32_t script_timeout; /**< Timeout in seconds for the monitor scripts */ uint32_t script_timeout; /**< Timeout in seconds for the monitor scripts */
int32_t failover_timeout; /**< Timeout in seconds for failover script */ uint32_t failover_timeout; /**< Timeout in seconds for failover script */
int64_t last_master_up; /**< Time when the last master_up event was triggered */ int64_t last_master_up; /**< Time when the last master_up event was triggered */
int64_t last_master_down; /**< Time when the last master_down event was triggered */ int64_t last_master_down; /**< Time when the last master_down event was triggered */
struct mxs_monitor *next; /**< Next monitor in the linked list */ struct mxs_monitor *next; /**< Next monitor in the linked list */
@ -254,6 +254,7 @@ extern const char CN_BACKEND_CONNECT_TIMEOUT[];
extern const char CN_MONITOR_INTERVAL[]; extern const char CN_MONITOR_INTERVAL[];
extern const char CN_JOURNAL_MAX_AGE[]; extern const char CN_JOURNAL_MAX_AGE[];
extern const char CN_SCRIPT_TIMEOUT[]; extern const char CN_SCRIPT_TIMEOUT[];
extern const char CN_FAILOVER_TIMEOUT[];
extern const char CN_SCRIPT[]; extern const char CN_SCRIPT[];
extern const char CN_EVENTS[]; extern const char CN_EVENTS[];

View File

@ -244,6 +244,7 @@ const char *config_monitor_params[] =
CN_MONITOR_INTERVAL, CN_MONITOR_INTERVAL,
CN_JOURNAL_MAX_AGE, CN_JOURNAL_MAX_AGE,
CN_SCRIPT_TIMEOUT, CN_SCRIPT_TIMEOUT,
CN_FAILOVER_TIMEOUT,
CN_BACKEND_CONNECT_TIMEOUT, CN_BACKEND_CONNECT_TIMEOUT,
CN_BACKEND_READ_TIMEOUT, CN_BACKEND_READ_TIMEOUT,
CN_BACKEND_WRITE_TIMEOUT, CN_BACKEND_WRITE_TIMEOUT,
@ -3187,6 +3188,24 @@ int create_new_monitor(CONFIG_CONTEXT *context, CONFIG_CONTEXT *obj, HASHTABLE*
obj->object, CN_SCRIPT_TIMEOUT, DEFAULT_SCRIPT_TIMEOUT); obj->object, CN_SCRIPT_TIMEOUT, DEFAULT_SCRIPT_TIMEOUT);
} }
char *failover_timeout = config_get_value(obj->parameters, CN_FAILOVER_TIMEOUT);
if (failover_timeout)
{
char *endptr;
long interval = strtol(failover_timeout, &endptr, 0);
if (*endptr == '\0' && interval > 0)
{
monitorSetFailoverTimeout(monitor, (uint32_t)interval);
}
else
{
error_count++;
MXS_NOTICE("Invalid '%s' parameter for monitor '%s'",
CN_FAILOVER_TIMEOUT, obj->object);
}
}
char *connect_timeout = config_get_value(obj->parameters, CN_BACKEND_CONNECT_TIMEOUT); char *connect_timeout = config_get_value(obj->parameters, CN_BACKEND_CONNECT_TIMEOUT);
if (connect_timeout) if (connect_timeout)
{ {

View File

@ -522,6 +522,15 @@ bool runtime_alter_monitor(MXS_MONITOR *monitor, const char *key, const char *va
monitorSetScriptTimeout(monitor, ival); monitorSetScriptTimeout(monitor, ival);
} }
} }
else if (strcmp(key, CN_FAILOVER_TIMEOUT) == 0)
{
long ival = get_positive_int(value);
if (ival)
{
valid = true;
monitorSetFailoverTimeout(monitor, ival);
}
}
else else
{ {
/** We're modifying module specific parameters and we need to stop the monitor */ /** We're modifying module specific parameters and we need to stop the monitor */

View File

@ -77,6 +77,7 @@ void monitorSetInterval (MXS_MONITOR *, unsigned long);
bool monitorSetNetworkTimeout(MXS_MONITOR *, int, int); bool monitorSetNetworkTimeout(MXS_MONITOR *, int, int);
void monitorSetJournalMaxAge(MXS_MONITOR *mon, time_t value); void monitorSetJournalMaxAge(MXS_MONITOR *mon, time_t value);
void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value); void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value);
void monitorSetFailoverTimeout(MXS_MONITOR *mon, uint32_t value);
/** /**
* @brief Serialize a monitor to a file * @brief Serialize a monitor to a file

View File

@ -71,6 +71,7 @@ const char CN_BACKEND_CONNECT_TIMEOUT[] = "backend_connect_timeout";
const char CN_MONITOR_INTERVAL[] = "monitor_interval"; const char CN_MONITOR_INTERVAL[] = "monitor_interval";
const char CN_JOURNAL_MAX_AGE[] = "journal_max_age"; const char CN_JOURNAL_MAX_AGE[] = "journal_max_age";
const char CN_SCRIPT_TIMEOUT[] = "script_timeout"; const char CN_SCRIPT_TIMEOUT[] = "script_timeout";
const char CN_FAILOVER_TIMEOUT[] = "failover_timeout";
const char CN_SCRIPT[] = "script"; const char CN_SCRIPT[] = "script";
const char CN_EVENTS[] = "events"; const char CN_EVENTS[] = "events";
@ -666,6 +667,17 @@ void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value)
mon->script_timeout = value; mon->script_timeout = value;
} }
/**
* Set the failover timeout of the monitor
*
* Stores the given value in the monitor's failover_timeout field. No range
* validation is performed here.
*
* @param mon   The monitor instance
* @param value The failover timeout in seconds
*/
void monitorSetFailoverTimeout(MXS_MONITOR *mon, uint32_t value)
{
mon->failover_timeout = value;
}
/** /**
* Set Monitor timeouts for connect/read/write * Set Monitor timeouts for connect/read/write
* *
@ -1546,6 +1558,7 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena
dprintf(file, "%s=%d\n", CN_BACKEND_CONNECT_ATTEMPTS, monitor->connect_attempts); dprintf(file, "%s=%d\n", CN_BACKEND_CONNECT_ATTEMPTS, monitor->connect_attempts);
dprintf(file, "%s=%ld\n", CN_JOURNAL_MAX_AGE, monitor->journal_max_age); dprintf(file, "%s=%ld\n", CN_JOURNAL_MAX_AGE, monitor->journal_max_age);
dprintf(file, "%s=%d\n", CN_SCRIPT_TIMEOUT, monitor->script_timeout); dprintf(file, "%s=%d\n", CN_SCRIPT_TIMEOUT, monitor->script_timeout);
dprintf(file, "%s=%d\n", CN_FAILOVER_TIMEOUT, monitor->failover_timeout);
if (monitor->databases) if (monitor->databases)
{ {
@ -1575,6 +1588,7 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena
CN_BACKEND_CONNECT_ATTEMPTS, CN_BACKEND_CONNECT_ATTEMPTS,
CN_JOURNAL_MAX_AGE, CN_JOURNAL_MAX_AGE,
CN_SCRIPT_TIMEOUT, CN_SCRIPT_TIMEOUT,
CN_FAILOVER_TIMEOUT,
CN_SERVERS CN_SERVERS
}; };
@ -1811,6 +1825,7 @@ json_t* monitor_parameters_to_json(const MXS_MONITOR* monitor)
json_object_set_new(rval, CN_BACKEND_CONNECT_ATTEMPTS, json_integer(monitor->connect_attempts)); json_object_set_new(rval, CN_BACKEND_CONNECT_ATTEMPTS, json_integer(monitor->connect_attempts));
json_object_set_new(rval, CN_JOURNAL_MAX_AGE, json_integer(monitor->journal_max_age)); json_object_set_new(rval, CN_JOURNAL_MAX_AGE, json_integer(monitor->journal_max_age));
json_object_set_new(rval, CN_SCRIPT_TIMEOUT, json_integer(monitor->script_timeout)); json_object_set_new(rval, CN_SCRIPT_TIMEOUT, json_integer(monitor->script_timeout));
json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(monitor->failover_timeout));
/** Add custom module parameters */ /** Add custom module parameters */
const MXS_MODULE* mod = get_module(monitor->module_name, MODULE_MONITOR); const MXS_MODULE* mod = get_module(monitor->module_name, MODULE_MONITOR);

View File

@ -1587,12 +1587,13 @@ struct subcommand alteroptions[] =
"user Username used when connecting to servers\n" "user Username used when connecting to servers\n"
"password Password used when connecting to servers\n" "password Password used when connecting to servers\n"
"monitor_interval Monitoring interval in milliseconds\n" "monitor_interval Monitoring interval in milliseconds\n"
"backend_connect_timeout Server coneection timeout in seconds\n" "backend_connect_timeout Server connection timeout in seconds\n"
"backend_write_timeout Server write timeout in seconds\n" "backend_write_timeout Server write timeout in seconds\n"
"backend_read_timeout Server read timeout in seconds\n" "backend_read_timeout Server read timeout in seconds\n"
"backend_connect_attempts Number of re-connection attempts\n" "backend_connect_attempts Number of re-connection attempts\n"
"journal_max_age Maximum age of server state journal\n" "journal_max_age Maximum age of server state journal\n"
"script_timeout Timeout in seconds for monitor scripts\n" "script_timeout Timeout in seconds for monitor scripts\n"
"failover_timeout Failover timeout in seconds\n"
"\n" "\n"
"This will alter an existing parameter of a monitor. To remove parameters,\n" "This will alter an existing parameter of a monitor. To remove parameters,\n"
"pass an empty value for a key e.g. 'maxadmin alter monitor my-monitor my-key='\n" "pass an empty value for a key e.g. 'maxadmin alter monitor my-monitor my-key='\n"