From 316f792242690bd9e0e581462576ecdb8c668d26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Tue, 26 Sep 2017 14:13:24 +0300 Subject: [PATCH] MXS-1446: Make `failover_timeout` configurable The time that MaxScale waits for a failover is now configurable. --- Documentation/Monitors/Monitor-Common.md | 13 +++++++++++++ include/maxscale/monitor.h | 3 ++- server/core/config.cc | 19 +++++++++++++++++++ server/core/config_runtime.cc | 9 +++++++++ server/core/maxscale/monitor.h | 1 + server/core/monitor.cc | 15 +++++++++++++++ server/modules/routing/debugcli/debugcmd.c | 3 ++- 7 files changed, 61 insertions(+), 2 deletions(-) diff --git a/Documentation/Monitors/Monitor-Common.md b/Documentation/Monitors/Monitor-Common.md index f85c1a546..e7107dc8a 100644 --- a/Documentation/Monitors/Monitor-Common.md +++ b/Documentation/Monitors/Monitor-Common.md @@ -115,6 +115,19 @@ If the script execution exceeds the configured timeout, it is stopped by sending a SIGTERM signal to it. If the process does not stop, a SIGKILL signal will be sent to it once the execution time is greater than twice the configured timeout. +### `failover_timeout` + +The timeout for the cluster failover in seconds. The default value is 90 +seconds. + +If no successful failover takes place within the configured time period, a +message is logged and the failover functionality is disabled. + +This parameter also controls how long a MaxScale instance that has transitioned +from passive to active will wait for a failover to take place after an apparent +loss of a master server. If no new master server is detected within the +configured time period, the failover will be initiated again. + ### `events` A list of event names which cause the script to be executed. If this option is not defined, all events cause the script to be executed. The list must contain a comma separated list of event names. 
diff --git a/include/maxscale/monitor.h b/include/maxscale/monitor.h index 791180c4f..8341ff188 100644 --- a/include/maxscale/monitor.h +++ b/include/maxscale/monitor.h @@ -205,7 +205,7 @@ struct mxs_monitor bool active; /**< True if monitor is active */ time_t journal_max_age; /**< Maximum age of journal file */ uint32_t script_timeout; /**< Timeout in seconds for the monitor scripts */ - int32_t failover_timeout; /**< Timeout in seconds for failover script */ + uint32_t failover_timeout; /**< Timeout in seconds for failover script */ int64_t last_master_up; /**< Time when the last master_up event was triggered */ int64_t last_master_down; /**< Time when the last master_down event was triggered */ struct mxs_monitor *next; /**< Next monitor in the linked list */ @@ -254,6 +254,7 @@ extern const char CN_BACKEND_CONNECT_TIMEOUT[]; extern const char CN_MONITOR_INTERVAL[]; extern const char CN_JOURNAL_MAX_AGE[]; extern const char CN_SCRIPT_TIMEOUT[]; +extern const char CN_FAILOVER_TIMEOUT[]; extern const char CN_SCRIPT[]; extern const char CN_EVENTS[]; diff --git a/server/core/config.cc b/server/core/config.cc index 1bd42a050..10983ccad 100644 --- a/server/core/config.cc +++ b/server/core/config.cc @@ -244,6 +244,7 @@ const char *config_monitor_params[] = CN_MONITOR_INTERVAL, CN_JOURNAL_MAX_AGE, CN_SCRIPT_TIMEOUT, + CN_FAILOVER_TIMEOUT, CN_BACKEND_CONNECT_TIMEOUT, CN_BACKEND_READ_TIMEOUT, CN_BACKEND_WRITE_TIMEOUT, @@ -3187,6 +3188,24 @@ int create_new_monitor(CONFIG_CONTEXT *context, CONFIG_CONTEXT *obj, HASHTABLE* obj->object, CN_SCRIPT_TIMEOUT, DEFAULT_SCRIPT_TIMEOUT); } + char *failover_timeout = config_get_value(obj->parameters, CN_FAILOVER_TIMEOUT); + if (failover_timeout) + { + char *endptr; + long interval = strtol(failover_timeout, &endptr, 0); + + if (*endptr == '\0' && interval > 0) + { + monitorSetFailoverTimeout(monitor, (uint32_t)interval); + } + else + { + error_count++; + MXS_NOTICE("Invalid '%s' parameter for monitor '%s'", + CN_FAILOVER_TIMEOUT, 
obj->object); + } + } + char *connect_timeout = config_get_value(obj->parameters, CN_BACKEND_CONNECT_TIMEOUT); if (connect_timeout) { diff --git a/server/core/config_runtime.cc b/server/core/config_runtime.cc index 9000127df..90f68139b 100644 --- a/server/core/config_runtime.cc +++ b/server/core/config_runtime.cc @@ -522,6 +522,15 @@ bool runtime_alter_monitor(MXS_MONITOR *monitor, const char *key, const char *va monitorSetScriptTimeout(monitor, ival); } } + else if (strcmp(key, CN_FAILOVER_TIMEOUT) == 0) + { + long ival = get_positive_int(value); + if (ival) + { + valid = true; + monitorSetFailoverTimeout(monitor, ival); + } + } else { /** We're modifying module specific parameters and we need to stop the monitor */ diff --git a/server/core/maxscale/monitor.h b/server/core/maxscale/monitor.h index 6994d3e9e..c30e0b133 100644 --- a/server/core/maxscale/monitor.h +++ b/server/core/maxscale/monitor.h @@ -77,6 +77,7 @@ void monitorSetInterval (MXS_MONITOR *, unsigned long); bool monitorSetNetworkTimeout(MXS_MONITOR *, int, int); void monitorSetJournalMaxAge(MXS_MONITOR *mon, time_t value); void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value); +void monitorSetFailoverTimeout(MXS_MONITOR *mon, uint32_t value); /** * @brief Serialize a monitor to a file diff --git a/server/core/monitor.cc b/server/core/monitor.cc index f05f3c842..a3a41bd32 100644 --- a/server/core/monitor.cc +++ b/server/core/monitor.cc @@ -71,6 +71,7 @@ const char CN_BACKEND_CONNECT_TIMEOUT[] = "backend_connect_timeout"; const char CN_MONITOR_INTERVAL[] = "monitor_interval"; const char CN_JOURNAL_MAX_AGE[] = "journal_max_age"; const char CN_SCRIPT_TIMEOUT[] = "script_timeout"; +const char CN_FAILOVER_TIMEOUT[] = "failover_timeout"; const char CN_SCRIPT[] = "script"; const char CN_EVENTS[] = "events"; @@ -666,6 +667,17 @@ void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value) mon->script_timeout = value; } +/** + * Set the failover timeout of the monitor + * + * @param mon The
monitor instance + * @param value The failover timeout in seconds + */ +void monitorSetFailoverTimeout(MXS_MONITOR *mon, uint32_t value) +{ + mon->failover_timeout = value; +} + /** * Set Monitor timeouts for connect/read/write * @@ -1546,6 +1558,7 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena dprintf(file, "%s=%d\n", CN_BACKEND_CONNECT_ATTEMPTS, monitor->connect_attempts); dprintf(file, "%s=%ld\n", CN_JOURNAL_MAX_AGE, monitor->journal_max_age); dprintf(file, "%s=%d\n", CN_SCRIPT_TIMEOUT, monitor->script_timeout); + dprintf(file, "%s=%u\n", CN_FAILOVER_TIMEOUT, monitor->failover_timeout); if (monitor->databases) { @@ -1575,6 +1588,7 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena CN_BACKEND_CONNECT_ATTEMPTS, CN_JOURNAL_MAX_AGE, CN_SCRIPT_TIMEOUT, + CN_FAILOVER_TIMEOUT, CN_SERVERS }; @@ -1811,6 +1825,7 @@ json_t* monitor_parameters_to_json(const MXS_MONITOR* monitor) json_object_set_new(rval, CN_BACKEND_CONNECT_ATTEMPTS, json_integer(monitor->connect_attempts)); json_object_set_new(rval, CN_JOURNAL_MAX_AGE, json_integer(monitor->journal_max_age)); json_object_set_new(rval, CN_SCRIPT_TIMEOUT, json_integer(monitor->script_timeout)); + json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(monitor->failover_timeout)); /** Add custom module parameters */ const MXS_MODULE* mod = get_module(monitor->module_name, MODULE_MONITOR); diff --git a/server/modules/routing/debugcli/debugcmd.c b/server/modules/routing/debugcli/debugcmd.c index f7e1b0aaa..9a70c6b56 100644 --- a/server/modules/routing/debugcli/debugcmd.c +++ b/server/modules/routing/debugcli/debugcmd.c @@ -1587,12 +1587,13 @@ struct subcommand alteroptions[] = "user Username used when connecting to servers\n" "password Password used when connecting to servers\n" "monitor_interval Monitoring interval in milliseconds\n" - "backend_connect_timeout Server coneection timeout in seconds\n" + "backend_connect_timeout Server connection timeout in
seconds\n" "backend_write_timeout Server write timeout in seconds\n" "backend_read_timeout Server read timeout in seconds\n" "backend_connect_attempts Number of re-connection attempts\n" "journal_max_age Maximum age of server state journal\n" "script_timeout Timeout in seconds for monitor scripts\n" + "failover_timeout Failover timeout in seconds\n" "\n" "This will alter an existing parameter of a monitor. To remove parameters,\n" "pass an empty value for a key e.g. 'maxadmin alter monitor my-monitor my-key='\n"