From ff467e218acc27e06822baef297bc993fe47da5f Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Thu, 28 Sep 2017 11:30:20 +0300 Subject: [PATCH] MXS-1441 Add switchover and switchover_timeout config vars Tentative documentation. With the 'switchover' config variable the switchover functionality can be enabled. If enabled, a REST API endpoint will appear, through which switchover can be initiated. Switchover can only be performed when MaxScale is in active mode and failover will be disabled for the duration of the switchover. Only if the switchover succeeds, will failover be enabled again. Might be easier to expose that REST API always and only change the behaviour when calling it, instead of making it appear and re-appear. --- Documentation/Monitors/MySQL-Monitor.md | 33 +++++++++++++++++++++ server/modules/monitor/mysqlmon.h | 2 ++ server/modules/monitor/mysqlmon/mysql_mon.c | 16 ++++++++-- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/Documentation/Monitors/MySQL-Monitor.md b/Documentation/Monitors/MySQL-Monitor.md index a0a3fc859..64f65c5ad 100644 --- a/Documentation/Monitors/MySQL-Monitor.md +++ b/Documentation/Monitors/MySQL-Monitor.md @@ -243,6 +243,39 @@ from passive to active will wait for a failover to take place after an apparent loss of a master server. If no new master server is detected within the configured time period, the failover will be initiated again. +### `switchover` + +Enable switchover via MaxScale. This parameter expects a boolean value and +the default value is false. + +When the switchover functionality is enabled, a REST API endpoint will be +made available, using which switchover may be performed. The endpoint will +be available irrespective of whether MaxScale is in active or passive mode, +but switchover will only be attempted if MaxScale is in active mode and an +error will be logged if an attempt is made when MaxScale is in passive mode. 
+Switchover may also be triggered from MaxAdmin and the same rules regarding +active/passive hold. + +It is safe to perform switchover even with the failover functionality +enabled, as MaxScale will disable the failover behaviour for the duration +of the switchover. + +Only if the switchover succeeds, will the failover functionality be re-enabled. +Otherwise it will remain disabled and must be turned on manually via the REST +API or MaxAdmin. + +TODO: Document the URL path. Probably will include the monitor section name + from the configuration. + +### `switchover_timeout` + +The timeout for the cluster switchover in seconds. The default value is 90 +seconds. + +If no successful switchover takes place within the configured time period, +a message is logged and the failover (not switchover) functionality will not +be re-enabled, even if it was enabled before the switchover attempt. + ## Using the MySQL Monitor With Binlogrouter Since MaxScale 2.2 it's possible to detect a replication setup diff --git a/server/modules/monitor/mysqlmon.h b/server/modules/monitor/mysqlmon.h index 3842e3688..66c87ac91 100644 --- a/server/modules/monitor/mysqlmon.h +++ b/server/modules/monitor/mysqlmon.h @@ -65,6 +65,8 @@ typedef struct bool allow_external_slaves; /**< Whether to allow usage of external slave servers */ bool failover; /**< If master failover is enabled */ uint32_t failover_timeout; /**< Timeout in seconds for the master failover */ + bool switchover; /**< If master switchover is enabled */ + uint32_t switchover_timeout; /**< Timeout in seconds for the master switchover */ MXS_MONITOR* monitor; } MYSQL_MONITOR; diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index f6da229d5..f95336bfa 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -60,11 +60,15 @@ void check_maxscale_schema_replication(MXS_MONITOR *monitor); static bool report_version_err = true; static 
const char* hb_table_name = "maxscale_schema.replication_heartbeat"; -static const char CN_FAILOVER[] = "failover"; -static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout"; +static const char CN_FAILOVER[] = "failover"; +static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout"; +static const char CN_SWITCHOVER[] = "switchover"; +static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout"; /** Default failover timeout */ #define DEFAULT_FAILOVER_TIMEOUT "90" +/** Default switchover timeout */ +#define DEFAULT_SWITCHOVER_TIMEOUT "90" /** * The module entry point routine. It is this routine that @@ -124,6 +128,8 @@ MXS_MODULE* MXS_CREATE_MODULE() }, {CN_FAILOVER, MXS_MODULE_PARAM_BOOL, "false"}, {CN_FAILOVER_TIMEOUT, MXS_MODULE_PARAM_COUNT, DEFAULT_FAILOVER_TIMEOUT}, + {CN_SWITCHOVER, MXS_MODULE_PARAM_BOOL, "false"}, + {CN_SWITCHOVER_TIMEOUT, MXS_MODULE_PARAM_COUNT, DEFAULT_SWITCHOVER_TIMEOUT}, {MXS_END_MODULE_PARAMS} } }; @@ -272,6 +278,8 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params) handle->allow_external_slaves = config_get_bool(params, "allow_external_slaves"); handle->failover = config_get_bool(params, CN_FAILOVER); handle->failover_timeout = config_get_integer(params, CN_FAILOVER_TIMEOUT); + handle->switchover = config_get_bool(params, CN_SWITCHOVER); + handle->switchover_timeout = config_get_integer(params, CN_SWITCHOVER_TIMEOUT); bool error = false; @@ -331,6 +339,8 @@ static void diagnostics(DCB *dcb, const MXS_MONITOR *mon) dcb_printf(dcb, "Failover:\t%s\n", handle->failover ? "Enabled" : "Disabled"); dcb_printf(dcb, "Failover Timeout:\t%u\n", handle->failover_timeout); + dcb_printf(dcb, "Switchover:\t%s\n", handle->switchover ? "Enabled" : "Disabled"); + dcb_printf(dcb, "Switchover Timeout:\t%u\n", handle->switchover_timeout); dcb_printf(dcb, "MaxScale MonitorId:\t%lu\n", handle->id); dcb_printf(dcb, "Replication lag:\t%s\n", (handle->replicationHeartbeat == 1) ? 
"enabled" : "disabled"); dcb_printf(dcb, "Detect Stale Master:\t%s\n", (handle->detectStaleMaster == 1) ? "enabled" : "disabled"); @@ -379,6 +389,8 @@ static json_t* diagnostics_json(const MXS_MONITOR *mon) json_object_set_new(rval, "mysql51_replication", json_boolean(handle->mysql51_replication)); json_object_set_new(rval, CN_FAILOVER, json_boolean(handle->failover)); json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(handle->failover_timeout)); + json_object_set_new(rval, CN_SWITCHOVER, json_boolean(handle->switchover)); + json_object_set_new(rval, CN_SWITCHOVER_TIMEOUT, json_integer(handle->switchover_timeout)); if (handle->script) {