diff --git a/Documentation/Monitors/MariaDB-Monitor.md b/Documentation/Monitors/MariaDB-Monitor.md index 792d45e8a..a1b59a6ba 100644 --- a/Documentation/Monitors/MariaDB-Monitor.md +++ b/Documentation/Monitors/MariaDB-Monitor.md @@ -676,11 +676,18 @@ settings when redirecting a slave connection. #### `failover_timeout` and `switchover_timeout` -Time limit for failover and switchover operations, in seconds. The default +Time limit for failover and switchover operations. The default values are 90 seconds for both. `switchover_timeout` is also used as the time limit for a rejoin operation. Rejoin should rarely time out, since it is a faster operation than switchover. +The timeouts are specified as documented +[here](../Getting-Started/Configuration-Guide.md#durations). If no explicit unit +is provided, the value is interpreted as seconds in MaxScale 2.4. In subsequent +versions a value without a unit may be rejected. Note that since the granularity +of the timeouts is seconds, a timeout specified in milliseconds will be rejected, +even if the duration is longer than a second. + If no successful failover/switchover takes place within the configured time period, a message is logged and automatic failover is disabled. This prevents further automatic modifications to the misbehaving cluster. @@ -689,14 +696,20 @@ further automatic modifications to the misbehaving cluster. Enable additional master failure verification for automatic failover. `verify_master_failure` is a boolean value (default: true) which enables this -feature and `master_failure_timeout` defines the timeout in seconds (default: -10). +feature and `master_failure_timeout` defines the timeout (default: 10 seconds). + +The master failure timeout is specified as documented +[here](../Getting-Started/Configuration-Guide.md#durations). If no explicit unit +is provided, the value is interpreted as seconds in MaxScale 2.4. In subsequent +versions a value without a unit may be rejected. 
Note that since the granularity +of the timeout is seconds, a timeout specified in milliseconds will be rejected, +even if the duration is longer than a second. Failure verification is performed by checking whether the slave servers are still connected to the master and receiving events. An event is either a change in the *Gtid_IO_Pos*-field of the `SHOW SLAVE STATUS` output or a heartbeat event. Effectively, if a slave has received an event within -`master_failure_timeout` seconds, the master is not considered down when +the `master_failure_timeout` duration, the master is not considered down when deciding whether to failover, even if MaxScale cannot connect to the master. `master_failure_timeout` should be longer than the `Slave_heartbeat_period` of the slave connection to be effective. diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index db4468af2..d9643600d 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -219,13 +219,13 @@ bool MariaDBMonitor::configure(const MXS_CONFIG_PARAMETER* params) m_detect_standalone_master = params->get_bool(CN_DETECT_STANDALONE_MASTER); m_assume_unique_hostnames = params->get_bool(CN_ASSUME_UNIQUE_HOSTNAMES); m_failcount = params->get_integer(CN_FAILCOUNT); - m_failover_timeout = params->get_integer(CN_FAILOVER_TIMEOUT); - m_switchover_timeout = params->get_integer(CN_SWITCHOVER_TIMEOUT); + m_failover_timeout = params->get_duration(CN_FAILOVER_TIMEOUT).count(); + m_switchover_timeout = params->get_duration(CN_SWITCHOVER_TIMEOUT).count(); m_auto_failover = params->get_bool(CN_AUTO_FAILOVER); m_auto_rejoin = params->get_bool(CN_AUTO_REJOIN); m_enforce_read_only_slaves = params->get_bool(CN_ENFORCE_READONLY); m_verify_master_failure = params->get_bool(CN_VERIFY_MASTER_FAILURE); - m_master_failure_timeout = params->get_integer(CN_MASTER_FAILURE_TIMEOUT); + m_master_failure_timeout = 
params->get_duration(CN_MASTER_FAILURE_TIMEOUT).count(); m_promote_sql_file = params->get_string(CN_PROMOTION_SQL_FILE); m_demote_sql_file = params->get_string(CN_DEMOTION_SQL_FILE); m_switchover_on_low_disk_space = params->get_bool(CN_SWITCHOVER_ON_LOW_DISK_SPACE); @@ -1008,10 +1008,12 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE() CN_AUTO_FAILOVER, MXS_MODULE_PARAM_BOOL, "false" }, { - CN_FAILOVER_TIMEOUT, MXS_MODULE_PARAM_COUNT, "90" + CN_FAILOVER_TIMEOUT, MXS_MODULE_PARAM_DURATION, "90s", + MXS_MODULE_OPT_DURATION_S }, { - CN_SWITCHOVER_TIMEOUT, MXS_MODULE_PARAM_COUNT, "90" + CN_SWITCHOVER_TIMEOUT, MXS_MODULE_PARAM_DURATION, "90s", + MXS_MODULE_OPT_DURATION_S }, { CN_REPLICATION_USER, MXS_MODULE_PARAM_STRING @@ -1026,7 +1028,8 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE() CN_VERIFY_MASTER_FAILURE, MXS_MODULE_PARAM_BOOL, "true" }, { - CN_MASTER_FAILURE_TIMEOUT, MXS_MODULE_PARAM_COUNT, "10" + CN_MASTER_FAILURE_TIMEOUT, MXS_MODULE_PARAM_DURATION, "10s", + MXS_MODULE_OPT_DURATION_S }, { CN_AUTO_REJOIN, MXS_MODULE_PARAM_BOOL, "false"