diff --git a/Documentation/Monitors/MariaDB-Monitor.md b/Documentation/Monitors/MariaDB-Monitor.md index f8db3cafc..58a508196 100644 --- a/Documentation/Monitors/MariaDB-Monitor.md +++ b/Documentation/Monitors/MariaDB-Monitor.md @@ -340,9 +340,10 @@ selection criteria is as follows in descending priority: later. 3. Prepare the new master: 1. Remove the slave connection the new master used to replicate from the old -master. + master. 2. Disable the *read\_only*-flag. - 3. Enable scheduled server events (if event handling is on). + 3. Enable scheduled server events (if event handling is on). Only events that were + enabled on the old master are enabled. 4. Run the commands in `promotion_sql_file`. 5. Start replication from external master if one existed. 4. Redirect all other slaves to replicate from the new master: @@ -351,6 +352,9 @@ master. 3. START SLAVE 5. Check that all slaves are replicating. +Failover is considered successful if steps 1 to 3 succeeded, as the cluster then has at +least a valid master server. + **Switchover** swaps a running master with a running slave. It does the following: @@ -365,6 +369,9 @@ following: redirect the demoted old master. 4. Check that all slaves are replicating. +Similar to failover, switchover is considered successful if the new master was +successfully promoted. + **Rejoin** joins a standalone server to the cluster or redirects a slave replicating from a server other than the master. A standalone server is joined by: @@ -391,7 +398,9 @@ operation proceeds as follows: *gtid\_current\_pos*. 2. Prepare new master: 1. Disable the *read\_only*-flag. - 2. Enable scheduled server events (if event handling is on). + 2. Enable scheduled server events (if event handling is on). Events are only enabled + if the cluster had a master server when starting the reset-replication operation. + Only events that were enabled on the previous master are enabled on the new master. 3. 
Direct other servers to replicate from the new master as in the other operations. @@ -735,14 +744,19 @@ demotion_sql_file=/home/root/scripts/demotion.sql ``` #### `handle_server_events` -This setting is on by default. If enabled, the monitor will attempt to enable -and disable server events during a switchover, failover or rejoin. When a server -is being demoted, any events with "ENABLED" status are set to -"SLAVESIDE_DISABLED". The reverse applies to a server being promoted to master. -When a standalone server is rejoined to the cluster, its events are also -disabled since it is now a slave. The monitor does not check whether the same -events were disabled and enabled during a switchover or failover/rejoin. All -events with the expected status are altered. +This setting is on by default. If enabled, the monitor continuously queries the +servers for enabled scheduled events and uses this information when performing +cluster operations, enabling and disabling events as appropriate. + +When a server is being demoted, any events with "ENABLED" status are set to +"SLAVESIDE_DISABLED". When a server is being promoted to master, events that are either +"SLAVESIDE_DISABLED" or "DISABLED" are set to "ENABLED" if the same event was also enabled +on the old master server the last time it was successfully queried. Events are considered +identical if they have the same schema and name. When a standalone server is rejoined to +the cluster, its events are also disabled since it is now a slave. + +The monitor does not check whether the same events were disabled and enabled during a +switchover or failover/rejoin. All events that meet the criteria above are altered. The monitor does not enable or disable the event scheduler itself. 
For the events to run on the new master server, the scheduler should be enabled by the diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc index 365ed5f32..24d08f4b3 100644 --- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc +++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc @@ -1701,7 +1701,8 @@ MariaDBMonitor::switchover_prepare(SERVER* promotion_server, SERVER* demotion_se m_promote_sql_file, demotion_target->m_slave_status, demotion_target->m_enabled_events); ServerOperation demotion(demotion_target, master_swap, m_handle_event_scheduler, - m_demote_sql_file, promotion_target->m_slave_status, {} /* unused */); + m_demote_sql_file, promotion_target->m_slave_status, + EventNameSet() /* unused */); GeneralOpData general(m_replication_user, m_replication_password, error_out, time_limit); rval.reset(new SwitchoverParams(promotion, demotion, general)); } diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index 95765acb3..4cb2a45e3 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -157,7 +157,7 @@ bool MariaDBServer::execute_cmd_time_limit(const std::string& cmd, maxbase::Dura std::string* errmsg_out) { StopWatch timer; - string cmd_prefix; + string max_stmt_time; int connector_timeout = -1; if (m_capabilities.max_statement_time) { @@ -166,14 +166,15 @@ bool MariaDBServer::execute_cmd_time_limit(const std::string& cmd, maxbase::Dura mxb_assert(rv == 0); if (connector_timeout > 0) { - cmd_prefix = string_printf("SET STATEMENT max_statement_time=%i FOR ", connector_timeout); + max_stmt_time = string_printf("SET STATEMENT max_statement_time=%i FOR ", connector_timeout); } } - string command = cmd_prefix + cmd; + const string command = max_stmt_time + cmd; // If a query lasts less than 1s, sleep so that at most 1 query/s is sent. 
// This prevents busy-looping when faced with some network errors. const Duration min_query_time(1.0); + // Even if time is up, try at least once. bool cmd_success = false; bool keep_trying = true; @@ -187,17 +188,27 @@ bool MariaDBServer::execute_cmd_time_limit(const std::string& cmd, maxbase::Dura // Check if there is time to retry. Duration time_remaining = time_limit - timer.split(); + bool non_fatal_connector_err = maxsql::mysql_is_net_error(errornum); keep_trying = (time_remaining.secs() > 0) - // either a connector-c timeout - && (maxsql::mysql_is_net_error(errornum) - // or query was interrupted by max_statement_time. - || (!cmd_prefix.empty() && errornum == ER_STATEMENT_TIMEOUT)); + // Either a connector-c timeout or query was interrupted by max_statement_time. + && (non_fatal_connector_err || (!max_stmt_time.empty() && errornum == ER_STATEMENT_TIMEOUT)); + if (!cmd_success) { if (keep_trying) { - MXS_WARNING("Query '%s' timed out on '%s': Retrying with %.1f seconds left.", - command.c_str(), name(), time_remaining.secs()); + string retrying = string_printf("Retrying with %.1f seconds left.", time_remaining.secs()); + if (non_fatal_connector_err) + { + MXS_WARNING("%s %s", error_msg.c_str(), retrying.c_str()); + } + else + { + // Timed out because of max_statement_time. + MXS_WARNING("Query '%s' timed out on '%s'. %s", + command.c_str(), name(), retrying.c_str()); + } + if (query_time < min_query_time) { Duration query_sleep = min_query_time - query_time; @@ -207,7 +218,7 @@ bool MariaDBServer::execute_cmd_time_limit(const std::string& cmd, maxbase::Dura } else if (errmsg_out) { - *errmsg_out = error_msg; // The error string already has all required info. + *errmsg_out = error_msg; // The error string already has all required info. 
} } } diff --git a/server/modules/monitor/mariadbmon/server_utils.cc b/server/modules/monitor/mariadbmon/server_utils.cc index 31d8a63ea..369c5c127 100644 --- a/server/modules/monitor/mariadbmon/server_utils.cc +++ b/server/modules/monitor/mariadbmon/server_utils.cc @@ -169,7 +169,8 @@ ServerOperation::ServerOperation(MariaDBServer* target, bool was_is_master, bool ServerOperation::ServerOperation(MariaDBServer* target, bool was_is_master, bool handle_events, const std::string& sql_file) - : ServerOperation(target, was_is_master, handle_events, sql_file, {}, {}) + : ServerOperation(target, was_is_master, handle_events, sql_file, + SlaveStatusArray() /* empty */, EventNameSet() /* empty */) { }