diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc index dc3868c75..c32280094 100644 --- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc +++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc @@ -27,13 +27,11 @@ using maxbase::string_printf; using maxbase::StopWatch; using maxbase::Duration; -static const char RE_ENABLE_FMT[] = "To re-enable automatic %s, manually set '%s' to 'true' " - "for monitor '%s' via MaxAdmin or the REST API, or restart MaxScale."; const char NO_SERVER[] = "Server '%s' is not monitored by '%s'."; const char FAILOVER_OK[] = "Failover '%s' -> '%s' performed."; const char FAILOVER_FAIL[] = "Failover '%s' -> '%s' failed."; const char SWITCHOVER_OK[] = "Switchover '%s' -> '%s' performed."; -const char SWITCHOVER_FAIL[] = "Switchover %s -> %s failed"; +const char SWITCHOVER_FAIL[] = "Switchover %s -> %s failed."; /** * Run a manual switchover, promoting a new master server and demoting the existing master. @@ -64,14 +62,8 @@ bool MariaDBMonitor::manual_switchover(SERVER* promotion_server, SERVER* demotio { string msg = string_printf(SWITCHOVER_FAIL, op->demotion.target->name(), op->promotion.target->name()); - bool failover_setting = parameters.get_bool(CN_AUTO_FAILOVER); - if (failover_setting) - { - disable_setting(CN_AUTO_FAILOVER); - msg += ", automatic failover has been disabled"; - } - msg += "."; PRINT_MXS_JSON_ERROR(error_out, "%s", msg.c_str()); + delay_auto_cluster_ops(); } } else @@ -625,6 +617,7 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t** SERVER* master_server = m_master->m_server_base->server; const char* master_name = master_server->name(); uint32_t servers_joined = 0; + bool rejoin_error = false; if (!joinable_servers.empty()) { for (MariaDBServer* joinable : joinable_servers) @@ -656,7 +649,8 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t** else { PRINT_MXS_JSON_ERROR(output, - "Failed to prepare (demote) standalone server '%s' for rejoin.", name); + "Failed to prepare (demote) standalone server '%s' for rejoin.", + name); } } else @@ -675,8 +669,17 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t** servers_joined++; m_cluster_modified = true; } + else + { + rejoin_error = true; + } } } + + if (rejoin_error) + { + delay_auto_cluster_ops(); + } return servers_joined; } @@ -1474,7 +1477,7 @@ void MariaDBMonitor::handle_auto_failover() else { MXS_ERROR(FAILOVER_FAIL, op->demotion_target->name(), op->promotion.target->name()); - report_and_disable("failover", CN_AUTO_FAILOVER, &m_auto_failover); + delay_auto_cluster_ops(); } } else @@ -1536,25 +1539,10 @@ void MariaDBMonitor::check_cluster_operations_support() { const char PROBLEMS[] = "The backend cluster does not support failover/switchover due to the following reason(s):\n" - "%s\n" - "Automatic failover/switchover has been disabled. They should only be enabled " - "after the above issues have been resolved."; - string p1 = string_printf(PROBLEMS, all_reasons.c_str()); - string p2 = string_printf(RE_ENABLE_FMT, "failover", CN_AUTO_FAILOVER, name()); - string p3 = string_printf(RE_ENABLE_FMT, "switchover", CN_SWITCHOVER_ON_LOW_DISK_SPACE, name()); - string total_msg = p1 + " " + p2 + " " + p3; - MXS_ERROR("%s", total_msg.c_str()); - - if (m_auto_failover) - { - m_auto_failover = false; - disable_setting(CN_AUTO_FAILOVER); - } - if (m_switchover_on_low_disk_space) - { - m_switchover_on_low_disk_space = false; - disable_setting(CN_SWITCHOVER_ON_LOW_DISK_SPACE); - } + "%s\n"; + string msg = string_printf(PROBLEMS, all_reasons.c_str()); + MXS_ERROR("%s", msg.c_str()); + delay_auto_cluster_ops(); } } @@ -1719,6 +1707,7 @@ MariaDBMonitor::switchover_prepare(SERVER* promotion_server, SERVER* demotion_se void MariaDBMonitor::enforce_read_only_on_slaves() { const char QUERY[] = "SET GLOBAL read_only=1;"; + bool error = false; for (MariaDBServer* server : m_servers) { if (server->is_slave() && !server->is_read_only() @@ -1732,9 +1721,15 @@ void MariaDBMonitor::enforce_read_only_on_slaves() else { MXS_ERROR("Setting read_only on '%s' failed: '%s'.", server->name(), mysql_error(conn)); + error = true; } } } + + if (error) + { + delay_auto_cluster_ops(); + } } void MariaDBMonitor::handle_low_disk_space_master() @@ -1762,8 +1757,7 @@ void MariaDBMonitor::handle_low_disk_space_master() else { MXS_ERROR(SWITCHOVER_FAIL, op->demotion.target->name(), op->promotion.target->name()); - report_and_disable("switchover", CN_SWITCHOVER_ON_LOW_DISK_SPACE, - &m_switchover_on_low_disk_space); + delay_auto_cluster_ops(); } } else @@ -1798,19 +1792,6 @@ void MariaDBMonitor::handle_auto_rejoin() // get_joinable_servers prints an error if master is unresponsive } -void MariaDBMonitor::report_and_disable(const string& operation, const string& setting_name, - bool* setting_var) -{ - string p1 = string_printf("Automatic %s failed, disabling automatic %s.", - operation.c_str(), - operation.c_str()); - string p2 = string_printf(RE_ENABLE_FMT, operation.c_str(), setting_name.c_str(), name()); - string error_msg = p1 + " " + p2; - MXS_ERROR("%s", error_msg.c_str()); - *setting_var = false; - disable_setting(setting_name.c_str()); -} - /** * Check that the slaves to demotion target are using gtid replication and that the gtid domain of the * cluster is defined. Only the slave connections to the demotion target are checked. @@ -1878,6 +1859,24 @@ ServerArray MariaDBMonitor::get_redirectables(const MariaDBServer* old_master, return redirectable_slaves; } +void MariaDBMonitor::delay_auto_cluster_ops() +{ + if (m_auto_failover || m_auto_rejoin || m_enforce_read_only_slaves || m_switchover_on_low_disk_space) + { + const char DISABLING_AUTO_OPS[] = "Disabling automatic cluster operations for %i monitor ticks."; + MXS_NOTICE(DISABLING_AUTO_OPS, m_failcount); + } + // + 1 because the start of next tick subtracts 1. + cluster_operation_disable_timer = m_failcount + 1; + +} + +bool MariaDBMonitor::can_perform_cluster_ops() +{ + return (!config_get_global_options()->passive && cluster_operation_disable_timer <= 0 && + !m_cluster_modified); +} + MariaDBMonitor::SwitchoverParams::SwitchoverParams(const ServerOperation& promotion, const ServerOperation& demotion, const GeneralOpData& general) diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index 17ae73520..e5eaedefd 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -27,9 +27,7 @@ #include #include #include -#include -// TODO: For monitor_add_parameters -#include "../../../core/internal/monitor.hh" +#include using std::string; using maxbase::string_printf; @@ -387,6 +385,12 @@ void MariaDBMonitor::tick() mon_srv->mon_prev_status = status; } + if (cluster_operation_disable_timer > 0) + { + cluster_operation_disable_timer--; + } + + // Query all servers for their status. bool should_update_disk_space = check_disk_space_this_tick(); const auto& conn_settings = m_settings.conn_settings; @@ -422,7 +426,7 @@ void MariaDBMonitor::tick() update_topology(); m_cluster_topology_changed = false; // If cluster operations are enabled, check topology support and disable if needed. - if (m_auto_failover || m_switchover_on_low_disk_space) + if (m_auto_failover || m_switchover_on_low_disk_space || m_auto_rejoin) { check_cluster_operations_support(); } @@ -495,16 +499,16 @@ void MariaDBMonitor::process_state_changes() } } - if (!config_get_global_options()->passive) + if (can_perform_cluster_ops()) { - if (m_auto_failover && !m_cluster_modified) + if (m_auto_failover) { handle_auto_failover(); } // Do not auto-join servers on this monitor loop if a failover (or any other cluster modification) // has been performed, as server states have not been updated yet. It will happen next iteration. - if (m_auto_rejoin && !m_cluster_modified && cluster_can_be_joined()) + if (m_auto_rejoin && cluster_can_be_joined() && can_perform_cluster_ops()) { // Check if any servers should be autojoined to the cluster and try to join them. handle_auto_rejoin(); @@ -513,13 +517,13 @@ void MariaDBMonitor::process_state_changes() /* Check if any slave servers have read-only off and turn it on if user so wishes. Again, do not * perform this if cluster has been modified this loop since it may not be clear which server * should be a slave. */ - if (m_enforce_read_only_slaves && !m_cluster_modified) + if (m_enforce_read_only_slaves && can_perform_cluster_ops()) { enforce_read_only_on_slaves(); } /* Check if the master server is on low disk space and act on it. */ - if (m_switchover_on_low_disk_space && !m_cluster_modified) + if (m_switchover_on_low_disk_space && can_perform_cluster_ops()) { handle_low_disk_space_master(); } @@ -648,17 +652,6 @@ void MariaDBMonitor::assign_new_master(MariaDBServer* new_master) m_warn_have_better_master = true; } -/** - * Set a monitor config parameter to "false". The effect persists over stopMonitor/startMonitor but not - * MaxScale restart. Only use on boolean config settings. - * - * @param setting_name Setting to disable - */ -void MariaDBMonitor::disable_setting(const std::string& setting) -{ - parameters.set(setting, "false"); -} - /** * Check sql text file parameters. A parameter should either be empty or a valid file which can be opened. * diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh index e09c14f54..9cd633fcd 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.hh +++ b/server/modules/monitor/mariadbmon/mariadbmon.hh @@ -178,6 +178,10 @@ private: * Causes a topology rebuild on the current tick. */ bool m_cluster_modified = false; /* Has a cluster operation been performed this loop? Prevents * other operations during this tick. */ + + /* Counter for temporary automatic cluster operation disabling. */ + int cluster_operation_disable_timer = 0; + CycleMap m_cycles; /* Map from cycle number to cycle member servers */ CycleInfo m_master_cycle_status; /* Info about master server cycle from previous round */ @@ -296,6 +300,9 @@ private: bool switchover_perform(SwitchoverParams& operation); bool failover_perform(FailoverParams& op); + void delay_auto_cluster_ops(); + bool can_perform_cluster_ops(); + // Methods used by failover/switchover/rejoin MariaDBServer* select_promotion_target(MariaDBServer* current_master, OperationType op, Log log_mode, json_t** error_out); @@ -319,8 +326,6 @@ private: std::string generate_change_master_cmd(const std::string& master_host, int master_port); void wait_cluster_stabilization(GeneralOpData& op, const ServerArray& slaves, const MariaDBServer* new_master); - void report_and_disable(const std::string& operation, const std::string& setting_name, - bool* setting_var); // Rejoin methods bool cluster_can_be_joined(); @@ -328,8 +333,6 @@ private: bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, json_t** output); uint32_t do_rejoin(const ServerArray& joinable_servers, json_t** output); - // Other methods - void disable_setting(const std::string& setting); bool check_sql_files(); void enforce_read_only_on_slaves(); void log_master_changes();