diff --git a/Documentation/Monitors/MariaDB-Monitor.md b/Documentation/Monitors/MariaDB-Monitor.md index b06a4d80f..466b83a9e 100644 --- a/Documentation/Monitors/MariaDB-Monitor.md +++ b/Documentation/Monitors/MariaDB-Monitor.md @@ -544,6 +544,26 @@ slave threads are stopped, breaking replication. promotion_sql_file=/home/root/scripts/promotion.sql demotion_sql_file=/home/root/scripts/demotion.sql ``` +#### `handle_events` + +This setting is on by default. If enabled, the monitor will attempt to enable +and disable server events during a switchover, failover or rejoin. When a server +is being demoted, any events with "ENABLED" status are set to +"SLAVESIDE_DISABLED". The reverse applies to a server being promoted to master. +When a standalone server is rejoining the cluster, its events are also disabled +since it is now a slave. + +The monitor does not enable or disable the event scheduler itself. For the +events to run on the new master server, the scheduler should be enabled by the +admin. Enabling it in the server configuration file is recommended. + +Events running at high frequency may cause the replication to break in a +failover scenario. If an old master which was failed over restarts, its event +scheduler will be on if set in the server configuration file. Its events will +also remember their "ENABLED" status and run when scheduled. This may happen +before the monitor rejoins the server and disables the events. This should only +be an issue for events running more often than the monitor interval or events +that run immediately after the server has restarted. 
### Troubleshooting diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc index 7bf2bd6c5..a610784ce 100644 --- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc +++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc @@ -693,10 +693,10 @@ bool MariaDBMonitor::failover_perform(MariaDBServer* promotion_target, * * @param current_master Server to demote * @param info Current master info. Will be written to. TODO: Remove need for this. - * @param err_out json object for error printing. Can be NULL. + * @param error_out Error output. Can be NULL. * @return True if successful. */ -bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, json_t** err_out) +bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, json_t** error_out) { MXS_NOTICE("Demoting server '%s'.", current_master->name()); bool query_error = false; @@ -741,7 +741,8 @@ bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, jso query_error = (mxs_mysql_query(conn, query) != 0); // Disable all events here - if (!query_error && m_handle_event_scheduler && !current_master->disable_events()) + if (!query_error && m_handle_event_scheduler + && !current_master->disable_events(MariaDBServer::BinlogMode::BINLOG_ON, error_out)) { event_disable_error = true; } @@ -784,23 +785,24 @@ bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, jso { const char UNKNOWN_ERROR[] = "Demotion failed due to an unknown error when executing " "a query. Query: '%s'."; - PRINT_MXS_JSON_ERROR(err_out, UNKNOWN_ERROR, query); + PRINT_MXS_JSON_ERROR(error_out, UNKNOWN_ERROR, query); } else { const char KNOWN_ERROR[] = "Demotion failed due to a query error: '%s'. 
Query: '%s'."; - PRINT_MXS_JSON_ERROR(err_out, KNOWN_ERROR, error_desc.c_str(), query); + PRINT_MXS_JSON_ERROR(error_out, KNOWN_ERROR, error_desc.c_str(), query); } } else if (gtid_update_error) { const char* const GTID_ERROR = "Demotion failed due to a query error: %s"; - PRINT_MXS_JSON_ERROR(err_out, GTID_ERROR, error_desc.c_str()); + PRINT_MXS_JSON_ERROR(error_out, GTID_ERROR, error_desc.c_str()); } + // event_disable_error has already been printed } - else if (!m_demote_sql_file.empty() && !current_master->run_sql_from_file(m_demote_sql_file, err_out)) + else if (!m_demote_sql_file.empty() && !current_master->run_sql_from_file(m_demote_sql_file, error_out)) { - PRINT_MXS_JSON_ERROR(err_out, + PRINT_MXS_JSON_ERROR(error_out, "%s execution failed when demoting server '%s'.", CN_DEMOTION_SQL_FILE, current_master->name()); @@ -945,12 +947,13 @@ bool MariaDBMonitor::wait_cluster_stabilization(MariaDBServer* new_master, * Prepares a server for the replication master role. * * @param new_master The new master server - * @param err_out json object for error printing. Can be NULL. + * @param error_out Error output. Can be NULL. 
* @return True if successful */ -bool MariaDBMonitor::promote_new_master(MariaDBServer* new_master, json_t** err_out) +bool MariaDBMonitor::promote_new_master(MariaDBServer* new_master, json_t** error_out) { bool success = false; + bool event_enable_error = false; MYSQL* new_master_conn = new_master->m_server_base->con; MXS_NOTICE("Promoting server '%s' to master.", new_master->name()); const char* query = "STOP SLAVE;"; @@ -964,10 +967,14 @@ bool MariaDBMonitor::promote_new_master(MariaDBServer* new_master, json_t** err_ { if (m_handle_event_scheduler) { - if (new_master->enable_events()) + if (new_master->enable_events(error_out)) { success = true; } + else + { + event_enable_error = true; + } } else { @@ -979,17 +986,21 @@ bool MariaDBMonitor::promote_new_master(MariaDBServer* new_master, json_t** err_ if (!success) { - PRINT_MXS_JSON_ERROR(err_out, - "Promotion failed: '%s'. Query: '%s'.", - mysql_error(new_master_conn), - query); + if (!event_enable_error) + { + PRINT_MXS_JSON_ERROR(error_out, + "Promotion failed: '%s'. Query: '%s'.", + mysql_error(new_master_conn), + query); + } + // event_enable_error has already been printed } else { // Promotion commands ran successfully, run promotion sql script file before external replication. - if (!m_promote_sql_file.empty() && !new_master->run_sql_from_file(m_promote_sql_file, err_out)) + if (!m_promote_sql_file.empty() && !new_master->run_sql_from_file(m_promote_sql_file, error_out)) { - PRINT_MXS_JSON_ERROR(err_out, + PRINT_MXS_JSON_ERROR(error_out, "%s execution failed when promoting server '%s'.", CN_PROMOTION_SQL_FILE, new_master->name()); @@ -997,7 +1008,7 @@ bool MariaDBMonitor::promote_new_master(MariaDBServer* new_master, json_t** err_ } // If the previous master was a slave to an external master, start the equivalent slave connection on // the new master. Success of replication is not checked. 
- else if (m_external_master_port != PORT_UNKNOWN && !start_external_replication(new_master, err_out)) + else if (m_external_master_port != PORT_UNKNOWN && !start_external_replication(new_master, error_out)) { success = false; } diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index 49f5e79fd..45290ce25 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -37,6 +37,7 @@ const char* const CN_AUTO_FAILOVER = "auto_failover"; const char* const CN_SWITCHOVER_ON_LOW_DISK_SPACE = "switchover_on_low_disk_space"; const char* const CN_PROMOTION_SQL_FILE = "promotion_sql_file"; const char* const CN_DEMOTION_SQL_FILE = "demotion_sql_file"; +const char* const CN_HANDLE_EVENTS = "handle_events"; static const char CN_AUTO_REJOIN[] = "auto_rejoin"; static const char CN_FAILCOUNT[] = "failcount"; @@ -46,7 +47,6 @@ static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout"; static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout"; static const char CN_DETECT_STANDALONE_MASTER[] = "detect_standalone_master"; static const char CN_MAINTENANCE_ON_LOW_DISK_SPACE[] = "maintenance_on_low_disk_space"; -static const char CN_HANDLE_EVENTS[] = "handle_events"; // Parameters for master failure verification and timeout static const char CN_VERIFY_MASTER_FAILURE[] = "verify_master_failure"; static const char CN_MASTER_FAILURE_TIMEOUT[] = "master_failure_timeout"; @@ -1131,7 +1131,7 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE() { CN_HANDLE_EVENTS, MXS_MODULE_PARAM_BOOL, - "false" + "true" }, {MXS_END_MODULE_PARAMS} } diff --git a/server/modules/monitor/mariadbmon/mariadbmon_common.hh b/server/modules/monitor/mariadbmon/mariadbmon_common.hh index 58563f7d5..76e5984d5 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon_common.hh +++ b/server/modules/monitor/mariadbmon/mariadbmon_common.hh @@ -42,6 +42,7 @@ extern const int64_t SERVER_ID_UNKNOWN; extern const 
int64_t GTID_DOMAIN_UNKNOWN; extern const int PORT_UNKNOWN; +extern const char* const CN_HANDLE_EVENTS; // Helper class for concatenating strings with a delimiter. class DelimitedPrinter diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index 0f6088164..a7be9d967 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -35,6 +35,15 @@ const char CONNECTING[] = "Connecting"; const char NO[] = "No"; } +class MariaDBServer::EventInfo +{ +public: + std::string database; + std::string name; + std::string definer; + std::string status; +}; + MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index) : m_server_base(monitored_server) , m_config_index(config_index) @@ -680,7 +689,7 @@ bool MariaDBServer::join_cluster(const string& change_cmd, bool disable_server_e // This is unlikely to change anything, since a restarted server does not have event scheduler // ON. If it were on and events were running while the server was standalone, its data would have // diverged from the rest of the cluster. - disable_events(); + disable_events(BinlogMode::BINLOG_OFF, NULL); } query = "CHANGE MASTER TO ..."; // Don't show the real query as it contains a password. if (mxs_mysql_query(server_conn, change_cmd.c_str()) == 0) @@ -1097,113 +1106,95 @@ const SlaveStatus* MariaDBServer::slave_connection_status(const MariaDBServer* t return rval; } -bool MariaDBServer::disable_events() +bool MariaDBServer::enable_events(json_t** error_out) { - ManipulatorFunc disabler = [this](const string& db_name, - const string& event_name, - const string& event_definer, - const string& event_status) -> bool { - bool rval = true; - string error_msg; - if (event_status == "ENABLED") + int found_disabled_events = 0; + int events_enabled = 0; + // Helper function which enables a slaveside disabled event. 
+ ManipulatorFunc enabler = [this, &found_disabled_events, &events_enabled](const EventInfo& event, + json_t** error_out) { + if (event.status == "SLAVESIDE_DISABLED") { - // Found an enabled event. Disable it. Must first switch to the correct database. - string use_db_query = string_printf("USE %s;", db_name.c_str()); - if (execute_cmd(use_db_query, &error_msg)) + found_disabled_events++; + if (alter_event(event, "ENABLE", error_out)) { - // An ALTER EVENT by default changes the definer (owner) of the event to the monitor user. - // This causes problems if the monitor user does not have privileges to run - // the event contents. Prevent this by setting definer explicitly. - string alter_event_query = string_printf("ALTER DEFINER = %s EVENT %s DISABLE ON SLAVE;", - event_definer.c_str(), - event_name.c_str()); - if (execute_cmd(alter_event_query, &error_msg)) - { - MXS_NOTICE("Event '%s' of database '%s' disabled on '%s'.", - event_name.c_str(), - db_name.c_str(), - name()); - } - else - { - rval = false; - MXS_ERROR("Could not disable event '%s' of database '%s' on '%s': %s", - event_name.c_str(), - db_name.c_str(), - name(), - error_msg.c_str()); - } - } - else - { - rval = false; - MXS_ERROR("Could not switch to database '%s' on '%s': %s Event '%s' not disabled.", - db_name.c_str(), - name(), - event_name.c_str(), - error_msg.c_str()); + events_enabled++; } } - return rval; }; - warn_event_scheduler(); - return events_foreach(disabler); + bool rval = false; + if (events_foreach(enabler, error_out)) + { + if (found_disabled_events > 0) + { + warn_event_scheduler(); + } + if (found_disabled_events == events_enabled) + { + rval = true; + } + } + return rval; +} + +bool MariaDBServer::disable_events(BinlogMode binlog_mode, json_t** error_out) +{ + int found_enabled_events = 0; + int events_disabled = 0; + // Helper function which disables an enabled event. 
+ ManipulatorFunc disabler = [this, &found_enabled_events, &events_disabled](const EventInfo& event, + json_t** error_out) { + if (event.status == "ENABLED") + { + found_enabled_events++; + if (alter_event(event, "DISABLE ON SLAVE", error_out)) + { + events_disabled++; + } + } + }; + + // If the server is rejoining the cluster, no events may be added to binlog. The ALTER EVENT query + // itself adds events. To prevent this, disable the binlog for this method. + string error_msg; + if (binlog_mode == BinlogMode::BINLOG_OFF) + { + if (!execute_cmd("SET @@session.sql_log_bin=0;", &error_msg)) + { + const char FMT[] = "Could not disable session binlog on '%s': %s Server events not disabled."; + PRINT_MXS_JSON_ERROR(error_out, FMT, name(), error_msg.c_str()); + return false; + } + } + + bool rval = false; + if (events_foreach(disabler, error_out)) + { + if (found_enabled_events > 0) + { + warn_event_scheduler(); + } + if (found_enabled_events == events_disabled) + { + rval = true; + } + } + + if (binlog_mode == BinlogMode::BINLOG_OFF) + { + // Failure in re-enabling the session binlog doesn't really matter because we don't want the monitor + // generating binlog events anyway. + execute_cmd("SET @@session.sql_log_bin=1;"); + } + return rval; // TODO: For better error handling, this function should try to re-enable any disabled events if a later // disable fails. } -bool MariaDBServer::enable_events() -{ - ManipulatorFunc enabler = [this](const string& db_name, - const string& event_name, - const string& event_definer, - const string& event_status) -> bool { - bool rval = true; - string error_msg; - if (event_status == "SLAVESIDE_DISABLED") - { - // Found a disabled event. Enable it. Must first switch to the correct database. 
- string use_db_query = string_printf("USE %s;", db_name.c_str()); - if (execute_cmd(use_db_query, &error_msg)) - { - string alter_event_query = string_printf("ALTER DEFINER = %s EVENT %s ENABLE;", - event_definer.c_str(), - event_name.c_str()); - if (execute_cmd(alter_event_query, &error_msg)) - { - MXS_NOTICE("Event '%s' of database '%s' enabled on '%s'.", - event_name.c_str(), - db_name.c_str(), - name()); - } - else - { - rval = false; - MXS_ERROR("Could not enable event '%s' of database '%s' on '%s': %s", - event_name.c_str(), - db_name.c_str(), - name(), - error_msg.c_str()); - } - } - else - { - rval = false; - MXS_ERROR("Could not switch to database '%s' on '%s': %s Event '%s' not enabled.", - db_name.c_str(), - name(), - event_name.c_str(), - error_msg.c_str()); - } - } - return rval; - }; - - warn_event_scheduler(); - return events_foreach(enabler); -} - +/** + * Print a warning if the event scheduler is off. + */ void MariaDBServer::warn_event_scheduler() { string error_msg; @@ -1218,20 +1209,32 @@ void MariaDBServer::warn_event_scheduler() { if (proc_list->get_row_count() < 1) { - // This is ok, though unexpected since user should have event handling activated for a reason. - MXS_WARNING("Event scheduler is inactive on '%s'.", name()); + // This is ok, though unexpected since events were found. + MXS_WARNING("Event scheduler is inactive on '%s' although events were found.", name()); } } } -bool MariaDBServer::events_foreach(ManipulatorFunc& func) +/** + * Run the manipulator function on every server event. + * + * @param func The manipulator function + * @param error_out Error output + * @return True if event information could be read from information_schema.EVENTS. The return value does not + * depend on the manipulator function. + */ +bool MariaDBServer::events_foreach(ManipulatorFunc& func, json_t** error_out) { string error_msg; // Get info about all scheduled events on the server. 
auto event_info = execute_query("SELECT * FROM information_schema.EVENTS;", &error_msg); if (event_info.get() == NULL) { - MXS_ERROR("Could not query event status of '%s': %s", name(), error_msg.c_str()); + MXS_ERROR("Could not query event status of '%s': %s Event handling can be disabled by " + "setting '%s' to false.", + name(), + error_msg.c_str(), + CN_HANDLE_EVENTS); return false; } @@ -1241,19 +1244,61 @@ bool MariaDBServer::events_foreach(ManipulatorFunc& func) auto event_status_ind = event_info->get_col_index("STATUS"); mxb_assert(db_name_ind > 0 && event_name_ind > 0 && event_definer_ind > 0 && event_status_ind > 0); - int errors = 0; while (event_info->next_row()) { - string db_name = event_info->get_string(db_name_ind); - string event_name = event_info->get_string(event_name_ind); - string event_definer = event_info->get_string(event_definer_ind); - string event_status = event_info->get_string(event_status_ind); - if (!func(db_name, event_name, event_definer, event_status)) + EventInfo event; + event.database = event_info->get_string(db_name_ind); + event.name = event_info->get_string(event_name_ind); + event.definer = event_info->get_string(event_definer_ind); + event.status = event_info->get_string(event_status_ind); + func(event, error_out); + } + return true; +} + +/** + * Alter a scheduled server event, setting its status. + * + * @param event Event to alter + * @param target_status Status to set + * @param error_out Error output + * @return True if status was set + */ +bool MariaDBServer::alter_event(const EventInfo& event, const string& target_status, json_t** error_out) +{ + bool rval = false; + string error_msg; + // First switch to the correct database. + string use_db_query = string_printf("USE %s;", event.database.c_str()); + if (execute_cmd(use_db_query, &error_msg)) + { + // An ALTER EVENT by default changes the definer (owner) of the event to the monitor user. 
+ // This causes problems if the monitor user does not have privileges to run + // the event contents. Prevent this by setting definer explicitly. + string alter_event_query = string_printf("ALTER DEFINER = %s EVENT %s %s;", + event.definer.c_str(), + event.name.c_str(), + target_status.c_str()); + if (execute_cmd(alter_event_query, &error_msg)) { - errors++; + rval = true; + const char FMT[] = "Event '%s' of database '%s' on server '%s' set to '%s'."; + MXS_NOTICE(FMT, event.name.c_str(), event.database.c_str(), name(), target_status.c_str()); + } + else + { + const char FMT[] = "Could not alter event '%s' of database '%s' on server '%s': %s"; + PRINT_MXS_JSON_ERROR(error_out, FMT, event.name.c_str(), event.database.c_str(), name(), + error_msg.c_str()); } } - return errors == 0; + else + { + const char FMT[] = "Could not switch to database '%s' on '%s': %s Event '%s' not altered."; + PRINT_MXS_JSON_ERROR(error_out, FMT, event.database.c_str(), name(), error_msg.c_str(), + event.name.c_str()); + } + return rval; } string SlaveStatus::to_string() const diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh index b43233c34..86bdee254 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.hh +++ b/server/modules/monitor/mariadbmon/mariadbserver.hh @@ -139,6 +139,12 @@ public: BINLOG_ROUTER /* MaxScale binlog server. Requires special handling. */ }; + enum class BinlogMode + { + BINLOG_ON, + BINLOG_OFF + }; + MXS_MONITORED_SERVER* m_server_base;/**< Monitored server base class/struct. MariaDBServer does not * own the struct, it is not freed (or connection closed) when * a MariaDBServer is destroyed. Can be const on gcc 4.8 */ @@ -467,30 +473,33 @@ public: void set_status(uint64_t bits); /** - * Disable any "ENABLED" events if event scheduler is enabled. + * Enable any "SLAVESIDE_DISABLED" events. Event scheduler is not touched. 
* - * @return True if successful + * @param error_out Error output + * @return True if all SLAVESIDE_DISABLED events were enabled */ - bool disable_events(); + bool enable_events(json_t** error_out); /** - * Enable any "SLAVESIDE_DISABLED" events if event scheduler is enabled. + * Disable any "ENABLED" events. Event scheduler is not touched. * - * @return True if successful + * @param binlog_mode If OFF, binlog event creation is disabled for the session during method execution. + * @param error_out Error output + * @return True if all ENABLED events were disabled */ - bool enable_events(); + bool disable_events(BinlogMode binlog_mode, json_t** error_out); private: - typedef std::function ManipulatorFunc; + class EventInfo; + typedef std::function ManipulatorFunc; bool update_slave_status(std::string* errmsg_out = NULL); bool sstatus_array_topology_equal(const SlaveStatusArray& new_slave_status); const SlaveStatus* sstatus_find_previous_row(const SlaveStatus& new_row, size_t guess); void warn_event_scheduler(); - bool events_foreach(ManipulatorFunc& func); + bool events_foreach(ManipulatorFunc& func, json_t** error_out); + bool alter_event(const EventInfo& event, const std::string& target_status, + json_t** error_out); }; /**