From 7e6ce2d13f28a6d09bc459fdb36f6ff97e9f0883 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Fri, 31 Aug 2018 18:14:50 +0300 Subject: [PATCH] MXS-1937 Disable server events on master during switchover The feature is behind a config setting. --- .../mariadbmon/cluster_manipulation.cc | 68 ++++++--- .../modules/monitor/mariadbmon/mariadbmon.cc | 17 ++- .../modules/monitor/mariadbmon/mariadbmon.hh | 2 + .../monitor/mariadbmon/mariadbserver.cc | 142 +++++++++++++++--- .../monitor/mariadbmon/mariadbserver.hh | 41 ++++- 5 files changed, 217 insertions(+), 53 deletions(-) diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc index 414ff9bea..086c96b28 100644 --- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc +++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc @@ -664,13 +664,21 @@ bool MariaDBMonitor::failover_perform(MariaDBServer* promotion_target, MariaDBSe bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, json_t** err_out) { MXS_NOTICE("Demoting server '%s'.", current_master->name()); - bool success = false; bool query_error = false; + bool gtid_update_error = false; + bool event_disable_error = false; + MYSQL* conn = current_master->m_server_base->con; const char* query = ""; // The next query to execute. Used also for error printing. // The presence of an external master changes several things. const bool external_master = server_is_slave_of_ext_master(current_master->m_server_base->server); + // Helper function for checking if any error is on. + auto any_error = [&query_error, >id_update_error, &event_disable_error]() -> bool + { + return query_error || gtid_update_error || event_disable_error; + }; + if (external_master) { // First need to stop slave. read_only is probably on already, although not certain. @@ -697,28 +705,33 @@ bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, jso { query = "FLUSH TABLES;"; query_error = (mxs_mysql_query(conn, query) != 0); - } - if (!query_error) - { - query = "FLUSH LOGS;"; - query_error = (mxs_mysql_query(conn, query) != 0); - if (!query_error) + // Disable all events here + if (!query_error && m_handle_event_scheduler && !current_master->disable_events()) { - query = ""; - if (current_master->update_gtids()) - { - success = true; - } + event_disable_error = true; } } - if (!success) + if (!any_error()) + { + query = "FLUSH LOGS;"; + query_error = (mxs_mysql_query(conn, query) != 0); + if (!query_error && !current_master->update_gtids(&error_desc)) + { + gtid_update_error = true; + } + } + + if (any_error()) { // Somehow, a step after "SET read_only" failed. Try to set read_only back to 0. It may not // work since the connection is likely broken. - error_desc = mysql_error(conn); // Read connection error before next step. - error_fetched = true; + if (query_error) + { + error_desc = mysql_error(conn); // Read connection error before next step. + error_fetched = true; + } mxs_mysql_query(conn, "SET GLOBAL read_only=0;"); } } @@ -729,7 +742,7 @@ bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, jso error_desc = mysql_error(conn); } - if (!success) + if (any_error()) { if (query_error) { @@ -745,21 +758,20 @@ bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, jso PRINT_MXS_JSON_ERROR(err_out, KNOWN_ERROR, error_desc.c_str(), query); } } - else + else if (gtid_update_error) { - const char * const GTID_ERROR = "Demotion failed due to an error in updating gtid:s. " - "Check log for more details."; - PRINT_MXS_JSON_ERROR(err_out, GTID_ERROR); + const char * const GTID_ERROR = "Demotion failed due to a query error: %s"; + PRINT_MXS_JSON_ERROR(err_out, GTID_ERROR, error_desc.c_str()); } } else if (!m_demote_sql_file.empty() && !current_master->run_sql_from_file(m_demote_sql_file, err_out)) { PRINT_MXS_JSON_ERROR(err_out, "%s execution failed when demoting server '%s'.", CN_DEMOTION_SQL_FILE, current_master->name()); - success = false; + query_error = true; } - return success; + return !any_error(); } /** @@ -910,7 +922,17 @@ bool MariaDBMonitor::promote_new_master(MariaDBServer* new_master, json_t** err_ query = "SET GLOBAL read_only=0;"; if (mxs_mysql_query(new_master_conn, query) == 0) { - success = true; + if (m_handle_event_scheduler) + { + if (new_master->enable_events()) + { + success = true; + } + } + else + { + success = true; + } } } } diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index 4e62c8d11..4d4ff2954 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -38,14 +38,15 @@ const char * const CN_SWITCHOVER_ON_LOW_DISK_SPACE = "switchover_on_low_disk_spa const char * const CN_PROMOTION_SQL_FILE = "promotion_sql_file"; const char * const CN_DEMOTION_SQL_FILE = "demotion_sql_file"; -static const char CN_AUTO_REJOIN[] = "auto_rejoin"; -static const char CN_FAILCOUNT[] = "failcount"; -static const char CN_ENFORCE_READONLY[] = "enforce_read_only_slaves"; -static const char CN_NO_PROMOTE_SERVERS[] = "servers_no_promotion"; -static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout"; -static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout"; -static const char CN_DETECT_STANDALONE_MASTER[] = "detect_standalone_master"; +static const char CN_AUTO_REJOIN[] = "auto_rejoin"; +static const char CN_FAILCOUNT[] = "failcount"; +static const char CN_ENFORCE_READONLY[] = "enforce_read_only_slaves"; +static const char CN_NO_PROMOTE_SERVERS[] = "servers_no_promotion"; +static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout"; +static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout"; +static const char CN_DETECT_STANDALONE_MASTER[] = "detect_standalone_master"; static const char CN_MAINTENANCE_ON_LOW_DISK_SPACE[] = "maintenance_on_low_disk_space"; +static const char CN_HANDLE_EVENTS[] = "handle_events"; // Parameters for master failure verification and timeout static const char CN_VERIFY_MASTER_FAILURE[] = "verify_master_failure"; static const char CN_MASTER_FAILURE_TIMEOUT[] = "master_failure_timeout"; @@ -218,6 +219,7 @@ bool MariaDBMonitor::configure(const MXS_CONFIG_PARAMETER* params) m_demote_sql_file = config_get_string(params, CN_DEMOTION_SQL_FILE); m_switchover_on_low_disk_space = config_get_bool(params, CN_SWITCHOVER_ON_LOW_DISK_SPACE); m_maintenance_on_low_disk_space = config_get_bool(params, CN_MAINTENANCE_ON_LOW_DISK_SPACE); + m_handle_event_scheduler = config_get_bool(params, CN_HANDLE_EVENTS); m_excluded_servers.clear(); MXS_MONITORED_SERVER** excluded_array = NULL; @@ -1006,6 +1008,7 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE() {CN_DEMOTION_SQL_FILE, MXS_MODULE_PARAM_PATH}, {CN_SWITCHOVER_ON_LOW_DISK_SPACE, MXS_MODULE_PARAM_BOOL, "false"}, {CN_MAINTENANCE_ON_LOW_DISK_SPACE, MXS_MODULE_PARAM_BOOL, "true"}, + {CN_HANDLE_EVENTS, MXS_MODULE_PARAM_BOOL, "false"}, {MXS_END_MODULE_PARAMS} } }; diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh index 9df553301..7dd90b0fc 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.hh +++ b/server/modules/monitor/mariadbmon/mariadbmon.hh @@ -167,6 +167,8 @@ private: bool m_switchover_on_low_disk_space; /**< Should the monitor do a switchover on low disk space. */ bool m_maintenance_on_low_disk_space; /**< Set slave and unreplicating servers with low disk space to * maintenance. */ + bool m_handle_event_scheduler; /**< Should failover/switchover handle any scheduled events on + * the servers */ // Other settings bool m_log_no_master; /**< Should it be logged that there is no master */ diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index 160906d12..47b2cbaf8 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -86,7 +86,7 @@ std::unique_ptr MariaDBServer::execute_query(const string& query, s { auto conn = m_server_base->con; std::unique_ptr rval; - MYSQL_RES *result = NULL; + MYSQL_RES* result = NULL; if (mxs_mysql_query(conn, query.c_str()) == 0 && (result = mysql_store_result(conn)) != NULL) { rval = std::unique_ptr(new QueryResult(result)); @@ -98,6 +98,32 @@ std::unique_ptr MariaDBServer::execute_query(const string& query, s return rval; } +bool MariaDBServer::execute_cmd(const string& cmd, std::string* errmsg_out) +{ + bool rval = false; + auto conn = m_server_base->con; + if (mxs_mysql_query(conn, cmd.c_str()) == 0) + { + MYSQL_RES* result = mysql_store_result(conn); + if (result == NULL) + { + rval = true; + } + else if (errmsg_out) + { + int cols = mysql_num_fields(result); + int rows = mysql_num_rows(result); + *errmsg_out = string_printf("Query '%s' returned %d columns and %d rows of data when none " + "was expected.", cmd.c_str(), cols, rows); + } + } + else if (errmsg_out) + { + *errmsg_out = string_printf("Query '%s' failed: '%s'.", cmd.c_str(), mysql_error(conn)); + } + return rval; +} + bool MariaDBServer::do_show_slave_status(string* errmsg_out) { unsigned int columns = 0; @@ -125,10 +151,10 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out) { return false; } - else if(result->get_column_count() < columns) + else if (result->get_col_count() < columns) { MXS_ERROR("'%s' returned less than the expected amount of columns. Expected %u columns, " - "got %" PRId64 ".", query.c_str(), columns, result->get_column_count()); + "got %" PRId64 ".", query.c_str(), columns, result->get_col_count()); return false; } @@ -1039,6 +1065,85 @@ const SlaveStatus* MariaDBServer::slave_connection_status(const MariaDBServer* t return rval; } +bool MariaDBServer::disable_events() +{ + string error_msg; + + // Events only need to be disabled if the event scheduler is ON. The comparison to 'localhost' + // excludes normal users with the username 'event_scheduler'. + const string scheduler_query = "SELECT * FROM information_schema.PROCESSLIST " + "WHERE User = 'event_scheduler' AND Host = 'localhost';"; + auto proc_list = execute_query(scheduler_query, &error_msg); + if (proc_list.get() == NULL) + { + MXS_ERROR("Could not query the event scheduler status of '%s': %s", name(), error_msg.c_str()); + return false; + } + else if (proc_list->get_row_count() < 1) + { + // This is ok, though unexpected since user should have event handling activated for a reason. + MXS_NOTICE("Event scheduler is inactive on '%s', not disabling events.", name()); + return true; + } + + // Get info about all scheduled events on the server. + auto event_info = execute_query("SELECT * FROM information_schema.EVENTS;", &error_msg); + if (event_info.get() == NULL) + { + MXS_ERROR("Could not query event status of '%s': %s", name(), error_msg.c_str()); + return false; + } + + auto db_name_ind = event_info->get_col_index("EVENT_SCHEMA"); + auto event_name_ind = event_info->get_col_index("EVENT_NAME"); + auto event_status_ind = event_info->get_col_index("STATUS"); + mxb_assert(db_name_ind > 0 && event_name_ind > 0 && event_status_ind > 0); + + int errors = 0; + while (event_info->next_row()) + { + string db_name = event_info->get_string(db_name_ind); + string event_name = event_info->get_string(event_name_ind); + string event_status = event_info->get_string(event_status_ind); + if (event_status == "ENABLED") + { + // Found an enabled event. Disable it. Must first switch to the correct database. + string use_db_query = string_printf("USE %s;", db_name.c_str()); + if (execute_cmd(use_db_query, &error_msg)) + { + string alter_event_query = string_printf("ALTER EVENT %s DISABLE ON SLAVE;", + event_name.c_str()); + if (execute_cmd(alter_event_query, &error_msg)) + { + MXS_NOTICE("Event '%s' of database '%s' disabled on '%s'.", + event_name.c_str(), db_name.c_str(), name()); + } + else + { + errors++; + MXS_ERROR("Could not disable event '%s' of database '%s' on '%s': %s", + event_name.c_str(), db_name.c_str(), name() , error_msg.c_str()); + } + } + else + { + errors++; + MXS_ERROR("Could not switch to database '%s' on '%s': %s Event '%s' not disabled.", + db_name.c_str(), name(), event_name.c_str(), error_msg.c_str()); + } + } + } + return errors == 0; + // TODO: For better error handling, this function should try to re-enable any disabled events if a later + // disable fails. +} + +bool MariaDBServer::enable_events() +{ + // TODO: + return true; +} + string SlaveStatus::to_string() const { // Print all of this on the same line to make things compact. Are the widths reasonable? The format is @@ -1115,15 +1220,12 @@ string SlaveStatus::slave_io_to_string(SlaveStatus::slave_io_running_t slave_io) QueryResult::QueryResult(MYSQL_RES* resultset) : m_resultset(resultset) - , m_columns(-1) - , m_rowdata(NULL) - , m_current_row(-1) { if (m_resultset) { - m_columns = mysql_num_fields(m_resultset); + auto columns = mysql_num_fields(m_resultset); MYSQL_FIELD* field_info = mysql_fetch_fields(m_resultset); - for (int64_t column_index = 0; column_index < m_columns; column_index++) + for (int64_t column_index = 0; column_index < columns; column_index++) { string key(field_info[column_index].name); // TODO: Think of a way to handle duplicate names nicely. Currently this should only be used @@ -1144,23 +1246,29 @@ QueryResult::~QueryResult() bool QueryResult::next_row() { + mxb_assert(m_resultset); m_rowdata = mysql_fetch_row(m_resultset); - if (m_rowdata != NULL) + if (m_rowdata) { - m_current_row++; + m_current_row_ind++; return true; } return false; } -int64_t QueryResult::get_row_index() const +int64_t QueryResult::get_current_row_index() const { - return m_current_row; + return m_current_row_ind; } -int64_t QueryResult::get_column_count() const +int64_t QueryResult::get_col_count() const { - return m_columns; + return m_resultset ? mysql_num_fields(m_resultset): -1; +} + +int64_t QueryResult::get_row_count() const +{ + return m_resultset ? mysql_num_rows(m_resultset) : -1; } int64_t QueryResult::get_col_index(const string& col_name) const @@ -1171,14 +1279,14 @@ int64_t QueryResult::get_col_index(const string& col_name) const string QueryResult::get_string(int64_t column_ind) const { - mxb_assert(column_ind < m_columns && column_ind >= 0); + mxb_assert(column_ind < get_col_count() && column_ind >= 0); char* data = m_rowdata[column_ind]; return data ? data : ""; } int64_t QueryResult::get_uint(int64_t column_ind) const { - mxb_assert(column_ind < m_columns && column_ind >= 0); + mxb_assert(column_ind < get_col_count() && column_ind >= 0); char* data = m_rowdata[column_ind]; int64_t rval = -1; if (data && *data) @@ -1196,7 +1304,7 @@ int64_t QueryResult::get_uint(int64_t column_ind) const bool QueryResult::get_bool(int64_t column_ind) const { - mxb_assert(column_ind < m_columns && column_ind >= 0); + mxb_assert(column_ind < get_col_count() && column_ind >= 0); char* data = m_rowdata[column_ind]; return data ? (strcmp(data,"Y") == 0 || strcmp(data, "1") == 0) : false; } diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh index e91fd1ce4..6a24addb5 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.hh +++ b/server/modules/monitor/mariadbmon/mariadbserver.hh @@ -193,6 +193,15 @@ public: */ std::unique_ptr execute_query(const std::string& query, std::string* errmsg_out = NULL); + /** + * Execute a query which does not return data. If the query returns data, an error is returned. + * + * @param cmd The query + * @param errmsg_out Error output. Can be null. + * @return True on success, false on error or if query returned data + */ + bool execute_cmd(const std::string& cmd, std::string* errmsg_out = NULL); + /** * Update server slave connection information. * @@ -449,6 +458,20 @@ public: */ void set_status(uint64_t bits); + /** + * Disable any "ENABLED" events if event scheduler is enabled. + * + * @return True if successful + */ + bool disable_events(); + + /** + * Enable any "SLAVESIDE_DISABLED" events if event scheduler is enabled. + * + * @return True if successful + */ + bool enable_events(); + private: bool update_slave_status(std::string* errmsg_out = NULL); bool sstatus_array_topology_equal(const SlaveStatusArray& new_slave_status); @@ -480,14 +503,21 @@ public: * * @return Current row index, or -1 if no data or next_row() has not been called yet. */ - int64_t get_row_index() const; + int64_t get_current_row_index() const; /** * How many columns the result set has. * * @return Column count, or -1 if no data. */ - int64_t get_column_count() const; + int64_t get_col_count() const; + + /** + * How many rows does the result set have? + * + * @return The number of rows or -1 on error + */ + int64_t get_row_count() const; /** * Get a numeric index for a column name. May give wrong results if column names are not unique. @@ -523,9 +553,8 @@ public: bool get_bool(int64_t column_ind) const; private: - MYSQL_RES* m_resultset; // Underlying result set, freed at dtor. + MYSQL_RES* m_resultset = NULL; // Underlying result set, freed at dtor. std::unordered_map m_col_indexes; // Map of column name -> index - int64_t m_columns; // How many columns does the data have. Usually equal to column index map size. - MYSQL_ROW m_rowdata; // Data for current row - int64_t m_current_row; // Index of current row + MYSQL_ROW m_rowdata = NULL; // Data for current row + int64_t m_current_row_ind = -1; // Index of current row };