MXS-1937 Disable server events on master during switchover

The feature is behind a config setting.
This commit is contained in:
Esa Korhonen
2018-08-31 18:14:50 +03:00
parent a0ea37b2c5
commit 7e6ce2d13f
5 changed files with 217 additions and 53 deletions

View File

@ -664,13 +664,21 @@ bool MariaDBMonitor::failover_perform(MariaDBServer* promotion_target, MariaDBSe
bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, json_t** err_out)
{
MXS_NOTICE("Demoting server '%s'.", current_master->name());
bool success = false;
bool query_error = false;
bool gtid_update_error = false;
bool event_disable_error = false;
MYSQL* conn = current_master->m_server_base->con;
const char* query = ""; // The next query to execute. Used also for error printing.
// The presence of an external master changes several things.
const bool external_master = server_is_slave_of_ext_master(current_master->m_server_base->server);
// Helper function for checking if any error is on.
auto any_error = [&query_error, &gtid_update_error, &event_disable_error]() -> bool
{
return query_error || gtid_update_error || event_disable_error;
};
if (external_master)
{
// First need to stop slave. read_only is probably on already, although not certain.
@ -697,28 +705,33 @@ bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, jso
{
query = "FLUSH TABLES;";
query_error = (mxs_mysql_query(conn, query) != 0);
}
if (!query_error)
{
query = "FLUSH LOGS;";
query_error = (mxs_mysql_query(conn, query) != 0);
if (!query_error)
// Disable all events here
if (!query_error && m_handle_event_scheduler && !current_master->disable_events())
{
query = "";
if (current_master->update_gtids())
{
success = true;
}
event_disable_error = true;
}
}
if (!success)
if (!any_error())
{
query = "FLUSH LOGS;";
query_error = (mxs_mysql_query(conn, query) != 0);
if (!query_error && !current_master->update_gtids(&error_desc))
{
gtid_update_error = true;
}
}
if (any_error())
{
// Somehow, a step after "SET read_only" failed. Try to set read_only back to 0. It may not
// work since the connection is likely broken.
error_desc = mysql_error(conn); // Read connection error before next step.
error_fetched = true;
if (query_error)
{
error_desc = mysql_error(conn); // Read connection error before next step.
error_fetched = true;
}
mxs_mysql_query(conn, "SET GLOBAL read_only=0;");
}
}
@ -729,7 +742,7 @@ bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, jso
error_desc = mysql_error(conn);
}
if (!success)
if (any_error())
{
if (query_error)
{
@ -745,21 +758,20 @@ bool MariaDBMonitor::switchover_demote_master(MariaDBServer* current_master, jso
PRINT_MXS_JSON_ERROR(err_out, KNOWN_ERROR, error_desc.c_str(), query);
}
}
else
else if (gtid_update_error)
{
const char * const GTID_ERROR = "Demotion failed due to an error in updating gtid:s. "
"Check log for more details.";
PRINT_MXS_JSON_ERROR(err_out, GTID_ERROR);
const char * const GTID_ERROR = "Demotion failed due to a query error: %s";
PRINT_MXS_JSON_ERROR(err_out, GTID_ERROR, error_desc.c_str());
}
}
else if (!m_demote_sql_file.empty() && !current_master->run_sql_from_file(m_demote_sql_file, err_out))
{
PRINT_MXS_JSON_ERROR(err_out, "%s execution failed when demoting server '%s'.",
CN_DEMOTION_SQL_FILE, current_master->name());
success = false;
query_error = true;
}
return success;
return !any_error();
}
/**
@ -910,7 +922,17 @@ bool MariaDBMonitor::promote_new_master(MariaDBServer* new_master, json_t** err_
query = "SET GLOBAL read_only=0;";
if (mxs_mysql_query(new_master_conn, query) == 0)
{
success = true;
if (m_handle_event_scheduler)
{
if (new_master->enable_events())
{
success = true;
}
}
else
{
success = true;
}
}
}
}

View File

@ -38,14 +38,15 @@ const char * const CN_SWITCHOVER_ON_LOW_DISK_SPACE = "switchover_on_low_disk_spa
const char * const CN_PROMOTION_SQL_FILE = "promotion_sql_file";
const char * const CN_DEMOTION_SQL_FILE = "demotion_sql_file";
static const char CN_AUTO_REJOIN[] = "auto_rejoin";
static const char CN_FAILCOUNT[] = "failcount";
static const char CN_ENFORCE_READONLY[] = "enforce_read_only_slaves";
static const char CN_NO_PROMOTE_SERVERS[] = "servers_no_promotion";
static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout";
static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout";
static const char CN_DETECT_STANDALONE_MASTER[] = "detect_standalone_master";
static const char CN_AUTO_REJOIN[] = "auto_rejoin";
static const char CN_FAILCOUNT[] = "failcount";
static const char CN_ENFORCE_READONLY[] = "enforce_read_only_slaves";
static const char CN_NO_PROMOTE_SERVERS[] = "servers_no_promotion";
static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout";
static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout";
static const char CN_DETECT_STANDALONE_MASTER[] = "detect_standalone_master";
static const char CN_MAINTENANCE_ON_LOW_DISK_SPACE[] = "maintenance_on_low_disk_space";
static const char CN_HANDLE_EVENTS[] = "handle_events";
// Parameters for master failure verification and timeout
static const char CN_VERIFY_MASTER_FAILURE[] = "verify_master_failure";
static const char CN_MASTER_FAILURE_TIMEOUT[] = "master_failure_timeout";
@ -218,6 +219,7 @@ bool MariaDBMonitor::configure(const MXS_CONFIG_PARAMETER* params)
m_demote_sql_file = config_get_string(params, CN_DEMOTION_SQL_FILE);
m_switchover_on_low_disk_space = config_get_bool(params, CN_SWITCHOVER_ON_LOW_DISK_SPACE);
m_maintenance_on_low_disk_space = config_get_bool(params, CN_MAINTENANCE_ON_LOW_DISK_SPACE);
m_handle_event_scheduler = config_get_bool(params, CN_HANDLE_EVENTS);
m_excluded_servers.clear();
MXS_MONITORED_SERVER** excluded_array = NULL;
@ -1006,6 +1008,7 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE()
{CN_DEMOTION_SQL_FILE, MXS_MODULE_PARAM_PATH},
{CN_SWITCHOVER_ON_LOW_DISK_SPACE, MXS_MODULE_PARAM_BOOL, "false"},
{CN_MAINTENANCE_ON_LOW_DISK_SPACE, MXS_MODULE_PARAM_BOOL, "true"},
{CN_HANDLE_EVENTS, MXS_MODULE_PARAM_BOOL, "false"},
{MXS_END_MODULE_PARAMS}
}
};

View File

@ -167,6 +167,8 @@ private:
bool m_switchover_on_low_disk_space; /**< Should the monitor do a switchover on low disk space. */
bool m_maintenance_on_low_disk_space; /**< Set slave and unreplicating servers with low disk space to
* maintenance. */
bool m_handle_event_scheduler; /**< Should failover/switchover handle any scheduled events on
* the servers */
// Other settings
bool m_log_no_master; /**< Should it be logged that there is no master */

View File

@ -86,7 +86,7 @@ std::unique_ptr<QueryResult> MariaDBServer::execute_query(const string& query, s
{
auto conn = m_server_base->con;
std::unique_ptr<QueryResult> rval;
MYSQL_RES *result = NULL;
MYSQL_RES* result = NULL;
if (mxs_mysql_query(conn, query.c_str()) == 0 && (result = mysql_store_result(conn)) != NULL)
{
rval = std::unique_ptr<QueryResult>(new QueryResult(result));
@ -98,6 +98,32 @@ std::unique_ptr<QueryResult> MariaDBServer::execute_query(const string& query, s
return rval;
}
bool MariaDBServer::execute_cmd(const string& cmd, std::string* errmsg_out)
{
bool rval = false;
auto conn = m_server_base->con;
if (mxs_mysql_query(conn, cmd.c_str()) == 0)
{
MYSQL_RES* result = mysql_store_result(conn);
if (result == NULL)
{
rval = true;
}
else if (errmsg_out)
{
int cols = mysql_num_fields(result);
int rows = mysql_num_rows(result);
*errmsg_out = string_printf("Query '%s' returned %d columns and %d rows of data when none "
"was expected.", cmd.c_str(), cols, rows);
}
}
else if (errmsg_out)
{
*errmsg_out = string_printf("Query '%s' failed: '%s'.", cmd.c_str(), mysql_error(conn));
}
return rval;
}
bool MariaDBServer::do_show_slave_status(string* errmsg_out)
{
unsigned int columns = 0;
@ -125,10 +151,10 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out)
{
return false;
}
else if(result->get_column_count() < columns)
else if (result->get_col_count() < columns)
{
MXS_ERROR("'%s' returned less than the expected amount of columns. Expected %u columns, "
"got %" PRId64 ".", query.c_str(), columns, result->get_column_count());
"got %" PRId64 ".", query.c_str(), columns, result->get_col_count());
return false;
}
@ -1039,6 +1065,85 @@ const SlaveStatus* MariaDBServer::slave_connection_status(const MariaDBServer* t
return rval;
}
bool MariaDBServer::disable_events()
{
string error_msg;
// Events only need to be disabled if the event scheduler is ON. The comparison to 'localhost'
// excludes normal users with the username 'event_scheduler'.
const string scheduler_query = "SELECT * FROM information_schema.PROCESSLIST "
"WHERE User = 'event_scheduler' AND Host = 'localhost';";
auto proc_list = execute_query(scheduler_query, &error_msg);
if (proc_list.get() == NULL)
{
MXS_ERROR("Could not query the event scheduler status of '%s': %s", name(), error_msg.c_str());
return false;
}
else if (proc_list->get_row_count() < 1)
{
// This is ok, though unexpected since user should have event handling activated for a reason.
MXS_NOTICE("Event scheduler is inactive on '%s', not disabling events.", name());
return true;
}
// Get info about all scheduled events on the server.
auto event_info = execute_query("SELECT * FROM information_schema.EVENTS;", &error_msg);
if (event_info.get() == NULL)
{
MXS_ERROR("Could not query event status of '%s': %s", name(), error_msg.c_str());
return false;
}
auto db_name_ind = event_info->get_col_index("EVENT_SCHEMA");
auto event_name_ind = event_info->get_col_index("EVENT_NAME");
auto event_status_ind = event_info->get_col_index("STATUS");
mxb_assert(db_name_ind > 0 && event_name_ind > 0 && event_status_ind > 0);
int errors = 0;
while (event_info->next_row())
{
string db_name = event_info->get_string(db_name_ind);
string event_name = event_info->get_string(event_name_ind);
string event_status = event_info->get_string(event_status_ind);
if (event_status == "ENABLED")
{
// Found an enabled event. Disable it. Must first switch to the correct database.
string use_db_query = string_printf("USE %s;", db_name.c_str());
if (execute_cmd(use_db_query, &error_msg))
{
string alter_event_query = string_printf("ALTER EVENT %s DISABLE ON SLAVE;",
event_name.c_str());
if (execute_cmd(alter_event_query, &error_msg))
{
MXS_NOTICE("Event '%s' of database '%s' disabled on '%s'.",
event_name.c_str(), db_name.c_str(), name());
}
else
{
errors++;
MXS_ERROR("Could not disable event '%s' of database '%s' on '%s': %s",
event_name.c_str(), db_name.c_str(), name() , error_msg.c_str());
}
}
else
{
errors++;
MXS_ERROR("Could not switch to database '%s' on '%s': %s Event '%s' not disabled.",
db_name.c_str(), name(), event_name.c_str(), error_msg.c_str());
}
}
}
return errors == 0;
// TODO: For better error handling, this function should try to re-enable any disabled events if a later
// disable fails.
}
bool MariaDBServer::enable_events()
{
// TODO:
return true;
}
string SlaveStatus::to_string() const
{
// Print all of this on the same line to make things compact. Are the widths reasonable? The format is
@ -1115,15 +1220,12 @@ string SlaveStatus::slave_io_to_string(SlaveStatus::slave_io_running_t slave_io)
QueryResult::QueryResult(MYSQL_RES* resultset)
: m_resultset(resultset)
, m_columns(-1)
, m_rowdata(NULL)
, m_current_row(-1)
{
if (m_resultset)
{
m_columns = mysql_num_fields(m_resultset);
auto columns = mysql_num_fields(m_resultset);
MYSQL_FIELD* field_info = mysql_fetch_fields(m_resultset);
for (int64_t column_index = 0; column_index < m_columns; column_index++)
for (int64_t column_index = 0; column_index < columns; column_index++)
{
string key(field_info[column_index].name);
// TODO: Think of a way to handle duplicate names nicely. Currently this should only be used
@ -1144,23 +1246,29 @@ QueryResult::~QueryResult()
bool QueryResult::next_row()
{
mxb_assert(m_resultset);
m_rowdata = mysql_fetch_row(m_resultset);
if (m_rowdata != NULL)
if (m_rowdata)
{
m_current_row++;
m_current_row_ind++;
return true;
}
return false;
}
int64_t QueryResult::get_row_index() const
int64_t QueryResult::get_current_row_index() const
{
return m_current_row;
return m_current_row_ind;
}
int64_t QueryResult::get_column_count() const
int64_t QueryResult::get_col_count() const
{
return m_columns;
return m_resultset ? mysql_num_fields(m_resultset): -1;
}
int64_t QueryResult::get_row_count() const
{
return m_resultset ? mysql_num_rows(m_resultset) : -1;
}
int64_t QueryResult::get_col_index(const string& col_name) const
@ -1171,14 +1279,14 @@ int64_t QueryResult::get_col_index(const string& col_name) const
string QueryResult::get_string(int64_t column_ind) const
{
mxb_assert(column_ind < m_columns && column_ind >= 0);
mxb_assert(column_ind < get_col_count() && column_ind >= 0);
char* data = m_rowdata[column_ind];
return data ? data : "";
}
int64_t QueryResult::get_uint(int64_t column_ind) const
{
mxb_assert(column_ind < m_columns && column_ind >= 0);
mxb_assert(column_ind < get_col_count() && column_ind >= 0);
char* data = m_rowdata[column_ind];
int64_t rval = -1;
if (data && *data)
@ -1196,7 +1304,7 @@ int64_t QueryResult::get_uint(int64_t column_ind) const
bool QueryResult::get_bool(int64_t column_ind) const
{
mxb_assert(column_ind < m_columns && column_ind >= 0);
mxb_assert(column_ind < get_col_count() && column_ind >= 0);
char* data = m_rowdata[column_ind];
return data ? (strcmp(data,"Y") == 0 || strcmp(data, "1") == 0) : false;
}

View File

@ -193,6 +193,15 @@ public:
*/
std::unique_ptr<QueryResult> execute_query(const std::string& query, std::string* errmsg_out = NULL);
/**
* Execute a query which does not return data. If the query returns data, an error is returned.
*
* @param cmd The query
* @param errmsg_out Error output. Can be null.
* @return True on success, false on error or if query returned data
*/
bool execute_cmd(const std::string& cmd, std::string* errmsg_out = NULL);
/**
* Update server slave connection information.
*
@ -449,6 +458,20 @@ public:
*/
void set_status(uint64_t bits);
/**
* Disable any "ENABLED" events if event scheduler is enabled.
*
* @return True if successful
*/
bool disable_events();
/**
* Enable any "SLAVESIDE_DISABLED" events if event scheduler is enabled.
*
* @return True if successful
*/
bool enable_events();
private:
bool update_slave_status(std::string* errmsg_out = NULL);
bool sstatus_array_topology_equal(const SlaveStatusArray& new_slave_status);
@ -480,14 +503,21 @@ public:
*
* @return Current row index, or -1 if no data or next_row() has not been called yet.
*/
int64_t get_row_index() const;
int64_t get_current_row_index() const;
/**
* How many columns the result set has.
*
* @return Column count, or -1 if no data.
*/
int64_t get_column_count() const;
int64_t get_col_count() const;
/**
* How many rows does the result set have?
*
* @return The number of rows or -1 on error
*/
int64_t get_row_count() const;
/**
* Get a numeric index for a column name. May give wrong results if column names are not unique.
@ -523,9 +553,8 @@ public:
bool get_bool(int64_t column_ind) const;
private:
MYSQL_RES* m_resultset; // Underlying result set, freed at dtor.
MYSQL_RES* m_resultset = NULL; // Underlying result set, freed at dtor.
std::unordered_map<std::string, int64_t> m_col_indexes; // Map of column name -> index
int64_t m_columns; // How many columns does the data have. Usually equal to column index map size.
MYSQL_ROW m_rowdata; // Data for current row
int64_t m_current_row; // Index of current row
MYSQL_ROW m_rowdata = NULL; // Data for current row
int64_t m_current_row_ind = -1; // Index of current row
};