From 739edcbe22542c61373a8165da34b2ca465b4690 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Wed, 18 Apr 2018 16:00:22 +0300 Subject: [PATCH] MXS-1639 Run user-given sql commands during promotion, demotion and rejoin The sql queries are given in two text files, defined by options promotion_sql_file and demotion_sql_file. The files must exist when monitor starts. The files are read line by line, ignoring empty lines and lines starting with '#'. All other lines are sent to the server being promoted, demoted or rejoined. Any error in opening a file, reading it or executing the contents will cause the entire operation to fail. The filed defined in demotion_sql_file is also ran when rejoining a server. This is to ensure a previously failed master is "demoted" properly when it joins the cluster. --- .../modules/monitor/mariadbmon/mariadbmon.cc | 161 +++++++++++++++--- server/modules/monitor/mysqlmon.h | 2 + 2 files changed, 143 insertions(+), 20 deletions(-) diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index a84529b9e..72d59a875 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -18,6 +18,7 @@ #define MXS_MODULE_NAME "mariadbmon" #include "../mysqlmon.h" +#include #include #include #include @@ -118,7 +119,7 @@ static void read_server_variables(MXS_MONITORED_SERVER* database, MySqlServerInf static bool server_is_rejoin_suspect(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* server, MySqlServerInfo* master_info, json_t** output); static bool get_joinable_servers(MYSQL_MONITOR* mon, ServerVector* output); -static uint32_t do_rejoin(MYSQL_MONITOR* mon, const ServerVector& servers); +static uint32_t do_rejoin(MYSQL_MONITOR* mon, const ServerVector& servers, json_t** output); static bool join_cluster(MXS_MONITORED_SERVER* server, const char* change_cmd); static void disable_setting(MYSQL_MONITOR* mon, const char* setting); static bool cluster_can_be_joined(MYSQL_MONITOR* mon); @@ -130,6 +131,8 @@ static bool wait_cluster_stabilization(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* static string get_connection_errors(const ServerVector& servers); static int64_t scan_server_id(const char* id_string); static string generate_change_master_cmd(MYSQL_MONITOR* mon, const string& master_host, int master_port); +static bool run_sql_from_file(MXS_MONITORED_SERVER* server, const string& path, json_t** error_out); +static bool check_sql_files(MYSQL_MONITOR* mon); static bool report_version_err = true; static const char* hb_table_name = "maxscale_schema.replication_heartbeat"; @@ -140,6 +143,8 @@ static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout"; static const char CN_AUTO_REJOIN[] = "auto_rejoin"; static const char CN_FAILCOUNT[] = "failcount"; static const char CN_NO_PROMOTE_SERVERS[] = "servers_no_promotion"; +static const char CN_PROMOTION_SQL_FILE[] = "promotion_sql_file"; +static const char CN_DEMOTION_SQL_FILE[] = "demotion_sql_file"; // Parameters for master failure verification and timeout static const char CN_VERIFY_MASTER_FAILURE[] = "verify_master_failure"; @@ -737,7 +742,7 @@ bool mysql_rejoin(MXS_MONITOR* mon, SERVER* rejoin_server, json_t** output) { ServerVector joinable_server; joinable_server.push_back(mon_server); - if (do_rejoin(handle, joinable_server) == 1) + if (do_rejoin(handle, joinable_server, output) == 1) { rval = true; MXS_NOTICE("Rejoin performed."); @@ -915,6 +920,8 @@ extern "C" {CN_MASTER_FAILURE_TIMEOUT, MXS_MODULE_PARAM_COUNT, DEFAULT_MASTER_FAILURE_TIMEOUT}, {CN_AUTO_REJOIN, MXS_MODULE_PARAM_BOOL, "false"}, {CN_NO_PROMOTE_SERVERS, MXS_MODULE_PARAM_SERVERLIST}, + {CN_PROMOTION_SQL_FILE, MXS_MODULE_PARAM_PATH}, + {CN_DEMOTION_SQL_FILE, MXS_MODULE_PARAM_PATH}, {MXS_END_MODULE_PARAMS} } }; @@ -1095,6 +1102,8 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params) handle->verify_master_failure = config_get_bool(params, CN_VERIFY_MASTER_FAILURE); handle->master_failure_timeout = config_get_integer(params, CN_MASTER_FAILURE_TIMEOUT); handle->auto_rejoin = config_get_bool(params, CN_AUTO_REJOIN); + handle->promote_sql_file = config_get_string(params, CN_PROMOTION_SQL_FILE); + handle->demote_sql_file = config_get_string(params, CN_DEMOTION_SQL_FILE); handle->excluded_servers = NULL; handle->n_excluded = mon_config_get_servers(params, CN_NO_PROMOTE_SERVERS, monitor, @@ -1104,6 +1113,11 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params) error = true; } + if (!check_sql_files(handle)) + { + error = true; + } + if (!set_replication_credentials(handle, params)) { MXS_ERROR("Both '%s' and '%s' must be defined", CN_REPLICATION_USER, CN_REPLICATION_PASSWORD); @@ -2586,7 +2600,7 @@ monitorMain(void *arg) ServerVector joinable_servers; if (get_joinable_servers(handle, &joinable_servers)) { - uint32_t joins = do_rejoin(handle, joinable_servers); + uint32_t joins = do_rejoin(handle, joinable_servers, NULL); if (joins > 0) { MXS_NOTICE("%d server(s) redirected or rejoined the cluster.", joins); @@ -3728,13 +3742,24 @@ bool promote_new_master(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_master, js PRINT_MXS_JSON_ERROR(err_out, "Promotion failed: '%s'. Query: '%s'.", mysql_error(new_master->con), query); } - // If the previous master was a slave to an external master, start the equivalent slave connection on - // the new master. Success of replication is not checked. - else if (mon->external_master_port != PORT_UNKNOWN && - !start_external_replication(mon, new_master, err_out)) + else { - success = false; + // Promotion commands ran successfully, run promotion sql script file before external replication. + if (*mon->promote_sql_file && !run_sql_from_file(new_master, mon->promote_sql_file, err_out)) + { + PRINT_MXS_JSON_ERROR(err_out, "%s execution failed when promoting server '%s'.", + CN_PROMOTION_SQL_FILE, new_master->server->unique_name); + success = false; + } + // If the previous master was a slave to an external master, start the equivalent slave connection on + // the new master. Success of replication is not checked. + else if (mon->external_master_port != PORT_UNKNOWN && + !start_external_replication(mon, new_master, err_out)) + { + success = false; + } } + return success; } @@ -4148,6 +4173,13 @@ static bool switchover_demote_master(MYSQL_MONITOR* mon, PRINT_MXS_JSON_ERROR(err_out, GTID_ERROR); } } + else if (*mon->demote_sql_file && !run_sql_from_file(current_master, mon->demote_sql_file, err_out)) + { + PRINT_MXS_JSON_ERROR(err_out, "%s execution failed when demoting server '%s'.", + CN_DEMOTION_SQL_FILE, current_master->server->unique_name); + success = false; + } + return success; } @@ -4742,9 +4774,10 @@ static bool get_joinable_servers(MYSQL_MONITOR* mon, ServerVector* output) * * @param mon Cluster monitor * @param joinable_servers Which servers to rejoin + * @param output Error output. Can be null. * @return The number of servers successfully rejoined */ -static uint32_t do_rejoin(MYSQL_MONITOR* mon, const ServerVector& joinable_servers) +static uint32_t do_rejoin(MYSQL_MONITOR* mon, const ServerVector& joinable_servers, json_t** output) { SERVER* master = mon->master->server; uint32_t servers_joined = 0; @@ -4760,22 +4793,30 @@ static uint32_t do_rejoin(MYSQL_MONITOR* mon, const ServerVector& joinable_serve const char* master_name = master->unique_name; MySqlServerInfo* redir_info = get_server_info(mon, joinable); - bool op_success; - if (redir_info->n_slaves_configured == 0) + if (*mon->demote_sql_file && !run_sql_from_file(joinable, mon->demote_sql_file, output)) { - MXS_NOTICE("Directing standalone server '%s' to replicate from '%s'.", name, master_name); - op_success = join_cluster(joinable, change_cmd.c_str()); + PRINT_MXS_JSON_ERROR(output, "%s execution failed when attempting to rejoin server '%s'.", + CN_DEMOTION_SQL_FILE, joinable->server->unique_name); } else { - MXS_NOTICE("Server '%s' is replicating from a server other than '%s', " - "redirecting it to '%s'.", name, master_name, master_name); - op_success = redirect_one_slave(joinable, change_cmd.c_str()); - } + bool op_success; + if (redir_info->n_slaves_configured == 0) + { + MXS_NOTICE("Directing standalone server '%s' to replicate from '%s'.", name, master_name); + op_success = join_cluster(joinable, change_cmd.c_str()); + } + else + { + MXS_NOTICE("Server '%s' is replicating from a server other than '%s', " + "redirecting it to '%s'.", name, master_name, master_name); + op_success = redirect_one_slave(joinable, change_cmd.c_str()); + } - if (op_success) - { - servers_joined++; + if (op_success) + { + servers_joined++; + } } } } @@ -4876,3 +4917,83 @@ static int64_t scan_server_id(const char* id_string) ss_dassert(server_id >= SERVER_ID_MIN && server_id <= SERVER_ID_MAX); return server_id; } + +/** + * Read the file contents and send them as sql queries to the server. Queries should not return any data. + * + * @param server Server to send queries to + * @param path Text file path. + * @param error_out Error output + * @return True if file was read and all commands were completed successfully + */ +static bool run_sql_from_file(MXS_MONITORED_SERVER* server, const string& path, json_t** error_out) +{ + MYSQL* conn = server->con; + bool error = false; + std::ifstream sql_file(path); + if (sql_file.is_open()) + { + MXS_NOTICE("Executing sql queries from file '%s'.", path.c_str()); + int lines_executed = 0; + + while (!sql_file.eof() && !error) + { + string line; + std::getline(sql_file, line); + if (sql_file.bad()) + { + PRINT_MXS_JSON_ERROR(error_out, "Error when reading sql text file '%s': '%s'.", + path.c_str(), mxs_strerror(errno)); + error = true; + } + // Skip empty lines and comment lines + else if (!line.empty() && line[0] != '#') + { + if (mxs_mysql_query(conn, line.c_str()) == 0) + { + lines_executed++; + } + else + { + PRINT_MXS_JSON_ERROR(error_out, "Failed to execute sql from text file '%s'. Query: '%s'. " + "Error: '%s'.", path.c_str(), line.c_str(), mysql_error(conn)); + error = true; + } + } + } + MXS_NOTICE("%d queries executed successfully.", lines_executed); + } + else + { + PRINT_MXS_JSON_ERROR(error_out, "Could not open sql text file '%s'.", path.c_str()); + error = true; + } + return !error; +} + +/** + * Check sql text file parameters. A parameter should either be empty or a valid file which can be opened. + * + * @param mon The monitor + * @return True if no errors occurred when opening the files + */ +static bool check_sql_files(MYSQL_MONITOR* mon) +{ + const char ERRMSG[] = "%s ('%s') does not exist or cannot be accessed for reading: '%s'."; + const char* promote_file = mon->promote_sql_file; + const char* demote_file = mon->demote_sql_file; + + bool rval = true; + if (*promote_file && access(promote_file, R_OK) != 0) + { + rval = false; + MXS_ERROR(ERRMSG, CN_PROMOTION_SQL_FILE, promote_file, mxs_strerror(errno)); + } + + if (*demote_file && access(demote_file, R_OK) != 0) + { + rval = false; + MXS_ERROR(ERRMSG, CN_DEMOTION_SQL_FILE, demote_file, mxs_strerror(errno)); + } + return rval; +} diff --git a/server/modules/monitor/mysqlmon.h b/server/modules/monitor/mysqlmon.h index 11ae6b591..df1dcd214 100644 --- a/server/modules/monitor/mysqlmon.h +++ b/server/modules/monitor/mysqlmon.h @@ -77,6 +77,8 @@ typedef struct replicating from the wrong master. */ int n_excluded; /**< Number of excluded servers */ MXS_MONITORED_SERVER** excluded_servers; /**< Servers banned for master promotion during auto-failover. */ + const char* promote_sql_file; /**< File with sql commands which are ran to a server being promoted. */ + const char* demote_sql_file; /**< File with sql commands which are ran to a server being demoted. */ MXS_MONITOR* monitor; } MYSQL_MONITOR;