From 1a046bd45360607a32d16da162c8c47f7342333c Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Fri, 16 Nov 2018 10:19:49 +0200 Subject: [PATCH 01/19] MXS-2168 Add 'assume_unique_hostnames'-setting to MariaDBMonitor Adds the setting and takes it into use during replication graph creation and the most important checks. --- .../monitor/mariadbmon/cluster_discovery.cc | 72 +++++++++++++------ .../modules/monitor/mariadbmon/mariadbmon.cc | 42 ++++++++++- .../modules/monitor/mariadbmon/mariadbmon.hh | 2 + .../monitor/mariadbmon/mariadbserver.cc | 62 +++++++++++----- .../monitor/mariadbmon/mariadbserver.hh | 5 +- 5 files changed, 143 insertions(+), 40 deletions(-) diff --git a/server/modules/monitor/mariadbmon/cluster_discovery.cc b/server/modules/monitor/mariadbmon/cluster_discovery.cc index d7995b24f..f6ff8f91b 100644 --- a/server/modules/monitor/mariadbmon/cluster_discovery.cc +++ b/server/modules/monitor/mariadbmon/cluster_discovery.cc @@ -160,8 +160,13 @@ void MariaDBMonitor::tarjan_scc_visit_node(MariaDBServer* node, } } +/** + * Use slave status and server id information to build the replication graph. Needs to be called whenever + * topology has changed, or it's suspected. + */ void MariaDBMonitor::build_replication_graph() { + const bool use_hostnames = m_assume_unique_hostnames; // First, reset all node data. for (MariaDBServer* server : m_servers) { @@ -169,39 +174,64 @@ void MariaDBMonitor::build_replication_graph() server->m_node.reset_results(); } - /* Here, all slave connections are added to the graph, even if the IO thread cannot connect. Strictly - * speaking, building the parents-array is not required as the data already exists. This construction - * is more for convenience and faster access later on. */ for (MariaDBServer* slave : m_servers) { - /* All servers are accepted in this loop, even if the server is [Down] or [Maintenance]. For these - * servers, we just use the latest available information. Not adding such servers could suddenly - * change the topology quite a bit and all it would take is a momentarily network failure. */ - + /* Check all slave connections of all servers. Connections are added even if one or both endpoints + * are down or in maintenance. */ for (SlaveStatus& slave_conn : slave->m_slave_status) { - /* We always trust the "Master_Server_Id"-field of the SHOW SLAVE STATUS output, as long as - * the id is > 0 (server uses 0 for default). This means that the graph constructed is faulty if - * an old "Master_Server_Id"- value is read from a slave which is still trying to connect to - * a new master. However, a server is only designated [Slave] if both IO- and SQL-threads are - * running fine, so the faulty graph does not cause wrong status settings. */ - /* IF THIS PART IS CHANGED, CHANGE THE COMPARISON IN 'sstatus_arrays_topology_equal' * (in MariaDBServer) accordingly so that any possible topology changes are detected. */ - auto master_id = slave_conn.master_server_id; - if (slave_conn.slave_io_running != SlaveStatus::SLAVE_IO_NO && master_id > 0) + if (slave_conn.slave_io_running != SlaveStatus::SLAVE_IO_NO && slave_conn.slave_sql_running) { - // Valid slave connection, find the MariaDBServer with this id. - auto master = get_server(master_id); - if (master != NULL) + // Looks promising, check hostname or server id. + MariaDBServer* found_master = NULL; + bool is_external = false; + if (use_hostnames) { - slave->m_node.parents.push_back(master); - master->m_node.children.push_back(slave); + found_master = get_server(slave_conn.master_host, slave_conn.master_port); + if (!found_master) + { + // Must be an external server. + is_external = true; + } } else + { + /* Cannot trust hostname:port since network may be complicated. Instead, + * trust the "Master_Server_Id"-field of the SHOW SLAVE STATUS output if + * the slave connection has been seen connected before. This means that + * the graph will miss slave-master relations that have not connected + * while the monitor has been running. TODO: This data should be saved so + * that monitor restarts do not lose this information. */ + if (slave_conn.seen_connected) + { + // Valid slave connection, find the MariaDBServer with the matching server id. + found_master = get_server(slave_conn.master_server_id); + if (!found_master) + { + /* Likely an external master. It's possible that the master is a monitored + * server which has not been queried yet and the monitor does not know its + * id. */ + is_external = true; + } + } + } + + // Valid slave connection, find the MariaDBServer with this id. + if (found_master) + { + /* Building the parents-array is not strictly required as the same data is in + * the children-array. This construction is more for convenience and faster + * access later on. */ + slave->m_node.parents.push_back(found_master); + found_master->m_node.children.push_back(slave); + } + else if (is_external) { // This is an external master connection. Save just the master id for now. - slave->m_node.external_masters.push_back(master_id); + // TODO: Save host:port instead + slave->m_node.external_masters.push_back(slave_conn.master_server_id); } } } diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index 09a3bbcd6..20369c3b2 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -47,6 +47,7 @@ static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout"; static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout"; static const char CN_DETECT_STANDALONE_MASTER[] = "detect_standalone_master"; static const char CN_MAINTENANCE_ON_LOW_DISK_SPACE[] = "maintenance_on_low_disk_space"; +static const char CN_ASSUME_UNIQUE_HOSTNAMES[] = "assume_unique_hostnames"; // Parameters for master failure verification and timeout static const char CN_VERIFY_MASTER_FAILURE[] = "verify_master_failure"; static const char CN_MASTER_FAILURE_TIMEOUT[] = "master_failure_timeout"; @@ -78,7 +79,7 @@ void MariaDBMonitor::reset_server_info() // Next, initialize the data. for (auto mon_server = m_monitor->monitored_servers; mon_server; mon_server = mon_server->next) { - m_servers.push_back(new MariaDBServer(mon_server, m_servers.size())); + m_servers.push_back(new MariaDBServer(mon_server, m_servers.size(), m_assume_unique_hostnames)); } for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) { @@ -151,6 +152,22 @@ MariaDBServer* MariaDBMonitor::get_server(SERVER* server) return found; } +MariaDBServer* MariaDBMonitor::get_server(const std::string& host, int port) +{ + // TODO: Do this with a map lookup + MariaDBServer* found = NULL; + for (MariaDBServer* server : m_servers) + { + SERVER* srv = server->m_server_base->server; + if (host == srv->address && srv->port == port) + { + found = server; + break; + } + } + return found; +} + bool MariaDBMonitor::set_replication_credentials(const MXS_CONFIG_PARAMETER* params) { bool rval = false; @@ -197,6 +214,7 @@ bool MariaDBMonitor::configure(const MXS_CONFIG_PARAMETER* params) m_detect_stale_slave = config_get_bool(params, "detect_stale_slave"); m_ignore_external_masters = config_get_bool(params, "ignore_external_masters"); m_detect_standalone_master = config_get_bool(params, CN_DETECT_STANDALONE_MASTER); + m_assume_unique_hostnames = config_get_bool(params, CN_ASSUME_UNIQUE_HOSTNAMES); m_failcount = config_get_integer(params, CN_FAILCOUNT); m_failover_timeout = config_get_integer(params, CN_FAILOVER_TIMEOUT); m_switchover_timeout = config_get_integer(params, CN_SWITCHOVER_TIMEOUT); @@ -230,6 +248,25 @@ bool MariaDBMonitor::configure(const MXS_CONFIG_PARAMETER* params) MXS_ERROR("Both '%s' and '%s' must be defined", CN_REPLICATION_USER, CN_REPLICATION_PASSWORD); settings_ok = false; } + if (!m_assume_unique_hostnames) + { + const char requires[] = "%s requires that %s is on."; + if (m_auto_failover) + { + MXB_ERROR(requires, CN_AUTO_FAILOVER, CN_ASSUME_UNIQUE_HOSTNAMES); + settings_ok = false; + } + if (m_switchover_on_low_disk_space) + { + MXB_ERROR(requires, CN_SWITCHOVER_ON_LOW_DISK_SPACE, CN_ASSUME_UNIQUE_HOSTNAMES); + settings_ok = false; + } + if (m_auto_rejoin) + { + MXB_ERROR(requires, CN_AUTO_REJOIN, CN_ASSUME_UNIQUE_HOSTNAMES); + settings_ok = false; + } + } return settings_ok; } @@ -1084,6 +1121,9 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE() { CN_HANDLE_EVENTS, MXS_MODULE_PARAM_BOOL, "true" }, + { + CN_ASSUME_UNIQUE_HOSTNAMES, MXS_MODULE_PARAM_BOOL, "true" + }, {MXS_END_MODULE_PARAMS} } }; diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh index 9cf58696a..d8e4c9e7b 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.hh +++ b/server/modules/monitor/mariadbmon/mariadbmon.hh @@ -200,6 +200,7 @@ private: * TODO: think about removing */ bool m_ignore_external_masters = false; /* Ignore masters outside of the monitor configuration. * TODO: requires work */ + bool m_assume_unique_hostnames = true; /* Are server hostnames consistent between MaxScale and servers */ int m_failcount = 1; /* Number of ticks master must be down before it's considered * totally down, allowing failover or master change. */ @@ -253,6 +254,7 @@ private: MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db); MariaDBServer* get_server(int64_t id); MariaDBServer* get_server(SERVER* server); + MariaDBServer* get_server(const std::string& host, int port); // Cluster discovery and status assignment methods, top levels void update_server(MariaDBServer* server); diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index c91e62374..4deb47da5 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -35,9 +35,11 @@ public: std::string status; }; -MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index) +MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index, + bool assume_unique_hostnames) : m_server_base(monitored_server) , m_config_index(config_index) + , m_assume_unique_hostnames(assume_unique_hostnames) { mxb_assert(monitored_server); } @@ -951,8 +953,9 @@ void MariaDBServer::set_status(uint64_t bits) /** * Compare if the given slave status array is equal to the one stored in the MariaDBServer. - * Only compares the parts relevant for building replication topology: master server id:s and - * slave connection io states. + * Only compares the parts relevant for building replication topology: slave IO/SQL state, + * host:port and master server id:s. When unsure, return false. This must match + * 'build_replication_graph()' in the monitor class. * * @param new_slave_status Right hand side * @return True if equal @@ -969,10 +972,14 @@ bool MariaDBServer::sstatus_array_topology_equal(const SlaveStatusArray& new_sla { for (size_t i = 0; i < old_slave_status.size(); i++) { - // It's enough to check just the following two items, as these are used in - // 'build_replication_graph'. - if (old_slave_status[i].slave_io_running != new_slave_status[i].slave_io_running - || old_slave_status[i].master_server_id != new_slave_status[i].master_server_id) + const auto new_row = new_slave_status[i]; + const auto old_row = old_slave_status[i]; + // Strictly speaking, the following should depend on the 'assume_unique_hostnames', + // but the situations it would make a difference are so rare they can be ignored. + if (new_row.slave_io_running != old_row.slave_io_running + || new_row.slave_sql_running != old_row.slave_sql_running + || new_row.master_host != old_row.master_host || new_row.master_port != old_row.master_port + || new_row.master_server_id != old_row.master_server_id) { rval = false; break; @@ -1144,19 +1151,40 @@ bool MariaDBServer::can_be_promoted(OperationType op, const MariaDBServer* demot const SlaveStatus* MariaDBServer::slave_connection_status(const MariaDBServer* target) const { // The slave node may have several slave connections, need to find the one that is - // connected to the parent. This section is quite similar to the one in - // 'build_replication_graph', although here we require that the sql thread is running. - auto target_id = target->m_server_id; + // connected to the parent. TODO: Use the information gathered in 'build_replication_graph' + // to skip this function, as the contents are very similar. const SlaveStatus* rval = NULL; - for (const SlaveStatus& ss : m_slave_status) + if (m_assume_unique_hostnames) { - auto master_id = ss.master_server_id; - // Should this check 'Master_Host' and 'Master_Port' instead of server id:s? - if (master_id > 0 && master_id == target_id && ss.slave_sql_running && ss.seen_connected - && ss.slave_io_running != SlaveStatus::SLAVE_IO_NO) + // Can simply compare host:port. + SERVER* target_srv = target->m_server_base->server; + string target_host = target_srv->address; + int target_port = target_srv->port; + for (const SlaveStatus& ss : m_slave_status) { - rval = &ss; - break; + if (ss.master_host == target_host && ss.master_port == target_port && + ss.slave_sql_running && ss.slave_io_running != SlaveStatus::SLAVE_IO_NO) + { + rval = &ss; + break; + } + } + } + else + { + // Compare server id:s instead. If the master's id is wrong (e.g. never updated) this gives a + // wrong result. Also gives wrong result if monitor has never seen the slave connection in the + // connected state. + auto target_id = target->m_server_id; + for (const SlaveStatus& ss : m_slave_status) + { + auto master_id = ss.master_server_id; + if (master_id > 0 && master_id == target_id && ss.slave_sql_running && ss.seen_connected + && ss.slave_io_running != SlaveStatus::SLAVE_IO_NO) + { + rval = &ss; + break; + } } } return rval; diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh index e13c0cba5..6ec7eca12 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.hh +++ b/server/modules/monitor/mariadbmon/mariadbserver.hh @@ -119,6 +119,8 @@ public: /* Replication lag of the server. Used during calculation so that the actual SERVER struct is * only written to once. */ int m_replication_lag = MXS_RLAG_UNDEFINED; + /* Copy of same field in monitor object. TODO: pass in struct when adding concurrent updating. */ + bool m_assume_unique_hostnames = true; /* Has anything that could affect replication topology changed this iteration? * Causes: server id, slave connections, read-only. */ bool m_topology_changed = true; @@ -128,7 +130,8 @@ public: bool m_print_update_errormsg = true; /* Should an update error be printed? */ - MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index); + MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index, + bool assume_unique_hostnames = true); /** * Print server information to a json object. From 90da3a4d90247321abc6be3c3da035b0ffd66e9a Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Fri, 16 Nov 2018 17:20:29 +0200 Subject: [PATCH 02/19] Remove MXS_MONITORED_SERVER mapping from MariaDBMon The mapping was rarely used. --- .../mariadbmon/cluster_manipulation.cc | 26 ++++--- .../modules/monitor/mariadbmon/mariadbmon.cc | 71 +++++++------------ .../modules/monitor/mariadbmon/mariadbmon.hh | 11 ++- .../mariadbmon/test/test_cycle_find.cc | 5 +- 4 files changed, 42 insertions(+), 71 deletions(-) diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc index c18220705..a57e53d46 100644 --- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc +++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc @@ -105,17 +105,15 @@ bool MariaDBMonitor::manual_failover(json_t** output) return failover_done; } -bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output) +bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_cand_srv, json_t** output) { bool rval = false; if (cluster_can_be_joined()) { - const char* rejoin_serv_name = rejoin_server->name; - MXS_MONITORED_SERVER* mon_slave_cand = mon_get_monitored_server(m_monitor, rejoin_server); - if (mon_slave_cand) + MariaDBServer* rejoin_cand = get_server(rejoin_cand_srv); + if (rejoin_cand) { - MariaDBServer* slave_cand = get_server_info(mon_slave_cand); - if (server_is_rejoin_suspect(slave_cand, output)) + if (server_is_rejoin_suspect(rejoin_cand, output)) { string gtid_update_error; if (m_master->update_gtids(>id_update_error)) @@ -125,8 +123,8 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output) // can be rejoined manually. // TODO: Add the warning to JSON output. string no_rejoin_reason; - bool safe_rejoin = slave_cand->can_replicate_from(m_master, &no_rejoin_reason); - bool empty_gtid = slave_cand->m_gtid_current_pos.empty(); + bool safe_rejoin = rejoin_cand->can_replicate_from(m_master, &no_rejoin_reason); + bool empty_gtid = rejoin_cand->m_gtid_current_pos.empty(); bool rejoin_allowed = false; if (safe_rejoin) { @@ -138,19 +136,19 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output) { rejoin_allowed = true; MXB_WARNING("gtid_curren_pos of %s is empty. Manual rejoin is unsafe " - "but allowed.", rejoin_serv_name); + "but allowed.", rejoin_cand->name()); } else { PRINT_MXS_JSON_ERROR(output, "%s cannot replicate from master server %s: %s", - rejoin_serv_name, m_master->name(), + rejoin_cand->name(), m_master->name(), no_rejoin_reason.c_str()); } } if (rejoin_allowed) { - ServerArray joinable_server = {slave_cand}; + ServerArray joinable_server = {rejoin_cand}; if (do_rejoin(joinable_server, output) == 1) { rval = true; @@ -172,13 +170,13 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output) } else { - PRINT_MXS_JSON_ERROR(output, "The given server '%s' is not monitored by this monitor.", - rejoin_serv_name); + PRINT_MXS_JSON_ERROR(output, "%s is not monitored by %s, cannot rejoin.", + rejoin_cand_srv->name, m_monitor->name); } } else { - const char BAD_CLUSTER[] = "The server cluster of monitor '%s' is not in a state valid for joining. " + const char BAD_CLUSTER[] = "The server cluster of monitor %s is not in a valid state for joining. " "Either it has no master or its gtid domain is unknown."; PRINT_MXS_JSON_ERROR(output, BAD_CLUSTER, m_monitor->name); } diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index 20369c3b2..ecad99d1c 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -81,13 +81,6 @@ void MariaDBMonitor::reset_server_info() { m_servers.push_back(new MariaDBServer(mon_server, m_servers.size(), m_assume_unique_hostnames)); } - for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) - { - auto mon_server = (*iter)->m_server_base; - mxb_assert(m_server_info.count(mon_server) == 0); - ServerInfoMap::value_type new_val(mon_server, *iter); - m_server_info.insert(new_val); - } } void MariaDBMonitor::clear_server_info() @@ -98,7 +91,6 @@ void MariaDBMonitor::clear_server_info() } // All MariaDBServer*:s are now invalid, as well as any dependant data. m_servers.clear(); - m_server_info.clear(); m_servers_by_id.clear(); m_excluded_servers.clear(); assign_new_master(NULL); @@ -116,42 +108,6 @@ void MariaDBMonitor::reset_node_index_info() } } -/** - * Get monitor-specific server info for the monitored server. - * - * @param handle - * @param db Server to get info for. Must be a valid server or function crashes. - * @return The server info. - */ -MariaDBServer* MariaDBMonitor::get_server_info(MXS_MONITORED_SERVER* db) -{ - mxb_assert(m_server_info.count(db) == 1); // Should always exist in the map - return m_server_info[db]; -} - -MariaDBServer* MariaDBMonitor::get_server(int64_t id) -{ - auto found = m_servers_by_id.find(id); - return (found != m_servers_by_id.end()) ? (*found).second : NULL; -} - -/** - * Get the equivalent MariaDBServer. - * - * @param server Which server to search for - * @return MariaDBServer if found, NULL otherwise - */ -MariaDBServer* MariaDBMonitor::get_server(SERVER* server) -{ - MariaDBServer* found = NULL; - auto mon_server = mon_get_monitored_server(m_monitor, server); - if (mon_server) - { - found = get_server_info(mon_server); - } - return found; -} - MariaDBServer* MariaDBMonitor::get_server(const std::string& host, int port) { // TODO: Do this with a map lookup @@ -168,6 +124,29 @@ MariaDBServer* MariaDBMonitor::get_server(const std::string& host, int port) return found; } +MariaDBServer* MariaDBMonitor::get_server(int64_t id) +{ + auto found = m_servers_by_id.find(id); + return (found != m_servers_by_id.end()) ? (*found).second : NULL; +} + +MariaDBServer* MariaDBMonitor::get_server(MXS_MONITORED_SERVER* mon_server) +{ + return get_server(mon_server->server); +} + +MariaDBServer* MariaDBMonitor::get_server(SERVER* server) +{ + for (auto iter : m_servers) + { + if (iter->m_server_base->server == server) + { + return iter; + } + } + return NULL; +} + bool MariaDBMonitor::set_replication_credentials(const MXS_CONFIG_PARAMETER* params) { bool rval = false; @@ -234,7 +213,7 @@ bool MariaDBMonitor::configure(const MXS_CONFIG_PARAMETER* params) int n_excluded = mon_config_get_servers(params, CN_NO_PROMOTE_SERVERS, m_monitor, &excluded_array); for (int i = 0; i < n_excluded; i++) { - m_excluded_servers.push_back(get_server_info(excluded_array[i])); + m_excluded_servers.push_back(get_server(excluded_array[i])); } MXS_FREE(excluded_array); @@ -437,7 +416,7 @@ void MariaDBMonitor::pre_loop() // This is somewhat questionable, as the journal only contains status bits but no actual topology // info. In a fringe case the actual queried topology may not match the journal data, freezing the // master to a suboptimal choice. - assign_new_master(get_server_info(journal_master)); + assign_new_master(get_server(journal_master)); } /* This loop can be removed if/once the replication check code is inside tick. It's required so that diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh index d8e4c9e7b..8a40356ce 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.hh +++ b/server/modules/monitor/mariadbmon/mariadbmon.hh @@ -29,8 +29,6 @@ extern const char* const CN_SWITCHOVER_ON_LOW_DISK_SPACE; extern const char* const CN_PROMOTION_SQL_FILE; extern const char* const CN_DEMOTION_SQL_FILE; -// Map of base struct to MariaDBServer. Does not own the server objects. -typedef std::unordered_map ServerInfoMap; // Map of server id:s to MariaDBServer. Useful when constructing the replication graph. typedef std::unordered_map IdToServerMap; // Map of cycle number to cycle members. The elements should be ordered for predictability when iterating. @@ -168,7 +166,6 @@ private: // Server containers, mostly constant. ServerArray m_servers; /* Servers of the monitor */ - ServerInfoMap m_server_info; /* Map from server base struct to MariaDBServer */ IdToServerMap m_servers_by_id; /* Map from server id:s to MariaDBServer */ // Topology related fields @@ -251,10 +248,10 @@ private: std::string diagnostics_to_string() const; json_t* to_json() const; - MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db); - MariaDBServer* get_server(int64_t id); - MariaDBServer* get_server(SERVER* server); MariaDBServer* get_server(const std::string& host, int port); + MariaDBServer* get_server(int64_t id); + MariaDBServer* get_server(MXS_MONITORED_SERVER* mon_server); + MariaDBServer* get_server(SERVER* server); // Cluster discovery and status assignment methods, top levels void update_server(MariaDBServer* server); @@ -283,7 +280,7 @@ private: // Cluster operation launchers bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out); bool manual_failover(json_t** output); - bool manual_rejoin(SERVER* rejoin_server, json_t** output); + bool manual_rejoin(SERVER* rejoin_cand_srv, json_t** output); void handle_low_disk_space_master(); void handle_auto_failover(); void handle_auto_rejoin(); diff --git a/server/modules/monitor/mariadbmon/test/test_cycle_find.cc b/server/modules/monitor/mariadbmon/test/test_cycle_find.cc index 2a5a72829..c1f03ef82 100644 --- a/server/modules/monitor/mariadbmon/test/test_cycle_find.cc +++ b/server/modules/monitor/mariadbmon/test/test_cycle_find.cc @@ -140,8 +140,7 @@ int MariaDBMonitor::Test::run_tests() void MariaDBMonitor::Test::init_servers(int count) { clear_servers(); - mxb_assert(m_monitor->m_server_info.empty() && m_monitor->m_servers.empty() - && m_monitor->m_servers_by_id.empty()); + mxb_assert(m_monitor->m_servers.empty() && m_monitor->m_servers_by_id.empty()); for (int i = 1; i < count + 1; i++) { @@ -154,7 +153,6 @@ void MariaDBMonitor::Test::init_servers(int count) MariaDBServer* new_server = new MariaDBServer(mon_server, i - 1); new_server->m_server_id = i; m_monitor->m_servers.push_back(new_server); - m_monitor->m_server_info[mon_server] = new_server; m_monitor->m_servers_by_id[i] = new_server; } m_current_test++; @@ -165,7 +163,6 @@ void MariaDBMonitor::Test::init_servers(int count) */ void MariaDBMonitor::Test::clear_servers() { - m_monitor->m_server_info.clear(); m_monitor->m_servers_by_id.clear(); for (MariaDBServer* server : m_monitor->m_servers) { From c61aaee0ba62ef20ed9b2c0b8483db76dd1834f0 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Fri, 16 Nov 2018 18:10:48 +0200 Subject: [PATCH 03/19] MXS-2168 Add documentation on "assume_unique_hostnames" --- Documentation/Monitors/MariaDB-Monitor.md | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/Documentation/Monitors/MariaDB-Monitor.md b/Documentation/Monitors/MariaDB-Monitor.md index 78eb4510c..38e849c83 100644 --- a/Documentation/Monitors/MariaDB-Monitor.md +++ b/Documentation/Monitors/MariaDB-Monitor.md @@ -8,6 +8,7 @@ Table of Contents * [Configuration](#configuration) * [Common Monitor Parameters](#common-monitor-parameters) * [MariaDB Monitor optional parameters](#mariadb-monitor-optional-parameters) + * [assume_unique_hostnames](#assume_unique_hostnames) * [detect_replication_lag](#detect_replication_lag) * [detect_stale_master](#detect_stale_master) * [detect_stale_slave](#detect_stale_slave) @@ -137,6 +138,33 @@ These are optional parameters specific to the MariaDB Monitor. Failover, switchover and rejoin-specific parameters are listed in their own [section](#cluster-manipulation-operations). +### `assume_unique_hostnames` + +Boolean, default: ON. When active, the monitor assumes that server hostnames and +ports are consistent between the MaxScale configuration file server definitions +and the "SHOW ALL SLAVES STATUS" outputs of the servers. Specifically, the +monitor assumes that if server A is replicating from server B, then A must have +a slave connection with `Master_Host` and `Master_Port` equal to B's address and +port in the configuration file. If this is not the case, e.g. an IP is used in +the server while a hostname is given in the file, the monitor will misinterpret +the topology. + +This setting must be ON to use any cluster operation features such as failover +or switchover, because MaxScale uses the addresses and ports in the +configuration file when issuing "CHANGE MASTER TO"-commands. + +If the network configuration is such that the addresses MaxScale uses to connect +to backends are different from the ones the servers use to connect to each +other, `assume_unique_hostnames` should be set to OFF. In this mode, MaxScale +uses server id:s it queries from the servers and the `Master_Server_Id` fields +of the slave connections to deduce which server is replicating from which. This +is not perfect though, since MaxScale doesn't know the id:s of servers it has +never connected to (e.g. server has been down since MaxScale was started). Also, +the `Master_Server_Id`-field may have an incorrect value if the slave connection +has not been established. MaxScale will only trust the value if the monitor has +seen the slave connection IO thread connected at least once. If this is not the +case, the slave connection is ignored. + ### `detect_replication_lag` Deprecated and unused as of MaxScale 2.3. Can be defined but is ignored. From 5d4775cac15d14e8f86ff631833b3fdebaf16c98 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Fri, 16 Nov 2018 10:21:16 +0200 Subject: [PATCH 04/19] MXS-2168 Update test_cycle_find The test now uses both server id:s and hostname:port combinations. --- .../mariadbmon/test/test_cycle_find.cc | 113 +++++++++++++----- 1 file changed, 84 insertions(+), 29 deletions(-) diff --git a/server/modules/monitor/mariadbmon/test/test_cycle_find.cc b/server/modules/monitor/mariadbmon/test/test_cycle_find.cc index c1f03ef82..e206bb4dd 100644 --- a/server/modules/monitor/mariadbmon/test/test_cycle_find.cc +++ b/server/modules/monitor/mariadbmon/test/test_cycle_find.cc @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -33,7 +32,7 @@ const int MAX_EDGES = 20; class MariaDBMonitor::Test { - // These defines are required since Centos 6 doesn't support vector literal initialisers :/ + // TODO: Now using C++11 even on Centos 6 so get rid of these typedef struct { int members[MAX_CYCLE_SIZE]; @@ -56,17 +55,22 @@ class MariaDBMonitor::Test } EdgeArray; public: - Test(); + Test(bool use_hostnames); ~Test(); int run_tests(); private: - MariaDBMonitor* m_monitor; - int m_current_test; + MariaDBMonitor* m_monitor = NULL; + int m_current_test = 0; + bool m_use_hostnames = true; void init_servers(int count); void clear_servers(); void add_replication(EdgeArray edges); int check_result_cycles(CycleArray expected_cycles); + + string create_servername(int i); + string create_hostname(int i); + MariaDBServer* get_server(int i); }; int main() @@ -74,13 +78,19 @@ int main() maxbase::init(); maxbase::Log log; - MariaDBMonitor::Test tester; - return tester.run_tests(); + bool use_hostnames = true; + MariaDBMonitor::Test tester1(use_hostnames); + int rval = tester1.run_tests(); + + use_hostnames = false; + MariaDBMonitor::Test tester2(use_hostnames); + rval += tester2.run_tests(); + return rval; } -MariaDBMonitor::Test::Test() +MariaDBMonitor::Test::Test(bool use_hostnames) : m_monitor(new MariaDBMonitor(NULL)) - , m_current_test(0) + , m_use_hostnames(use_hostnames) { } @@ -140,20 +150,36 @@ int MariaDBMonitor::Test::run_tests() void MariaDBMonitor::Test::init_servers(int count) { clear_servers(); + m_monitor->m_assume_unique_hostnames = m_use_hostnames; mxb_assert(m_monitor->m_servers.empty() && m_monitor->m_servers_by_id.empty()); for (int i = 1; i < count + 1; i++) { SERVER* base_server = new SERVER; // Contents mostly undefined - std::stringstream server_name; - server_name << "Server" << i; - base_server->name = MXS_STRDUP(server_name.str().c_str()); + string server_name = create_servername(i); + base_server->name = MXS_STRDUP(server_name.c_str()); + MXS_MONITORED_SERVER* mon_server = new MXS_MONITORED_SERVER; // Contents mostly undefined mon_server->server = base_server; - MariaDBServer* new_server = new MariaDBServer(mon_server, i - 1); - new_server->m_server_id = i; - m_monitor->m_servers.push_back(new_server); - m_monitor->m_servers_by_id[i] = new_server; + + MariaDBServer* mariadb_server = new MariaDBServer(mon_server, i - 1, m_use_hostnames); + + if (m_use_hostnames) + { + string hostname = create_hostname(i); + strcpy(base_server->address, hostname.c_str()); + base_server->port = i; + } + else + { + mariadb_server->m_server_id = i; + } + + m_monitor->m_servers.push_back(mariadb_server); + if (!m_use_hostnames) + { + m_monitor->m_servers_by_id[i] = mariadb_server; + } } m_current_test++; } @@ -189,12 +215,23 @@ void MariaDBMonitor::Test::add_replication(EdgeArray edges) { break; } - auto iter2 = m_monitor->m_servers_by_id.find(slave_id); - mxb_assert(iter2 != m_monitor->m_servers_by_id.end()); + SlaveStatus ss; - ss.master_server_id = master_id; ss.slave_io_running = SlaveStatus::SLAVE_IO_YES; - (*iter2).second->m_slave_status.push_back(ss); + ss.slave_sql_running = true; + if (m_use_hostnames) + { + ss.master_host = create_hostname(master_id); + ss.master_port = master_id; + } + else + { + ss.master_server_id = master_id; + ss.seen_connected = true; + } + + MariaDBServer* slave = get_server(slave_id); + slave->m_slave_status.push_back(ss); } m_monitor->build_replication_graph(); @@ -209,12 +246,12 @@ void MariaDBMonitor::Test::add_replication(EdgeArray edges) */ int MariaDBMonitor::Test::check_result_cycles(CycleArray expected_cycles) { - std::stringstream test_name_ss; - test_name_ss << "Test " << m_current_test << ": "; - string test_name = test_name_ss.str(); + string test_name = "Test " + std::to_string(m_current_test) + " (" + + (m_use_hostnames ? "hostnames" : "server id:s") + "): "; int errors = 0; - // Copy the index->server map so it can be checked later - IdToServerMap no_cycle_servers = m_monitor->m_servers_by_id; + + // Copy the servers for later comparison. + std::set no_cycle_servers(m_monitor->m_servers.begin(), m_monitor->m_servers.end()); std::set used_cycle_ids; for (auto ind_cycles = 0; ind_cycles < MAX_CYCLES; ind_cycles++) { @@ -227,7 +264,8 @@ int MariaDBMonitor::Test::check_result_cycles(CycleArray expected_cycles) { break; } - auto cycle_server = m_monitor->get_server(search_id); + + MariaDBServer* cycle_server = get_server(search_id); if (cycle_server->m_node.cycle == NodeData::CYCLE_NONE) { cout << test_name << cycle_server->name() << " is not in a cycle when it should.\n"; @@ -254,14 +292,13 @@ int MariaDBMonitor::Test::check_result_cycles(CycleArray expected_cycles) << " when " << cycle_id << " was expected.\n"; errors++; } - no_cycle_servers.erase(cycle_server->m_server_id); + no_cycle_servers.erase(cycle_server); } } // Check that servers not in expected_cycles are not in a cycle - for (auto& map_elem : no_cycle_servers) + for (MariaDBServer* server : no_cycle_servers) { - MariaDBServer* server = map_elem.second; if (server->m_node.cycle != NodeData::CYCLE_NONE) { cout << server->name() << " is in cycle " << server->m_node.cycle << " when none was expected.\n"; @@ -271,3 +308,21 @@ int MariaDBMonitor::Test::check_result_cycles(CycleArray expected_cycles) return errors; } + +MariaDBServer* MariaDBMonitor::Test::get_server(int i) +{ + auto rval = m_use_hostnames ? m_monitor->get_server(create_hostname(i), i) : + m_monitor->get_server(i); + mxb_assert(rval); + return rval; +} + +string MariaDBMonitor::Test::create_servername(int i) +{ + return "Server" + std::to_string(i); +} + +string MariaDBMonitor::Test::create_hostname(int i) +{ + return "hostname" + std::to_string(i) + ".mariadb.com"; +} From 923be73636e44401919e764a00854f7f14eff8a3 Mon Sep 17 00:00:00 2001 From: Niclas Antti Date: Mon, 19 Nov 2018 13:57:00 +0200 Subject: [PATCH 05/19] MXS-2171 Refuse to build without libsystemd-dev on systems that have systemd --- cmake/CheckPlatform.cmake | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/cmake/CheckPlatform.cmake b/cmake/CheckPlatform.cmake index 574b0f04a..ae66d659b 100644 --- a/cmake/CheckPlatform.cmake +++ b/cmake/CheckPlatform.cmake @@ -41,10 +41,21 @@ if(NOT HAVE_LIBPTHREAD) message(FATAL_ERROR "Could not find libpthread") endif() -# systemd libraries are optional +# run "ps -p 1 | grep systemd" to determine if this system uses systemd +execute_process( + COMMAND "ps" "-p" "1" + COMMAND "grep" "systemd" + RESULT_VARIABLE NOT_SYSTEMD_IS_RUNNING + OUTPUT_VARIABLE PS_OUTPUT) + find_library(HAVE_SYSTEMD NAMES systemd) if(HAVE_SYSTEMD) - add_definitions(-DHAVE_SYSTEMD=1) + add_definitions(-DHAVE_SYSTEMD=1) +else() + # If systemd is in use, require libsystemd-dev to be installed + if(NOT NOT_SYSTEMD_IS_RUNNING) + message( FATAL_ERROR "systemd is running: please install libsystemd-dev" ) + endif() endif() # The XSI version of strerror_r return an int and the GNU version a char* From be12cab16df4bf8ca4a56a27f8e200d473d131ce Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Wed, 21 Nov 2018 09:21:54 +0200 Subject: [PATCH 06/19] MXS-2178: Provide workaround for watchdog notifications The systemd watchdog mechanism requries notifications at regular intervals. If a synchronous operation of some kind is performed by a worker, then those notfications will not be generated. This change provides each worker with a secondary thread that can be used for triggering those notifications when the worker itself is busy doing other stuff. The effect is that there will be an additional thread for each worker, but most of the time that thread will be idle. Sofar only the mechanism; in subsequent changes the mechanism will be taken into use. --- include/maxscale/routingworker.hh | 89 ++++++++++++++++++++++++-- server/core/routingworker.cc | 102 ++++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+), 4 deletions(-) diff --git a/include/maxscale/routingworker.hh b/include/maxscale/routingworker.hh index 19dfe2438..c659c2f4a 100644 --- a/include/maxscale/routingworker.hh +++ b/include/maxscale/routingworker.hh @@ -432,7 +432,38 @@ public: * To be called from the initial (parent) thread if the systemd watchdog is on. */ static void set_watchdog_interval(uint64_t microseconds); + + /** + * The internal watchdog interval. + * + * @return The interval in seconds, 0 if not enabled. + */ + static uint32_t get_internal_watchdog_interval() + { + return s_watchdog_interval.secs(); + } + + // TODO: Public now ONLY so that it can be used in the workaround for the + // TODO "long max[admin|ctrl] operations may trigger the watchdog" problem. + void check_systemd_watchdog(); + + /** + * Turn on the watchdog workaround for this worker. + * + * @see mxs::WatchdogWorkaround + */ + void start_watchdog_workaround(); + + /** + * Turn off the watchdog workaround for this worker. + * + * @see mxs::WatchdogWorkaround + */ + void stop_watchdog_workaround(); + private: + class WatchdogNotifier; + const int m_id; /*< The id of the worker. */ SessionsById m_sessions; /*< A mapping of session_id->MXS_SESSION. The map * should contain sessions exclusive to this @@ -453,14 +484,64 @@ private: void epoll_tick(); // override void delete_zombies(); - void check_systemd_watchdog(); + //void check_systemd_watchdog(); static uint32_t epoll_instance_handler(MXB_POLL_DATA* data, MXB_WORKER* worker, uint32_t events); uint32_t handle_epoll_events(uint32_t events); - static maxbase::Duration s_watchdog_interval; /*< Duration between notifications, if any. */ - static maxbase::TimePoint s_watchdog_next_check;/*< Next time to notify systemd. */ - std::atomic m_alive; /*< Set to true in epoll_tick(), false on notification. */ + static maxbase::Duration s_watchdog_interval; /*< Duration between notifications, if any. */ + static maxbase::TimePoint s_watchdog_next_check; /*< Next time to notify systemd. */ + std::atomic m_alive; /*< Set to true in epoll_tick(), false on notification. */ + WatchdogNotifier* m_pWatchdog_notifier; /*< Watchdog notifier, if systemd enabled. */ +}; + +/** + * @class WatchdogWorkaround + * + * RAII-class using which the systemd watchdog notification can be + * handled during synchronous worker activity that causes the epoll + * event handling to be stalled. + * + * The constructor turns on the workaround and the destructor + * turns it off. + */ +class WatchdogWorkaround +{ + WatchdogWorkaround(const WatchdogWorkaround&); + WatchdogWorkaround& operator=(const WatchdogWorkaround&); + +public: + /** + * Turns on the watchdog workaround for a specific worker. + * + * @param pWorker The worker for which the systemd notification + * should be arranged. Need not be the calling worker. + */ + WatchdogWorkaround(RoutingWorker* pWorker) + : m_pWorker(pWorker) + { + mxb_assert(pWorker); + m_pWorker->start_watchdog_workaround(); + } + + /** + * Turns on the watchdog workaround for the calling worker. + */ + WatchdogWorkaround() + : WatchdogWorkaround(RoutingWorker::get_current()) + { + } + + /** + * Turns off the watchdog workaround. + */ + ~WatchdogWorkaround() + { + m_pWorker->stop_watchdog_workaround(); + } + +private: + RoutingWorker* m_pWorker; }; // Data local to a routing worker diff --git a/server/core/routingworker.cc b/server/core/routingworker.cc index 23aaac277..b909de73c 100644 --- a/server/core/routingworker.cc +++ b/server/core/routingworker.cc @@ -171,16 +171,102 @@ maxbase::Duration RoutingWorker::s_watchdog_interval {0}; // static maxbase::TimePoint RoutingWorker::s_watchdog_next_check; +class RoutingWorker::WatchdogNotifier +{ + WatchdogNotifier(const WatchdogNotifier&) = delete; + WatchdogNotifier& operator=(const WatchdogNotifier&) = delete; + +public: + WatchdogNotifier(mxs::RoutingWorker* pOwner) + : m_owner(*pOwner) + , m_nClients(0) + , m_terminate(false) + { + m_thread = std::thread([this] { + uint32_t interval = mxs::RoutingWorker::get_internal_watchdog_interval(); + timespec timeout = { interval, 0 }; + + while (!mxb::atomic::load(&m_terminate, mxb::atomic::RELAXED)) + { + // We will wakeup when someone wants the notifier to run, + // or when MaxScale is going down. + m_sem_start.wait(); + + if (!mxb::atomic::load(&m_terminate, mxb::atomic::RELAXED)) + { + // If MaxScale is not going down... + do + { + // we check the systemd watchdog... + m_owner.check_systemd_watchdog(); + } while (!m_sem_stop.timedwait(timeout)); + // until the semaphore is actually posted, which it will be + // once the notification should stop. + } + } + }); + } + + ~WatchdogNotifier() + { + mxb_assert(m_nClients == 0); + mxb::atomic::store(&m_terminate, true, mxb::atomic::RELAXED); + m_sem_start.post(); + m_thread.join(); + } + + void start() + { + Guard guard(m_lock); + mxb::atomic::add(&m_nClients, 1, mxb::atomic::RELAXED); + + if (m_nClients == 1) + { + m_sem_start.post(); + } + } + + void stop() + { + Guard guard(m_lock); + mxb::atomic::add(&m_nClients, -1, mxb::atomic::RELAXED); + mxb_assert(m_nClients >= 0); + + if (m_nClients == 0) + { + m_sem_stop.post(); + } + } + +private: + using Guard = std::lock_guard; + + mxs::RoutingWorker& m_owner; + int m_nClients; + bool m_terminate; + std::thread m_thread; + std::mutex m_lock; + mxb::Semaphore m_sem_start; + mxb::Semaphore m_sem_stop; +}; + RoutingWorker::RoutingWorker() : m_id(next_worker_id()) , m_alive(true) + , m_pWatchdog_notifier(nullptr) { MXB_POLL_DATA::handler = &RoutingWorker::epoll_instance_handler; MXB_POLL_DATA::owner = this; + + if (s_watchdog_interval.count() != 0) + { + m_pWatchdog_notifier = new WatchdogNotifier(this); + } } RoutingWorker::~RoutingWorker() { + delete m_pWatchdog_notifier; } // static @@ -994,6 +1080,22 @@ void maxscale::RoutingWorker::set_watchdog_interval(uint64_t microseconds) s_watchdog_next_check = maxbase::Clock::now(); } +void maxscale::RoutingWorker::start_watchdog_workaround() +{ + if (m_pWatchdog_notifier) + { + m_pWatchdog_notifier->start(); + } +} + +void maxscale::RoutingWorker::stop_watchdog_workaround() +{ + if (m_pWatchdog_notifier) + { + m_pWatchdog_notifier->stop(); + } +} + // A note about the below code. While the main worker is turning the "m_alive" values to false, // it is a possibility that another RoutingWorker sees the old value of "s_watchdog_next_check" // but its new "m_alive==false" value, marks itself alive and promptly hangs. This would cause a From 78829429ae9eea5ccdb26dc1e80d654c59705507 Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Wed, 21 Nov 2018 12:06:02 +0200 Subject: [PATCH 07/19] MXS-2178 Add WD workaround to REST-API and maxadmin --- server/core/resource.cc | 5 +++-- server/modules/routing/cli/cli.cc | 16 ++++++++-------- server/modules/routing/debugcli/debugcli.cc | 16 ++++++++-------- server/modules/routing/debugcli/debugcmd.cc | 9 ++++++--- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/server/core/resource.cc b/server/core/resource.cc index fcfe6ed54..8e359a56c 100644 --- a/server/core/resource.cc +++ b/server/core/resource.cc @@ -1327,10 +1327,11 @@ static HttpResponse handle_request(const HttpRequest& request) HttpResponse resource_handle_request(const HttpRequest& request) { - mxb::Worker* worker = mxs::RoutingWorker::get(mxs::RoutingWorker::MAIN); + mxs::RoutingWorker* worker = mxs::RoutingWorker::get(mxs::RoutingWorker::MAIN); HttpResponse response; - worker->call([&request, &response]() { + worker->call([&request, &response, worker]() { + mxs::WatchdogWorkaround workaround(worker); response = handle_request(request); }, mxb::Worker::EXECUTE_AUTO); diff --git a/server/modules/routing/cli/cli.cc b/server/modules/routing/cli/cli.cc index 599d33167..25e7d8dba 100644 --- a/server/modules/routing/cli/cli.cc +++ b/server/modules/routing/cli/cli.cc @@ -27,19 +27,19 @@ #define MXS_MODULE_NAME "cli" +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include /* The router entry points */ static MXS_ROUTER* createInstance(SERVICE* service, MXS_CONFIG_PARAMETER* params); diff --git a/server/modules/routing/debugcli/debugcli.cc b/server/modules/routing/debugcli/debugcli.cc index 07bc1a7e4..8fc46c324 100644 --- a/server/modules/routing/debugcli/debugcli.cc +++ b/server/modules/routing/debugcli/debugcli.cc @@ -26,19 +26,19 @@ #define MXS_MODULE_NAME "debugcli" +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include /* The router entry points */ static MXS_ROUTER* createInstance(SERVICE* service, MXS_CONFIG_PARAMETER* params); diff --git a/server/modules/routing/debugcli/debugcmd.cc b/server/modules/routing/debugcli/debugcmd.cc index 87432f422..38e75180a 100644 --- a/server/modules/routing/debugcli/debugcmd.cc +++ b/server/modules/routing/debugcli/debugcmd.cc @@ -35,9 +35,9 @@ #include #include +#include #include #include -#include #include #include #include @@ -47,13 +47,12 @@ #include #include #include +#include #include #include #include #include #include -#include -#include #include @@ -2102,6 +2101,10 @@ static const char item_separator[] = */ int execute_cmd(CLI_SESSION* cli) { + mxs::RoutingWorker* worker = mxs::RoutingWorker::get_current(); + mxb_assert(worker == mxs::RoutingWorker::get(mxs::RoutingWorker::MAIN)); + mxs::WatchdogWorkaround workaround(worker); + DCB* dcb = cli->session->client_dcb; int argc, i, j, found = 0; char* args[MAXARGS + 4]; From f1a113aff00580df5516e865e6916e2c5992428c Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Wed, 21 Nov 2018 12:59:37 +0200 Subject: [PATCH 08/19] MXS-2178 Reduce accessbility of workarounds Some rearrangements to ensure that what should be private can be kept private. - WatchdogNotifier made a friend. - WatchdogWorkaround defined in RoutingWorker and made a friend. - mxs::WatchdogWorker defined with 'using'. --- include/maxscale/routingworker.hh | 118 +++++++++++++----------------- server/core/routingworker.cc | 2 +- 2 files changed, 50 insertions(+), 70 deletions(-) diff --git a/include/maxscale/routingworker.hh b/include/maxscale/routingworker.hh index c659c2f4a..3af2498fe 100644 --- a/include/maxscale/routingworker.hh +++ b/include/maxscale/routingworker.hh @@ -433,36 +433,61 @@ public: */ static void set_watchdog_interval(uint64_t microseconds); + class WatchdogWorkaround; + friend WatchdogWorkaround; + /** - * The internal watchdog interval. + * @class WatchdogWorkaround * - * @return The interval in seconds, 0 if not enabled. + * RAII-class using which the systemd watchdog notification can be + * handled during synchronous worker activity that causes the epoll + * event handling to be stalled. + * + * The constructor turns on the workaround and the destructor + * turns it off. */ - static uint32_t get_internal_watchdog_interval() + class WatchdogWorkaround { - return s_watchdog_interval.secs(); - } + WatchdogWorkaround(const WatchdogWorkaround&); + WatchdogWorkaround& operator=(const WatchdogWorkaround&); - // TODO: Public now ONLY so that it can be used in the workaround for the - // TODO "long max[admin|ctrl] operations may trigger the watchdog" problem. - void check_systemd_watchdog(); + public: + /** + * Turns on the watchdog workaround for a specific worker. + * + * @param pWorker The worker for which the systemd notification + * should be arranged. Need not be the calling worker. + */ + WatchdogWorkaround(RoutingWorker* pWorker) + : m_pWorker(pWorker) + { + mxb_assert(pWorker); + m_pWorker->start_watchdog_workaround(); + } - /** - * Turn on the watchdog workaround for this worker. - * - * @see mxs::WatchdogWorkaround - */ - void start_watchdog_workaround(); + /** + * Turns on the watchdog workaround for the calling worker. + */ + WatchdogWorkaround() + : WatchdogWorkaround(RoutingWorker::get_current()) + { + } - /** - * Turn off the watchdog workaround for this worker. - * - * @see mxs::WatchdogWorkaround - */ - void stop_watchdog_workaround(); + /** + * Turns off the watchdog workaround. + */ + ~WatchdogWorkaround() + { + m_pWorker->stop_watchdog_workaround(); + } + + private: + RoutingWorker* m_pWorker; + }; private: class WatchdogNotifier; + friend WatchdogNotifier; const int m_id; /*< The id of the worker. */ SessionsById m_sessions; /*< A mapping of session_id->MXS_SESSION. The map @@ -484,7 +509,9 @@ private: void epoll_tick(); // override void delete_zombies(); - //void check_systemd_watchdog(); + void check_systemd_watchdog(); + void start_watchdog_workaround(); + void stop_watchdog_workaround(); static uint32_t epoll_instance_handler(MXB_POLL_DATA* data, MXB_WORKER* worker, uint32_t events); uint32_t handle_epoll_events(uint32_t events); @@ -495,54 +522,7 @@ private: WatchdogNotifier* m_pWatchdog_notifier; /*< Watchdog notifier, if systemd enabled. */ }; -/** - * @class WatchdogWorkaround - * - * RAII-class using which the systemd watchdog notification can be - * handled during synchronous worker activity that causes the epoll - * event handling to be stalled. - * - * The constructor turns on the workaround and the destructor - * turns it off. - */ -class WatchdogWorkaround -{ - WatchdogWorkaround(const WatchdogWorkaround&); - WatchdogWorkaround& operator=(const WatchdogWorkaround&); - -public: - /** - * Turns on the watchdog workaround for a specific worker. - * - * @param pWorker The worker for which the systemd notification - * should be arranged. Need not be the calling worker. - */ - WatchdogWorkaround(RoutingWorker* pWorker) - : m_pWorker(pWorker) - { - mxb_assert(pWorker); - m_pWorker->start_watchdog_workaround(); - } - - /** - * Turns on the watchdog workaround for the calling worker. - */ - WatchdogWorkaround() - : WatchdogWorkaround(RoutingWorker::get_current()) - { - } - - /** - * Turns off the watchdog workaround. - */ - ~WatchdogWorkaround() - { - m_pWorker->stop_watchdog_workaround(); - } - -private: - RoutingWorker* m_pWorker; -}; +using WatchdogWorkaround = RoutingWorker::WatchdogWorkaround; // Data local to a routing worker template diff --git a/server/core/routingworker.cc b/server/core/routingworker.cc index b909de73c..86d8cd9a5 100644 --- a/server/core/routingworker.cc +++ b/server/core/routingworker.cc @@ -183,7 +183,7 @@ public: , m_terminate(false) { m_thread = std::thread([this] { - uint32_t interval = mxs::RoutingWorker::get_internal_watchdog_interval(); + uint32_t interval = mxs::RoutingWorker::s_watchdog_interval.secs(); timespec timeout = { interval, 0 }; while (!mxb::atomic::load(&m_terminate, mxb::atomic::RELAXED)) From b44ea837b9ee0fcc70ca4abc315a4cbbc94eed99 Mon Sep 17 00:00:00 2001 From: Timofey Turenko Date: Wed, 21 Nov 2018 13:39:01 +0200 Subject: [PATCH 09/19] do not use ssh to check MariaDB server start command maxscale-system-test/set_env.sh script is slow because it checks if MariaDB start command is mysql or mysqld Now all versions can be started with 'mysql' except MySQL 5.5 which is not supported --- maxscale-system-test/mdbci/set_env.sh | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/maxscale-system-test/mdbci/set_env.sh b/maxscale-system-test/mdbci/set_env.sh index 46893015d..0e46748b7 100644 --- a/maxscale-system-test/mdbci/set_env.sh +++ b/maxscale-system-test/mdbci/set_env.sh @@ -52,29 +52,8 @@ do start_cmd_var="$prefix"_"$num"_start_db_command stop_cmd_var="$prefix"_"$num"_stop_db_command - mysql_exe=`${mdbci_dir}/mdbci ssh --command 'ls /etc/init.d/mysql* 2> /dev/null | tr -cd "[:print:]"' $config_name/node_$num --silent 2> /dev/null` - echo $mysql_exe | grep -i "mysql" - if [ $? != 0 ] ; then - service_name=`${mdbci_dir}/mdbci ssh --command 'systemctl list-unit-files | grep mysql' $config_name/node_$num --silent` - echo $service_name | grep mysql - if [ $? == 0 ] ; then - echo $service_name | grep mysqld - if [ $? == 0 ] ; then - eval 'export $start_cmd_var="service mysqld start "' - eval 'export $stop_cmd_var="service mysqld stop "' - else - eval 'export $start_cmd_var="service mysql start "' - eval 'export $stop_cmd_var="service mysql stop "' - fi - else - ${mdbci_dir}/mdbci ssh --command 'echo \"/usr/sbin/mysqld \$* 2> stderr.log > stdout.log &\" > mysql_start.sh; echo \"sleep 20\" >> mysql_start.sh; echo \"disown\" >> mysql_start.sh; chmod a+x mysql_start.sh' $config_name/node_$num --silent - eval 'export $start_cmd_var="/home/$au/mysql_start.sh "' - eval 'export $stop_cmd_var="killall mysqld "' - fi - else - eval 'export $start_cmd_var="$mysql_exe start "' - eval 'export $stop_cmd_var="$mysql_exe stop "' - fi + eval 'export $start_cmd_var="service mysql start "' + eval 'export $stop_cmd_var="service mysql stop "' eval 'export "$prefix"_"$num"_start_vm_command="cd ${MDBCI_VM_PATH}/$config_name;vagrant resume ${prefix}_$num ; cd $curr_dir"' eval 'export "$prefix"_"$num"_stop_vm_command="cd ${MDBCI_VM_PATH}/$config_name;vagrant suspend ${prefix}_$num ; cd $curr_dir"' From 1591ff7f463653e4ba2a9522a2d594663d7fbfee Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Wed, 21 Nov 2018 13:47:04 +0200 Subject: [PATCH 10/19] MXS-2179 Activate WD workaround when fetching users Fetching the users may potentially take longer than the watchdog timeout. To ensure that MaxScale is not killed by systemd, we must ensure that the notifications are generated also when MaxScale synchronously is fetching the users. --- server/core/service.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/server/core/service.cc b/server/core/service.cc index 8900dc722..4ca6afd80 100644 --- a/server/core/service.cc +++ b/server/core/service.cc @@ -1422,6 +1422,7 @@ void dListListeners(DCB* dcb) bool Service::refresh_users() { + mxs::WatchdogWorkaround workaround; bool ret = true; int self = mxs_rworker_get_current_id(); mxb_assert(self >= 0); From 7777732f1943797b0e667f3bc6ebbc90e5ebdb56 Mon Sep 17 00:00:00 2001 From: Timofey Turenko Date: Wed, 21 Nov 2018 13:53:50 +0200 Subject: [PATCH 11/19] remove tabs and spaces from set_env.sh --- maxscale-system-test/mdbci/set_env.sh | 31 +++++++++++++-------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/maxscale-system-test/mdbci/set_env.sh b/maxscale-system-test/mdbci/set_env.sh index 0e46748b7..bfae2a772 100644 --- a/maxscale-system-test/mdbci/set_env.sh +++ b/maxscale-system-test/mdbci/set_env.sh @@ -41,23 +41,22 @@ export maxadmin_password="mariadb" for prefix in "node" "galera" "maxscale" do - N_var="$prefix"_N - Nx=${!N_var} - N=`expr $Nx - 1` - for i in $(seq 0 $N) - do - num=`printf "%03d" $i` - eval 'export "$prefix"_"$num"_port=3306' - eval 'export "$prefix"_"$num"_access_sudo=sudo' + N_var="$prefix"_N + Nx=${!N_var} + N=`expr $Nx - 1` + for i in $(seq 0 $N) + do + num=`printf "%03d" $i` + eval 'export "$prefix"_"$num"_port=3306' + eval 'export "$prefix"_"$num"_access_sudo=sudo' - start_cmd_var="$prefix"_"$num"_start_db_command - stop_cmd_var="$prefix"_"$num"_stop_db_command - eval 'export $start_cmd_var="service mysql start "' - eval 'export $stop_cmd_var="service mysql stop "' - - eval 'export "$prefix"_"$num"_start_vm_command="cd ${MDBCI_VM_PATH}/$config_name;vagrant resume ${prefix}_$num ; cd $curr_dir"' - eval 'export "$prefix"_"$num"_stop_vm_command="cd ${MDBCI_VM_PATH}/$config_name;vagrant suspend ${prefix}_$num ; cd $curr_dir"' - done + start_cmd_var="$prefix"_"$num"_start_db_command + stop_cmd_var="$prefix"_"$num"_stop_db_command + eval 'export $start_cmd_var="service mysql start "' + eval 'export $stop_cmd_var="service mysql stop "' + eval 'export "$prefix"_"$num"_start_vm_command="cd ${MDBCI_VM_PATH}/$config_name;vagrant resume ${prefix}_$num ; cd $curr_dir"' + eval 'export "$prefix"_"$num"_stop_vm_command="cd ${MDBCI_VM_PATH}/$config_name;vagrant suspend ${prefix}_$num ; cd $curr_dir"' + done done From dd27fe0c041350499876853d51fc7b6901cb6e07 Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Wed, 21 Nov 2018 14:10:40 +0200 Subject: [PATCH 12/19] Enable the watchdog As the watchdog related problems have now been dealt with, it is safe to have it enabled by default. --- Documentation/Changelog.md | 1 + .../Getting-Started/Configuration-Guide.md | 6 --- .../MaxScale-2.3.2-Release-Notes.md | 39 +++++++++++++++++++ etc/maxscale.service.in | 4 +- 4 files changed, 41 insertions(+), 9 deletions(-) create mode 100644 Documentation/Release-Notes/MaxScale-2.3.2-Release-Notes.md diff --git a/Documentation/Changelog.md b/Documentation/Changelog.md index bef0050bd..0990c7f54 100644 --- a/Documentation/Changelog.md +++ b/Documentation/Changelog.md @@ -30,6 +30,7 @@ For more details, please refer to: +* [MariaDB MaxScale 2.3.2 Release Notes](Release-Notes/MaxScale-2.3.2-Release-Notes.md) * [MariaDB MaxScale 2.3.1 Release Notes](Release-Notes/MaxScale-2.3.1-Release-Notes.md) * [MariaDB MaxScale 2.3.0 Release Notes](Release-Notes/MaxScale-2.3.0-Release-Notes.md) diff --git a/Documentation/Getting-Started/Configuration-Guide.md b/Documentation/Getting-Started/Configuration-Guide.md index 5d72b0754..d93202f73 100644 --- a/Documentation/Getting-Started/Configuration-Guide.md +++ b/Documentation/Getting-Started/Configuration-Guide.md @@ -1988,9 +1988,3 @@ enabled MaxScale will check that all threads are running and notify systemd with a "keep-alive ping". Systemd reference: https://www.freedesktop.org/software/systemd/man/systemd.service.html - -*NOTE*: In 2.3.1 there is a deficiency that manifests itself so that if -_any_ administrative operation, performed using _maxctrl_ or _maxadmin_, -takes longer that the specified watchdog timeout, then the watchdog will -kill and restart MaxScale. Please take that into account before enabling -the watchdog. diff --git a/Documentation/Release-Notes/MaxScale-2.3.2-Release-Notes.md b/Documentation/Release-Notes/MaxScale-2.3.2-Release-Notes.md new file mode 100644 index 000000000..eab0a9881 --- /dev/null +++ b/Documentation/Release-Notes/MaxScale-2.3.2-Release-Notes.md @@ -0,0 +1,39 @@ +# MariaDB MaxScale 2.3.2 Release Notes + +Release 2.3.2 is a XXX release. + +This document describes the changes in release 2.3.2, when compared to the +previous release in the same series. + +For any problems you encounter, please consider submitting a bug +report on [our Jira](https://jira.mariadb.org/projects/MXS). + +## Changed Features + +### Watchdog + +The systemd watchdog is now safe to use in all circumstances. + +By default it is enabled with a timeout of 60 seconds. + +## Bug fixes + +## Known Issues and Limitations + +There are some limitations and known issues within this version of MaxScale. +For more information, please refer to the [Limitations](../About/Limitations.md) document. + +## Packaging + +RPM and Debian packages are provided for supported the Linux distributions. + +Packages can be downloaded [here](https://mariadb.com/downloads/mariadb-tx/maxscale). + +## Source Code + +The source code of MaxScale is tagged at GitHub with a tag, which is identical +with the version of MaxScale. For instance, the tag of version X.Y.Z of MaxScale +is `maxscale-X.Y.Z`. Further, the default branch is always the latest GA version +of MaxScale. + +The source code is available [here](https://github.com/mariadb-corporation/MaxScale). diff --git a/etc/maxscale.service.in b/etc/maxscale.service.in index 283de5038..4f335ed41 100644 --- a/etc/maxscale.service.in +++ b/etc/maxscale.service.in @@ -20,9 +20,7 @@ ExecStart=@CMAKE_INSTALL_PREFIX@/@MAXSCALE_BINDIR@/maxscale TimeoutStartSec=120 LimitNOFILE=65535 StartLimitBurst=0 -#Please see the MaxScale release notes and/or watchdog documentation -#before turning the watchdog on. -#WatchdogSec=60s +WatchdogSec=60s # Only relevant when MaxScale is linked with -fsanitize=address Environment=ASAN_OPTIONS=abort_on_error=1 From 01628dd0deb92d135d8780255f531f09598f3c85 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Mon, 19 Nov 2018 16:10:10 +0200 Subject: [PATCH 13/19] Cleanup server version updating --- include/maxscale/mysql_utils.h | 9 ++++---- server/core/mysql_utils.cc | 22 +++++-------------- server/core/server.cc | 16 ++++++-------- .../authenticator/MySQLAuth/dbusers.cc | 4 ++-- server/modules/monitor/galeramon/galeramon.cc | 2 +- .../monitor/mariadbmon/mariadbserver.cc | 2 +- server/modules/monitor/mmmon/mmmon.cc | 2 +- .../monitor/ndbclustermon/ndbclustermon.cc | 2 +- 8 files changed, 23 insertions(+), 36 deletions(-) diff --git a/include/maxscale/mysql_utils.h b/include/maxscale/mysql_utils.h index c24d98704..082136c83 100644 --- a/include/maxscale/mysql_utils.h +++ b/include/maxscale/mysql_utils.h @@ -134,12 +134,13 @@ mxs_mysql_name_kind_t mxs_mysql_name_to_pcre(char* pcre, mxs_pcre_quote_approach_t approach); /** - * Set the server information + * Get server information from connector, store it to server object. This does not query + * the server as the data has been read while connecting. * - * @param mysql A MySQL handle to the server. - * @param server The server whose version information should be updated. + * @param mysql MySQL handle from which information is read. + * @param server Server object to write. */ -void mxs_mysql_set_server_version(MYSQL* mysql, SERVER* server); +void mxs_mysql_update_server_version(MYSQL* mysql, SERVER* server); /** * Enable/disable the logging of all SQL statements MaxScale sends to diff --git a/server/core/mysql_utils.cc b/server/core/mysql_utils.cc index d009fb946..c82b8d8fd 100644 --- a/server/core/mysql_utils.cc +++ b/server/core/mysql_utils.cc @@ -407,25 +407,13 @@ mxs_mysql_name_kind_t mxs_mysql_name_to_pcre(char* pcre, return rv; } -void mxs_mysql_set_server_version(MYSQL* mysql, SERVER* server) +void mxs_mysql_update_server_version(MYSQL* mysql, SERVER* server) { + // This function should only be called for a live connection. const char* version_string = mysql_get_server_info(mysql); - - if (version_string) - { - unsigned long version = mysql_get_server_version(mysql); - - server_set_version(server, version_string, version); - - if (strcasestr(version_string, "mariadb") != NULL) - { - server->server_type = SERVER_TYPE_MARIADB; - } - else - { - server->server_type = SERVER_TYPE_MYSQL; - } - } + unsigned long version_num = mysql_get_server_version(mysql); + mxb_assert(version_string != NULL && version_num != 0); + server_set_version(server, version_string, version_num); } void mxs_mysql_set_log_statements(bool enable) diff --git a/server/core/server.cc b/server/core/server.cc index 8ddede3ce..aef0b9f2e 100644 --- a/server/core/server.cc +++ b/server/core/server.cc @@ -1092,13 +1092,11 @@ uint64_t server_map_status(const char* str) */ void server_set_version_string(SERVER* server, const char* version_string) { - // There is a race here. The string may be accessed, while we are - // updating it. Thus we take some precautions to ensure that the - // string cannot be completely garbled at any point. - + // Possible data race. The string may be accessed while we are updating it. + // Take some precautions to ensure that the string cannot be completely garbled at any point. + // Strictly speaking, this is not fool-proof as writes may not appear in order to the reader. size_t old_len = strlen(server->version_string); size_t new_len = strlen(version_string); - if (new_len >= MAX_SERVER_VERSION_LEN) { new_len = MAX_SERVER_VERSION_LEN - 1; @@ -1111,10 +1109,9 @@ void server_set_version_string(SERVER* server, const char* version_string) memset(server->version_string + new_len, 0, old_len - new_len); } + // No null-byte needs to be set. The array starts out as all zeros and the above memset adds + // the necessary null, should the new string be shorter than the old. strncpy(server->version_string, version_string, new_len); - // No null-byte needs to be set. The array starts out as all zeros - // and the above memset adds the necessary null, should the new string - // be shorter than the old. } /** @@ -1128,8 +1125,9 @@ void server_set_version_string(SERVER* server, const char* version_string) void server_set_version(SERVER* server, const char* version_string, uint64_t version) { server_set_version_string(server, version_string); - atomic_store_uint64(&server->version, version); + bool is_mariadb = (strcasestr(version_string, "mariadb") != NULL); + server->server_type = is_mariadb ? SERVER_TYPE_MARIADB : SERVER_TYPE_MYSQL; } uint64_t server_get_version(const SERVER* server) diff --git a/server/modules/authenticator/MySQLAuth/dbusers.cc b/server/modules/authenticator/MySQLAuth/dbusers.cc index 868eb651a..b2da46c71 100644 --- a/server/modules/authenticator/MySQLAuth/dbusers.cc +++ b/server/modules/authenticator/MySQLAuth/dbusers.cc @@ -687,7 +687,7 @@ static bool check_server_permissions(SERVICE* service, if (server->version_string[0] == 0) { - mxs_mysql_set_server_version(mysql, server); + mxs_mysql_update_server_version(mysql, server); } const char* format = "SELECT user, host, %s, Select_priv FROM mysql.user limit 1"; @@ -1010,7 +1010,7 @@ int get_users_from_server(MYSQL* con, SERVER_REF* server_ref, SERVICE* service, { if (server_ref->server->version_string[0] == 0) { - mxs_mysql_set_server_version(con, server_ref->server); + mxs_mysql_update_server_version(con, server_ref->server); } char* query = get_users_query(server_ref->server->version_string, diff --git a/server/modules/monitor/galeramon/galeramon.cc b/server/modules/monitor/galeramon/galeramon.cc index 550ce351c..77b6bb738 100644 --- a/server/modules/monitor/galeramon/galeramon.cc +++ b/server/modules/monitor/galeramon/galeramon.cc @@ -132,7 +132,7 @@ void GaleraMonitor::update_server_status(MXS_MONITORED_SERVER* monitored_server) char* server_string; /* get server version string */ - mxs_mysql_set_server_version(monitored_server->con, monitored_server->server); + mxs_mysql_update_server_version(monitored_server->con, monitored_server->server); server_string = monitored_server->server->version_string; /* Check if the the Galera FSM shows this node is joined to the cluster */ diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index 4deb47da5..a96c35e1e 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -881,7 +881,7 @@ void MariaDBServer::update_server_version() /* Get server version string, also get/set numeric representation. This function does not query the * server, since the data was obtained when connecting. */ - mxs_mysql_set_server_version(conn, srv); + mxs_mysql_update_server_version(conn, srv); // Check whether this server is a MaxScale Binlog Server. MYSQL_RES* result; diff --git a/server/modules/monitor/mmmon/mmmon.cc b/server/modules/monitor/mmmon/mmmon.cc index 5b88290cc..80afdaf20 100644 --- a/server/modules/monitor/mmmon/mmmon.cc +++ b/server/modules/monitor/mmmon/mmmon.cc @@ -90,7 +90,7 @@ void MMMonitor::update_server_status(MXS_MONITORED_SERVER* monitored_server) server_version = mysql_get_server_version(monitored_server->con); /* get server version string */ - mxs_mysql_set_server_version(monitored_server->con, monitored_server->server); + mxs_mysql_update_server_version(monitored_server->con, monitored_server->server); server_string = monitored_server->server->version_string; /* get server_id form current node */ diff --git a/server/modules/monitor/ndbclustermon/ndbclustermon.cc b/server/modules/monitor/ndbclustermon/ndbclustermon.cc index 824341348..a534ba25e 100644 --- a/server/modules/monitor/ndbclustermon/ndbclustermon.cc +++ b/server/modules/monitor/ndbclustermon/ndbclustermon.cc @@ -51,7 +51,7 @@ void NDBCMonitor::update_server_status(MXS_MONITORED_SERVER* monitored_server) char* server_string; /* get server version string */ - mxs_mysql_set_server_version(monitored_server->con, monitored_server->server); + mxs_mysql_update_server_version(monitored_server->con, monitored_server->server); server_string = monitored_server->server->version_string; /* Check if the the SQL node is able to contact one or more data nodes */ From fb52e565fe36877eb39eb5d3a93ef42f4834e65a Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Mon, 19 Nov 2018 16:11:32 +0200 Subject: [PATCH 14/19] Store capabilities of monitored server Checking the version number in various places in the code gets confusing. It's better to check the version in one place and record the relevant data. --- .../monitor/mariadbmon/cluster_discovery.cc | 2 +- .../mariadbmon/cluster_manipulation.cc | 5 +- .../modules/monitor/mariadbmon/mariadbmon.cc | 6 +- .../monitor/mariadbmon/mariadbserver.cc | 112 +++++++++--------- .../monitor/mariadbmon/mariadbserver.hh | 21 ++-- 5 files changed, 78 insertions(+), 68 deletions(-) diff --git a/server/modules/monitor/mariadbmon/cluster_discovery.cc b/server/modules/monitor/mariadbmon/cluster_discovery.cc index f6ff8f91b..8cc9075d4 100644 --- a/server/modules/monitor/mariadbmon/cluster_discovery.cc +++ b/server/modules/monitor/mariadbmon/cluster_discovery.cc @@ -654,7 +654,7 @@ void MariaDBMonitor::assign_slave_and_relay_master(MariaDBServer* start_node) } // If the node is a binlog relay, remove any slave bits that may have been set. // Relay master bit can stay. - if (parent->m_version == MariaDBServer::version::BINLOG_ROUTER) + if (parent->m_srv_type == MariaDBServer::server_type::BINLOG_ROUTER) { parent->clear_status(SERVER_SLAVE); } diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc index a57e53d46..b9c3a9f16 100644 --- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc +++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc @@ -1488,8 +1488,7 @@ void MariaDBMonitor::check_cluster_operations_support() { // Need to accept unknown versions here. Otherwise servers which are down when the monitor starts // would deactivate failover. - if (server->m_version != MariaDBServer::version::UNKNOWN - && server->m_version != MariaDBServer::version::MARIADB_100) + if (server->m_srv_type != MariaDBServer::server_type::UNKNOWN && !server->m_capabilities.gtid) { supported = false; auto reason = string_printf("The version of %s (%s) is not supported. Failover/switchover " @@ -1704,7 +1703,7 @@ void MariaDBMonitor::enforce_read_only_on_slaves() for (MariaDBServer* server : m_servers) { if (server->is_slave() && !server->is_read_only() - && (server->m_version != MariaDBServer::version::BINLOG_ROUTER)) + && (server->m_srv_type != MariaDBServer::server_type::BINLOG_ROUTER)) { MYSQL* conn = server->m_server_base->con; if (mxs_mysql_query(conn, QUERY) == 0) diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index ecad99d1c..4d238f99f 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -357,10 +357,8 @@ void MariaDBMonitor::update_server(MariaDBServer* server) server->update_server_version(); } - auto server_vrs = server->m_version; - if (server_vrs == MariaDBServer::version::MARIADB_MYSQL_55 - || server_vrs == MariaDBServer::version::MARIADB_100 - || server_vrs == MariaDBServer::version::BINLOG_ROUTER) + if (server->m_capabilities.basic_support + || server->m_srv_type == MariaDBServer::server_type::BINLOG_ROUTER) { // Check permissions if permissions failed last time or if this is a new connection. if (server->had_status(SERVER_AUTH_ERROR) || conn_status == MONITOR_CONN_NEWCONN_OK) diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index a96c35e1e..878cd364a 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -218,21 +218,20 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out) unsigned int columns = 0; string query; bool all_slaves_status = false; - switch (m_version) + if (m_capabilities.gtid || m_srv_type == server_type::BINLOG_ROUTER) { - case version::MARIADB_100: - case version::BINLOG_ROUTER: + // Versions with gtid also support the extended slave status query. columns = 42; all_slaves_status = true; - query = "SHOW ALL SLAVES STATUS"; - break; - - case version::MARIADB_MYSQL_55: + query = "SHOW ALL SLAVES STATUS;"; + } + else if (m_capabilities.basic_support) + { columns = 40; - query = "SHOW SLAVE STATUS"; - break; - - default: + query = "SHOW SLAVE STATUS;"; + } + else + { mxb_assert(!true); // This method should not be called for versions < 5.5 return false; } @@ -432,15 +431,10 @@ bool MariaDBServer::update_replication_settings(std::string* errmsg_out) bool MariaDBServer::read_server_variables(string* errmsg_out) { - MXS_MONITORED_SERVER* database = m_server_base; - string query = "SELECT @@global.server_id, @@read_only;"; - int columns = 2; - if (m_version == version::MARIADB_100) - { - query.erase(query.end() - 1); - query += ", @@global.gtid_domain_id;"; - columns = 3; - } + const string query_no_gtid = "SELECT @@global.server_id, @@read_only;"; + const string query_with_gtid = "SELECT @@global.server_id, @@read_only, @@global.gtid_domain_id;"; + const bool use_gtid = m_capabilities.gtid; + const string& query = use_gtid ? query_with_gtid : query_no_gtid; int i_id = 0; int i_ro = 1; @@ -461,7 +455,7 @@ bool MariaDBServer::read_server_variables(string* errmsg_out) m_server_id = server_id_parsed; m_topology_changed = true; } - database->server->node_id = server_id_parsed; + m_server_base->server->node_id = server_id_parsed; bool read_only_parsed = result->get_bool(i_ro); if (read_only_parsed != m_read_only) @@ -470,7 +464,7 @@ bool MariaDBServer::read_server_variables(string* errmsg_out) m_topology_changed = true; } - if (columns == 3) + if (use_gtid) { int64_t domain_id_parsed = result->get_uint(i_domain); if (domain_id_parsed < 0) // Same here. @@ -818,26 +812,23 @@ void MariaDBServer::monitor_server() string errmsg; bool query_ok = false; /* Query different things depending on server version/type. */ - switch (m_version) + if (m_srv_type == server_type::BINLOG_ROUTER) { - case version::MARIADB_MYSQL_55: - query_ok = read_server_variables(&errmsg) && update_slave_status(&errmsg); - break; - - case version::MARIADB_100: - query_ok = read_server_variables(&errmsg) && update_gtids(&errmsg) - && update_slave_status(&errmsg); - break; - - case version::BINLOG_ROUTER: // TODO: Add special version of server variable query. query_ok = update_slave_status(&errmsg); - break; - - default: - // Do not update unknown versions. + } + else if (m_capabilities.basic_support) + { + query_ok = read_server_variables(&errmsg) && update_slave_status(&errmsg); + if (query_ok && m_capabilities.gtid) + { + query_ok = update_gtids(&errmsg); + } + } + else + { + // Not a binlog server and no normal support, don't update. query_ok = true; - break; } if (query_ok) @@ -875,7 +866,7 @@ bool MariaDBServer::update_slave_status(string* errmsg_out) void MariaDBServer::update_server_version() { - m_version = version::UNKNOWN; + m_srv_type = server_type::UNKNOWN; auto conn = m_server_base->con; auto srv = m_server_base->server; @@ -888,28 +879,43 @@ void MariaDBServer::update_server_version() if (mxs_mysql_query(conn, "SELECT @@maxscale_version") == 0 && (result = mysql_store_result(conn)) != NULL) { - m_version = version::BINLOG_ROUTER; + m_srv_type = server_type::BINLOG_ROUTER; mysql_free_result(result); } else { - /* Not a binlog server, check version number. */ - uint64_t version_num = server_get_version(srv); - if (version_num >= 100000 && srv->server_type == SERVER_TYPE_MARIADB) + /* Not a binlog server, check version number and supported features. */ + m_srv_type = server_type::NORMAL; + m_capabilities = Capabilities(); + SERVER_VERSION decoded = {0, 0, 0}; + server_decode_version(server_get_version(srv), &decoded); + auto major = decoded.major; + auto minor = decoded.minor; + auto patch = decoded.patch; + // MariaDB/MySQL 5.5 is the oldest supported version. MySQL 6 and later are treated as 5.5. + if ((major == 5 && minor >= 5) || major > 5) { - m_version = version::MARIADB_100; - } - else if (version_num >= 5 * 10000 + 5 * 100) - { - m_version = version::MARIADB_MYSQL_55; + m_capabilities.basic_support = true; + // For more specific features, at least MariaDB 10.X is needed. + if (srv->server_type == SERVER_TYPE_MARIADB && major >= 10) + { + // 10.0.2 or 10.1.X or greater than 10 + if (((minor == 0 && patch >= 2) || minor >= 1) || major > 10) + { + m_capabilities.gtid = true; + } + // 10.1.2 (10.1.1 has limited support, not enough) or 10.2.X or greater than 10 + if (((minor == 1 && patch >= 2) || minor >= 2) || major > 10) + { + m_capabilities.max_statement_time = true; + } + } } else { - m_version = version::OLD; - MXS_ERROR("MariaDB/MySQL version of server '%s' is less than 5.5, which is not supported. " - "The server is ignored by the monitor. Server version: '%s'.", - name(), - srv->version_string); + MXS_ERROR("MariaDB/MySQL version of %s is less than 5.5, which is not supported. " + "The server is ignored by the monitor. Server version string: '%s'.", + name(), srv->version_string); } } } diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh index 6ec7eca12..374d72f76 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.hh +++ b/server/modules/monitor/mariadbmon/mariadbserver.hh @@ -72,14 +72,10 @@ struct NodeData class MariaDBServer { public: - enum class version + enum class server_type { UNKNOWN, /* Totally unknown. Server has not been connected to yet. */ - OLD, /* Anything older than 5.5. These are no longer supported by the monitor. */ - MARIADB_MYSQL_55, /* MariaDB 5.5 series or MySQL 5.5 and later. Does not have gtid (on MariaDB) so - * all gtid-related features (failover etc.) are disabled. */ - MARIADB_100, /* MariaDB 10.0 and greater. In practice though, 10.0.2 or greater is assumed. - * All monitor features are on. */ + NORMAL, /* A normal MariaDB/MySQL server, possibly supported. */ BINLOG_ROUTER /* MaxScale binlog server. Requires special handling. */ }; @@ -89,6 +85,15 @@ public: BINLOG_OFF }; + // Class which encapsulates server capabilities depending on its version. + class Capabilities + { + public: + bool basic_support = false; // Is the server version supported by the monitor at all? + bool gtid = false; // Supports MariaDB gtid? Required for failover etc. + bool max_statement_time = false; // Supports max_statement_time? + }; + // This class groups some miscellaneous replication related settings together. class ReplicationSettings { @@ -104,7 +109,9 @@ public: /* What position this server has in the monitor config? Used for tiebreaking between servers. */ int m_config_index = 0; - version m_version = version::UNKNOWN; /* Server version/type. */ + server_type m_srv_type = server_type::UNKNOWN; /* Server type. */ + Capabilities m_capabilities; /* Server capabilities */ + int64_t m_server_id = SERVER_ID_UNKNOWN; /* Value of @@server_id. Valid values are * 32bit unsigned. */ int64_t m_gtid_domain_id = GTID_DOMAIN_UNKNOWN; /* The value of gtid_domain_id, the domain which is used From 816e45ccb951916f980082dde589c445a16ba300 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Wed, 21 Nov 2018 15:20:54 +0200 Subject: [PATCH 15/19] Add multimaster test with 'assume_unique_hostnames' disabled Uses the same test code, just changes the config file. This test is currently the only one in which the setting is disabled. --- maxscale-system-test/CMakeLists.txt | 3 + ...cnf.template.mysqlmon_multimaster_serverid | 88 +++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 maxscale-system-test/cnf/maxscale.cnf.template.mysqlmon_multimaster_serverid diff --git a/maxscale-system-test/CMakeLists.txt b/maxscale-system-test/CMakeLists.txt index 0d18c6813..abbf16d86 100644 --- a/maxscale-system-test/CMakeLists.txt +++ b/maxscale-system-test/CMakeLists.txt @@ -210,6 +210,9 @@ add_test_executable(keepalived_masterdown.cpp keepalived_masterdown keepalived_m # MySQL Monitor with Multi-master configurations add_test_executable(mysqlmon_multimaster.cpp mysqlmon_multimaster mysqlmon_multimaster LABELS mysqlmon REPL_BACKEND BREAKS_REPL) +# MySQL Monitor with Multi-master configurations (assume_unique_hostnames=OFF) +add_test_derived(mysqlmon_multimaster_serverid mysqlmon_multimaster mysqlmon_multimaster_serverid LABELS mysqlmon REPL_BACKEND BREAKS_REPL) + # MySQL Monitor Failover Test add_test_executable(mysqlmon_detect_standalone_master.cpp mysqlmon_detect_standalone_master mysqlmon_detect_standalone_master LABELS mysqlmon REPL_BACKEND) diff --git a/maxscale-system-test/cnf/maxscale.cnf.template.mysqlmon_multimaster_serverid b/maxscale-system-test/cnf/maxscale.cnf.template.mysqlmon_multimaster_serverid new file mode 100644 index 000000000..749147a54 --- /dev/null +++ b/maxscale-system-test/cnf/maxscale.cnf.template.mysqlmon_multimaster_serverid @@ -0,0 +1,88 @@ +[maxscale] +threads=###threads### +log_warning=1 + +[MySQL Monitor] +type=monitor +module=mysqlmon +servers= server1, server2, server3, server4 +user=maxskysql +password= skysql +detect_stale_master=0 +monitor_interval=1000 + +[RW Split Router] +type=service +router= readwritesplit +servers=server1, server2, server3, server4 +user=maxskysql +password=skysql +slave_selection_criteria=LEAST_ROUTER_CONNECTIONS + +[Read Connection Router Slave] +type=service +router=readconnroute +router_options= slave +servers=server1,server2 +user=maxskysql +password=skysql + +[Read Connection Router Master] +type=service +router=readconnroute +router_options=master +servers=server1,server2 +user=maxskysql +password=skysql + +[RW Split Listener] +type=listener +service=RW Split Router +protocol=MySQLClient +port=4006 + +[Read Connection Listener Slave] +type=listener +service=Read Connection Router Slave +protocol=MySQLClient +port=4009 + +[Read Connection Listener Master] +type=listener +service=Read Connection Router Master +protocol=MySQLClient +port=4008 + +[CLI] +type=service +router=cli + +[CLI Listener] +type=listener +service=CLI +protocol=maxscaled +socket=default + +[server1] +type=server +address=###node_server_IP_1### +port=###node_server_port_1### +protocol=MySQLBackend + +[server2] +type=server +address=###node_server_IP_2### +port=###node_server_port_2### +protocol=MySQLBackend + +[server3] +type=server +address=###node_server_IP_3### +port=###node_server_port_3### +protocol=MySQLBackend + +[server4] +type=server +address=###node_server_IP_4### +port=###node_server_port_4### +protocol=MySQLBackend From 2ff95216a9e266c0b94c0326ba391bd0f236c1e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Sun, 18 Nov 2018 14:32:34 +0200 Subject: [PATCH 16/19] MXS-2081: Prevent unwanted stripping of binaries RPM packages by default strip all executables on some systems after installation. To work around this, the post install part needs to be prevented. This does not mean the post-install scripts used to create the directories required by MaxScale. --- cmake/package_rpm.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/package_rpm.cmake b/cmake/package_rpm.cmake index 7980aa36c..77b02960f 100644 --- a/cmake/package_rpm.cmake +++ b/cmake/package_rpm.cmake @@ -8,6 +8,7 @@ set(CPACK_RPM_SPEC_MORE_DEFINE "%define ignore \#") set(CPACK_RPM_PACKAGE_NAME "${CPACK_PACKAGE_NAME}") set(CPACK_RPM_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}") set(CPACK_RPM_PACKAGE_DESCRIPTION "${CPACK_PACKAGE_DESCRIPTION}") +set(CPACK_RPM_SPEC_INSTALL_POST "/bin/true") # This prevents the default %post from running which causes binaries to be # striped. Without this, MaxCtrl will not work on all systems as the From cb67d4cee30acb3e737cefb81d766787da2bc09d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Mon, 19 Nov 2018 12:38:12 +0200 Subject: [PATCH 17/19] Log instructions for bypassing test dependency checks If not all test programs are installed, instructions on how to bypass them are logged. This allows testing without installing all of the packages. --- maxscale-system-test/utilities.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maxscale-system-test/utilities.cmake b/maxscale-system-test/utilities.cmake index 4c8e06fd6..0031807d4 100644 --- a/maxscale-system-test/utilities.cmake +++ b/maxscale-system-test/utilities.cmake @@ -109,10 +109,10 @@ add_test_executable_notest(sysbench_example.cpp sysbench_example replication) find_program(HAVE_MYSQLTEST mysqltest) if (NOT HAVE_MYSQLTEST) - message(FATAL_ERROR "Could not find mysqltest.") + message(FATAL_ERROR "Could not find mysqltest. Add -DHAVE_MYSQLTEST=Y to CMake invocation ignore this.") endif() find_program(HAVE_PHP php) if (NOT HAVE_PHP) - message(FATAL_ERROR "Could not find php.") + message(FATAL_ERROR "Could not find php. Add -DHAVE_PHP=Y to CMake invocation ignore this.") endif() From aa2572c67755ce8b0708fea2a086a2a8c9c48851 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Mon, 19 Nov 2018 13:01:37 +0200 Subject: [PATCH 18/19] Pass database as a parameter in connect When connecting to a node, a database can now be optionally given as a parameter. This makes testing with different databases easier as the need to use the explicit functions is removed. --- maxscale-system-test/mariadb_nodes.cpp | 8 +++---- maxscale-system-test/mariadb_nodes.h | 4 ++-- maxscale-system-test/maxscales.cpp | 32 +++++++++++++++----------- maxscale-system-test/maxscales.h | 26 ++++++++++----------- 4 files changed, 38 insertions(+), 32 deletions(-) diff --git a/maxscale-system-test/mariadb_nodes.cpp b/maxscale-system-test/mariadb_nodes.cpp index 3c35a5ddf..649944e60 100644 --- a/maxscale-system-test/mariadb_nodes.cpp +++ b/maxscale-system-test/mariadb_nodes.cpp @@ -59,7 +59,7 @@ Mariadb_nodes::~Mariadb_nodes() } } -int Mariadb_nodes::connect(int i) +int Mariadb_nodes::connect(int i, const std::string& db) { if (nodes[i] == NULL || mysql_ping(nodes[i]) != 0) { @@ -67,7 +67,7 @@ int Mariadb_nodes::connect(int i) { mysql_close(nodes[i]); } - nodes[i] = open_conn_db_timeout(port[i], IP[i], "test", user_name, password, 50, ssl); + nodes[i] = open_conn_db_timeout(port[i], IP[i], db.c_str(), user_name, password, 50, ssl); } if ((nodes[i] != NULL) && (mysql_errno(nodes[i]) != 0)) @@ -80,13 +80,13 @@ int Mariadb_nodes::connect(int i) } } -int Mariadb_nodes::connect() +int Mariadb_nodes::connect(const std::string& db) { int res = 0; for (int i = 0; i < N; i++) { - res += connect(i); + res += connect(i, db); } return res; diff --git a/maxscale-system-test/mariadb_nodes.h b/maxscale-system-test/mariadb_nodes.h index f393871a9..34460627b 100644 --- a/maxscale-system-test/mariadb_nodes.h +++ b/maxscale-system-test/mariadb_nodes.h @@ -150,8 +150,8 @@ public: */ char* revert_snapshot_command; - int connect(int i); - int connect(); + int connect(int i, const std::string& db = "test"); + int connect(const std::string& db = "test"); /** * Repeatedly try to connect with one second sleep in between attempts diff --git a/maxscale-system-test/maxscales.cpp b/maxscale-system-test/maxscales.cpp index 17da49ef9..682d53e00 100644 --- a/maxscale-system-test/maxscales.cpp +++ b/maxscale-system-test/maxscales.cpp @@ -103,20 +103,22 @@ int Maxscales::read_env() } -int Maxscales::connect_rwsplit(int m) +int Maxscales::connect_rwsplit(int m, const std::string& db) { if (use_ipv6) { - conn_rwsplit[m] = open_conn(rwsplit_port[m], + conn_rwsplit[m] = open_conn_db(rwsplit_port[m], IP6[m], + db, user_name, password, ssl); } else { - conn_rwsplit[m] = open_conn(rwsplit_port[m], + conn_rwsplit[m] = open_conn_db(rwsplit_port[m], IP[m], + db, user_name, password, ssl); @@ -138,20 +140,22 @@ int Maxscales::connect_rwsplit(int m) return rc; } -int Maxscales::connect_readconn_master(int m) +int Maxscales::connect_readconn_master(int m, const std::string& db) { if (use_ipv6) { - conn_master[m] = open_conn(readconn_master_port[m], + conn_master[m] = open_conn_db(readconn_master_port[m], IP6[m], + db, user_name, password, ssl); } else { - conn_master[m] = open_conn(readconn_master_port[m], + conn_master[m] = open_conn_db(readconn_master_port[m], IP[m], + db, user_name, password, ssl); @@ -173,20 +177,22 @@ int Maxscales::connect_readconn_master(int m) return rc; } -int Maxscales::connect_readconn_slave(int m) +int Maxscales::connect_readconn_slave(int m, const std::string& db) { if (use_ipv6) { - conn_slave[m] = open_conn(readconn_slave_port[m], + conn_slave[m] = open_conn_db(readconn_slave_port[m], IP6[m], + db, user_name, password, ssl); } else { - conn_slave[m] = open_conn(readconn_slave_port[m], + conn_slave[m] = open_conn_db(readconn_slave_port[m], IP[m], + db, user_name, password, ssl); @@ -208,11 +214,11 @@ int Maxscales::connect_readconn_slave(int m) return rc; } -int Maxscales::connect_maxscale(int m) +int Maxscales::connect_maxscale(int m, const std::string& db) { - return connect_rwsplit(m) - + connect_readconn_master(m) - + connect_readconn_slave(m); + return connect_rwsplit(m, db) + + connect_readconn_master(m, db) + + connect_readconn_slave(m, db); } int Maxscales::close_maxscale_connections(int m) diff --git a/maxscale-system-test/maxscales.h b/maxscale-system-test/maxscales.h index 0df0a82bd..da47acf8d 100644 --- a/maxscale-system-test/maxscales.h +++ b/maxscale-system-test/maxscales.h @@ -114,10 +114,10 @@ public: * maxscales->conn_slave[0] MYSQL structs * @return 0 in case of success */ - int connect_maxscale(int m = 0); - int connect(int m = 0) + int connect_maxscale(int m = 0, const std::string& db = "test"); + int connect(int m = 0, const std::string& db = "test") { - return connect_maxscale(m); + return connect_maxscale(m, db); } /** @@ -135,28 +135,28 @@ public: * maxscales->conn_rwsplit[0] * @return 0 in case of success */ - int connect_rwsplit(int m = 0); + int connect_rwsplit(int m = 0, const std::string& db = "test"); /** * @brief ConnectReadMaster Opens connections to ReadConn master and store MYSQL struct in * maxscales->conn_master[0] * @return 0 in case of success */ - int connect_readconn_master(int m = 0); + int connect_readconn_master(int m = 0, const std::string& db = "test"); /** * @brief ConnectReadSlave Opens connections to ReadConn slave and store MYSQL struct in * maxscales->conn_slave[0] * @return 0 in case of success */ - int connect_readconn_slave(int m = 0); + int connect_readconn_slave(int m = 0, const std::string& db = "test"); /** * @brief OpenRWSplitConn Opens new connections to RWSplit and returns MYSQL struct * To close connection mysql_close() have to be called * @return MYSQL struct */ - MYSQL* open_rwsplit_connection(int m = 0) + MYSQL* open_rwsplit_connection(int m = 0, const std::string& db = "test") { return open_conn(rwsplit_port[m], IP[m], user_name, password, ssl); } @@ -164,9 +164,9 @@ public: /** * Get a readwritesplit Connection */ - Connection rwsplit(int m = 0) + Connection rwsplit(int m = 0, const std::string& db = "test") { - return Connection(IP[m], rwsplit_port[m], user_name, password, "test", ssl); + return Connection(IP[m], rwsplit_port[m], user_name, password, db, ssl); } /** @@ -186,9 +186,9 @@ public: /** * Get a readconnroute master Connection */ - Connection readconn_master(int m = 0) + Connection readconn_master(int m = 0, const std::string& db = "test") { - return Connection(IP[m], readconn_master_port[m], user_name, password, "test", ssl); + return Connection(IP[m], readconn_master_port[m], user_name, password, db, ssl); } /** @@ -208,9 +208,9 @@ public: /** * Get a readconnroute slave Connection */ - Connection readconn_slave(int m = 0) + Connection readconn_slave(int m = 0, const std::string& db = "test") { - return Connection(IP[m], readconn_slave_port[m], user_name, password, "test", ssl); + return Connection(IP[m], readconn_slave_port[m], user_name, password, db, ssl); } /** From de49797014ee4bc44e1a0b7099f18aa2ae300c31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Mon, 19 Nov 2018 13:48:13 +0200 Subject: [PATCH 19/19] Rewrite bug561.sh in C++ Rewrote the bug561.sh test as the error_messages test that covers what was tested by the script as well as some new parts that were untested. This revealed a bug in the error messages where MaxScale always returns the database name in the error. --- maxscale-system-test/CMakeLists.txt | 2 +- maxscale-system-test/bug561.sh | 74 ---------------- maxscale-system-test/error_messages.cpp | 110 ++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 75 deletions(-) delete mode 100755 maxscale-system-test/bug561.sh create mode 100644 maxscale-system-test/error_messages.cpp diff --git a/maxscale-system-test/CMakeLists.txt b/maxscale-system-test/CMakeLists.txt index abbf16d86..794f7c7eb 100644 --- a/maxscale-system-test/CMakeLists.txt +++ b/maxscale-system-test/CMakeLists.txt @@ -102,7 +102,7 @@ add_test_executable(bug547.cpp bug547 replication LABELS readwritesplit REPL_BAC add_test_executable(bug681.cpp bug681 bug681 LABELS readwritesplit REPL_BACKEND) # Regression case for the bug ""Different error messages from MariaDB and Maxscale" -add_test_script(bug561.sh bug561.sh replication LABELS MySQLAuth REPL_BACKEND) +add_test_executable(error_messages.cpp error_messages replication LABELS MySQLAuth REPL_BACKEND) # Regression case for the bug "Wrong error message for Access denied error" add_test_script(bug562.sh bug562.sh replication LABELS MySQLAuth REPL_BACKEND) diff --git a/maxscale-system-test/bug561.sh b/maxscale-system-test/bug561.sh deleted file mode 100755 index b2136c4b4..000000000 --- a/maxscale-system-test/bug561.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash - -### -## @file bug561.sh Regression case for the bug "Different error messages from MariaDB and Maxscale" -## - try to connect to non existing DB directly to MariaDB server and via Maxscale -## - compare error messages -## - repeat for RWSplit, ReadConn - - -rp=`realpath $0` -export src_dir=`dirname $rp` -export test_dir=`pwd` -export test_name=`basename $rp` - -$test_dir/non_native_setup $test_name - -if [ $? -ne 0 ] ; then - echo "configuring maxscale failed" - exit 1 -fi -export ssl_options="--ssl-cert=$src_dir/ssl-cert/client-cert.pem --ssl-key=$src_dir/ssl-cert/client-key.pem" - -#echo "Waiting for 15 seconds" -#sleep 15 - -mariadb_err=`mysql -u$node_user -p$node_password -h $node_000_network $ssl_options --socket=$node_000_socket -P $node_000_port non_existing_db 2>&1` -maxscale_err=`mysql -u$node_user -p$node_password -h $maxscale_IP -P 4006 $ssl_options non_existing_db 2>&1` - -maxscale_err1=`mysql -u$node_user -p$node_password -h $maxscale_IP -P 4008 $ssl_options non_existing_db 2>&1` -maxscale_err2=`mysql -u$node_user -p$node_password -h $maxscale_IP -P 4009 $ssl_options non_existing_db 2>&1` - -echo "MariaDB message" -echo "$mariadb_err" -echo " " -echo "Maxscale message from RWSplit" -echo "$maxscale_err" -echo "Maxscale message from ReadConn master" -echo "$maxscale_err1" -echo "Maxscale message from ReadConn slave" -echo "$maxscale_err2" - -res=0 - -#echo "$maxscale_err" | grep "$mariadb_err" -if [ "$maxscale_err" != "$mariadb_err" ] ; then - echo "Messages are different!" - echo "MaxScale: $maxscale_err" - echo "Server: $mariadb_err" - res=1 -else - echo "Messages are same" -fi - -if [ "$maxscale_err1" != "$mariadb_err" ] ; then - echo "Messages are different!" - echo "MaxScale: $maxscale_err1" - echo "Server: $mariadb_err" - res=1 -else - echo "Messages are same" -fi - -if [ "$maxscale_err2" != "$mariadb_err" ] ; then - echo "Messages are different!" - echo "MaxScale: $maxscale_err2" - echo "Server: $mariadb_err" - - res=1 -else - echo "Messages are same" -fi - -$src_dir/copy_logs.sh bug561 -exit $res diff --git a/maxscale-system-test/error_messages.cpp b/maxscale-system-test/error_messages.cpp new file mode 100644 index 000000000..43f526d41 --- /dev/null +++ b/maxscale-system-test/error_messages.cpp @@ -0,0 +1,110 @@ +/** + * Regression case for the bug "Different error messages from MariaDB and Maxscale" + * + * - try to connect to non existing DB directly to MariaDB server and via Maxscale + * - compare error messages + * - repeat for RWSplit, ReadConn + */ + +#include "testconnections.h" +#include +#include + +using std::cout; +using std::endl; + +std::string remove_host(std::string str) +{ + auto start = std::find(str.begin(), str.end(), '@'); + if (start != str.end()) + { + start += 2; + auto end = std::find(start, str.end(), '\''); + if (end != str.end()) + { + str.erase(start, end); + } + } + + return str; +} + + +bool is_equal_error(MYSQL* direct, MYSQL* conn) +{ + bool rval = true; + std::string direct_err = remove_host(mysql_error(direct)); + std::string conn_err = remove_host(mysql_error(conn)); + + if (direct_err != conn_err) + { + rval = false; + cout << "Wrong error: `" << conn_err << "` (original: `" << direct_err << "`)" << endl; + } + + return rval; +} + +int main(int argc, char** argv) +{ + TestConnections test(argc, argv); + + cout << "Non-existent database" << endl; + test.repl->connect(0, "non_existing_db"); + test.maxscales->connect(0, "non_existing_db"); + test.expect(is_equal_error(test.repl->nodes[0], test.maxscales->conn_rwsplit[0]), "readwritesplit returned wrong error"); + test.expect(is_equal_error(test.repl->nodes[0], test.maxscales->conn_master[0]), "readconnroute returned wrong error"); + test.repl->disconnect(); + test.maxscales->disconnect(); + + cout << "Non-existent user" << endl; + auto conn_direct = open_conn(test.repl->port[0], test.repl->IP[0], "not-a-user", "not-a-password", false); + auto conn_rwsplit = open_conn(test.maxscales->rwsplit_port[0], test.maxscales->IP[0], "not-a-user", "not-a-password", false); + auto conn_rconn = open_conn(test.maxscales->rwsplit_port[0], test.maxscales->IP[0], "not-a-user", "not-a-password", false); + + test.expect(is_equal_error(conn_direct, conn_rwsplit), "readwritesplit returned wrong error"); + test.expect(is_equal_error(conn_direct, conn_rconn), "readconnroute returned wrong error"); + + mysql_close(conn_direct); + mysql_close(conn_rwsplit); + mysql_close(conn_rconn); + + cout << "Wrong password" << endl; + conn_direct = open_conn(test.repl->port[0], test.repl->IP[0], "skysql", "not-a-password", false); + conn_rwsplit = open_conn(test.maxscales->rwsplit_port[0], test.maxscales->IP[0], "skysql", "not-a-password", false); + conn_rconn = open_conn(test.maxscales->rwsplit_port[0], test.maxscales->IP[0], "skysql", "not-a-password", false); + + test.expect(is_equal_error(conn_direct, conn_rwsplit), "readwritesplit returned wrong error"); + test.expect(is_equal_error(conn_direct, conn_rconn), "readconnroute returned wrong error"); + + mysql_close(conn_direct); + mysql_close(conn_rwsplit); + mysql_close(conn_rconn); + + // Create a database and a user without access to it + test.repl->connect(); + test.try_query(test.repl->nodes[0], "%s", "CREATE USER 'bob'@'%' IDENTIFIED BY 's3cret'"); + test.try_query(test.repl->nodes[0], "%s", "CREATE DATABASE error_messages"); + test.repl->sync_slaves(); + test.repl->disconnect(); + + cout << "No permissions on database" << endl; + conn_direct = open_conn_db(test.repl->port[0], test.repl->IP[0], "error_messages", "bob", "s3cret", false); + conn_rwsplit = open_conn_db(test.maxscales->rwsplit_port[0], test.maxscales->IP[0], "error_messages", "bob", "s3cret", false); + conn_rconn = open_conn_db(test.maxscales->rwsplit_port[0], test.maxscales->IP[0], "error_messages", "bob", "s3cret", false); + + test.expect(is_equal_error(conn_direct, conn_rwsplit), "readwritesplit returned wrong error"); + test.expect(is_equal_error(conn_direct, conn_rconn), "readconnroute returned wrong error"); + + mysql_close(conn_direct); + mysql_close(conn_rwsplit); + mysql_close(conn_rconn); + + // Create a database and a user without access to it + test.repl->connect(); + test.try_query(test.repl->nodes[0], "%s", "DROP USER 'bob'@'%'"); + test.try_query(test.repl->nodes[0], "%s", "DROP DATABASE error_messages"); + test.repl->disconnect(); + + return test.global_result; +}