From 5324a1bdaa0adb4ea014a6da16f41d09be246340 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Fri, 8 Jun 2018 11:02:53 +0300 Subject: [PATCH] MXS-1845 Assign server roles Assign server roles (master, slave, relay master, slave of external master) for a graph with possibly multiple paths to a slave server. --- .../monitor/mariadbmon/cluster_discovery.cc | 165 ++++++++++++++++-- .../mariadbmon/cluster_manipulation.cc | 2 +- .../modules/monitor/mariadbmon/mariadbmon.cc | 46 ++++- .../modules/monitor/mariadbmon/mariadbmon.hh | 5 +- .../monitor/mariadbmon/mariadbserver.cc | 5 + .../monitor/mariadbmon/mariadbserver.hh | 7 + 6 files changed, 206 insertions(+), 24 deletions(-) diff --git a/server/modules/monitor/mariadbmon/cluster_discovery.cc b/server/modules/monitor/mariadbmon/cluster_discovery.cc index e96ebd03e..4ae85a769 100644 --- a/server/modules/monitor/mariadbmon/cluster_discovery.cc +++ b/server/modules/monitor/mariadbmon/cluster_discovery.cc @@ -27,6 +27,35 @@ static bool check_replicate_wild_ignore_table(MXS_MONITORED_SERVER* database); static const char HB_TABLE_NAME[] = "maxscale_schema.replication_heartbeat"; static const char SERVER_DISQUALIFIED[] = "Server '%s' was disqualified from new master selection because " "it is %s."; +static const int64_t MASTER_BITS = SERVER_MASTER | SERVER_WAS_MASTER; +static const int64_t SLAVE_BITS = SERVER_SLAVE | SERVER_WAS_SLAVE; + + +/** + * Generic depth-first search. Iterates through child nodes (slaves) and runs the 'visit_func' on the nodes. + * Isn't flexible enough for all uses. + * + * @param node Starting server. The server and all its slaves are visited. + * @param data Caller-specific data, which is given to the 'visit_func'. 
+ * @param visit_func Function to run on a node when visiting it + */ +template <class T> +void topology_DFS(MariaDBServer* node, T* data, void (*visit_func)(MariaDBServer* node, T* data)) +{ + node->m_node.index = NodeData::INDEX_FIRST; + if (visit_func) + { + visit_func(node, data); + } + for (auto iter = node->m_node.children.begin(); iter != node->m_node.children.end(); iter++) + { + MariaDBServer* slave = *iter; + if (slave->m_node.index == NodeData::INDEX_NOT_VISITED) + { + topology_DFS(slave, data, visit_func); + } + } +} /** * This function computes the replication tree from a set of monitored servers and returns the root server @@ -384,7 +413,6 @@ void MariaDBMonitor::build_replication_graph() */ void MariaDBMonitor::find_graph_cycles() { - build_replication_graph(); m_cycles.clear(); // The next items need to be passed around in the recursive calls to keep track of algorithm state. ServerArray stack; @@ -927,7 +955,7 @@ static string disqualify_reasons_to_string(MariaDBServer* disqualified) reasons += separator + "down"; separator = word_and; } - if (disqualified->m_read_only) + if (disqualified->is_read_only()) { reasons += separator + "in read_only mode"; } @@ -955,7 +983,7 @@ MariaDBServer* MariaDBMonitor::find_topology_master_server() MariaDBServer* server = *iter; if (server->m_node.parents.empty()) { - if (server->is_running() && server->m_read_only) + if (server->is_running() && !server->is_read_only()) { master_candidates.push_back(server); } @@ -1004,6 +1032,11 @@ MariaDBServer* MariaDBMonitor::find_topology_master_server() return found_master; } +static void node_reach_visit(MariaDBServer* node, int* reach) +{ + *reach = *reach + 1; +} + /** * Calculate the total number of reachable child nodes for the given node. A node can always reach itself. * The result is saved into the node data. 
@@ -1012,21 +1045,10 @@ void MariaDBMonitor::calculate_node_reach(MariaDBServer* node) { ss_dassert(node && node->m_node.reach == NodeData::REACH_UNKNOWN); // Reset indexes since they will be reused. - for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) - { - (*iter)->m_node.reset_indexes(); - } - - int reach = 1; // The starting node can reach itself. - for (auto iter = node->m_node.children.begin(); iter != node->m_node.children.end(); iter++) - { - MariaDBServer* slave = *iter; - if (slave->m_node.index == NodeData::INDEX_NOT_VISITED) - { // TODO: Think if is_down() should be checked here. Could cause weird behaviour. - reach += calc_reach_visit_node(slave); - } - } + reset_node_index_info(); + int reach = 0; + topology_DFS(node, &reach, node_reach_visit); node->m_node.reach = reach; } @@ -1067,10 +1089,117 @@ MariaDBServer* MariaDBMonitor::find_master_inside_cycle(ServerArray& cycle_membe { MariaDBServer* server = *iter; ss_dassert(server->m_node.cycle != NodeData::CYCLE_NONE); - if (server->is_running() && !server->m_read_only) + if (server->is_running() && !server->is_read_only()) { return server; } } return NULL; } + +/** + * Assign replication role status bits to the servers in the cluster. Starts from the cluster master server. + */ +void MariaDBMonitor::assign_master_and_slave() +{ + // Remove any existing [Master], [Slave] etc flags. + const uint64_t remove_bits = SERVER_MASTER | SERVER_SLAVE | SERVER_RELAY_MASTER | + SERVER_SLAVE_OF_EXT_MASTER; + for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) + { + (*iter)->clear_status(remove_bits); + } + + // Check the master node, label it as the [Master] if... + if (m_master) + { + // the node has slaves, even if their slave sql threads are stopped ... + if (!m_master->m_node.children.empty() || + // or detect standalone master is on ... + m_detect_standalone_master || + // or "detect_stale_master" is on and the server was a master before. 
+ (m_detect_stale_master && (m_master->m_server_base->pending_status & SERVER_WAS_MASTER))) + { + m_master->clear_status(SLAVE_BITS | SERVER_RELAY_MASTER); + m_master->set_status(MASTER_BITS); + } + + // Run another DFS, this time assigning slaves. + reset_node_index_info(); + assign_slave_and_relay_master(m_master); + } +} + +/** + * Check if the servers replicating from the given node qualify for [Slave] and mark them. Continue the + * search to any found slaves. + * + * @param node The node to process. The node itself is not marked [Slave]. + */ +void MariaDBMonitor::assign_slave_and_relay_master(MariaDBServer* node) +{ + ss_dassert(node->m_node.index == NodeData::INDEX_NOT_VISITED); + node->m_node.index = NodeData::INDEX_FIRST; + bool require_was_slave = false; + + if (node->is_down()) + { + // If 'detect_stale_slave' is off, this node can only have slaves if the node is running. + if (m_detect_stale_slave) + { + require_was_slave = true; + } + else + { + return; + } + } + + int slaves = 0; + for (auto iter = node->m_node.children.begin(); iter != node->m_node.children.end(); iter++) + { + MariaDBServer* slave = *iter; + // If the node has an index, it has already been labeled master/slave and visited. Even when this + // is the case, the slave has to be checked to get correct [Relay Master] labels. + if (slave->m_node.index == NodeData::INDEX_NOT_VISITED) + { + slave->clear_status(MASTER_BITS); + } + // The slave node may have several slave connections, need to find the right one. + bool found_slave_conn = false; + for (auto iter2 = slave->m_slave_status.begin(); iter2 != slave->m_slave_status.end(); iter2++) + { + SlaveStatus& ss = *iter2; + auto master_id = ss.master_server_id; + auto io_running = ss.slave_io_running; + // Should this check 'Master_Host' and 'Master_Port' instead of server id:s? 
+ if (master_id > 0 && master_id == node->m_server_id && ss.slave_sql_running && + (io_running == SlaveStatus::SLAVE_IO_YES || + io_running == SlaveStatus::SLAVE_IO_CONNECTING) && + // Can in theory cause a 'SERVER_WAS_SLAVE' bit from another master to affect the result. + (!require_was_slave || (slave->m_server_base->pending_status & SERVER_WAS_SLAVE))) + { + found_slave_conn = true; + break; + } + } + + // If the slave had a valid connection, label it as a slave and recurse. + if (found_slave_conn) + { + slaves++; + if (slave->m_node.index == NodeData::INDEX_NOT_VISITED) + { + slave->clear_status(MASTER_BITS); + slave->set_status(SLAVE_BITS); + assign_slave_and_relay_master(slave); + } + } + } + + // Finally, if the node itself is a slave and has slaves of its own, label it as relay master. + if ((node->m_server_base->pending_status & SERVER_SLAVE) && slaves > 0) + { + node->set_status(SERVER_RELAY_MASTER); + } +} diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc index c1204df3f..cf08c3174 100644 --- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc +++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc @@ -1561,7 +1561,7 @@ void MariaDBMonitor::enforce_read_only_on_slaves() for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) { MariaDBServer* server = *iter; - if (server->is_slave() && !server->m_read_only && + if (server->is_slave() && !server->is_read_only() && (server->m_version != MariaDBServer::version::BINLOG_ROUTER)) { MYSQL* conn = server->m_server_base->con; diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index eb1e3d4a9..07ad713a5 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -105,6 +105,14 @@ void MariaDBMonitor::clear_server_info() m_external_master_port = PORT_UNKNOWN; } +void 
MariaDBMonitor::reset_node_index_info() +{ + for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) + { + (*iter)->m_node.reset_indexes(); + } +} + /** * Get monitor-specific server info for the monitored server. * @@ -382,8 +390,40 @@ void MariaDBMonitor::tick() } } + build_replication_graph(); + find_graph_cycles(); // Use the information to find the so far best master server. - MariaDBServer* root_master = find_root_master(); + MariaDBServer* root_master = find_topology_master_server(); + if (root_master) + { + MXS_DEBUG("Server '%s' is the best master candidate with %d slaves.", + root_master->name(), root_master->m_node.reach); + m_master = root_master; + } +#ifdef SS_DEBUG + else + { + MXS_DEBUG("No valid master server found in the cluster."); + } +#endif + assign_master_and_slave(); + + if (!m_ignore_external_masters) + { + // Do a sweep through all the nodes in the cluster (even the master) and mark other states. + for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) + { + MariaDBServer* server = *iter; + if (!server->m_node.external_masters.empty()) + { + server->set_status(SERVER_SLAVE_OF_EXT_MASTER); + } + else + { + server->clear_status(SERVER_SLAVE_OF_EXT_MASTER); + } + } + } if (m_master != NULL && m_master->is_master()) { @@ -392,11 +432,9 @@ void MariaDBMonitor::tick() update_external_master(); } - // Assign relay masters, clear SERVER_SLAVE from binlog relays + // Clear SERVER_SLAVE from binlog relays for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) { - assign_relay_master(**iter); - /* Remove SLAVE status if this server is a Binlog Server relay */ if ((*iter)->m_version == MariaDBServer::version::BINLOG_ROUTER) { diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh index 6ba98713e..9a2829f57 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.hh +++ b/server/modules/monitor/mariadbmon/mariadbmon.hh @@ -161,13 +161,14 @@ private: 
MariaDBMonitor(MXS_MONITOR* monitor_base); void reset_server_info(); void clear_server_info(); + void reset_node_index_info(); bool configure(const MXS_CONFIG_PARAMETER* params); bool set_replication_credentials(const MXS_CONFIG_PARAMETER* params); MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db); MariaDBServer* get_server(int64_t id); - void update_server(MariaDBServer& server); // Cluster discovery and status assignment methods + void update_server(MariaDBServer& server); MariaDBServer* find_root_master(); MXS_MONITORED_SERVER* get_replication_tree(); MXS_MONITORED_SERVER* build_mysql51_replication_tree(); @@ -193,6 +194,8 @@ private: void calculate_node_reach(MariaDBServer* node); int calc_reach_visit_node(MariaDBServer* node); MariaDBServer* find_master_inside_cycle(ServerArray& cycle_servers); + void assign_master_and_slave(); + void assign_slave_and_relay_master(MariaDBServer* node); // Switchover methods bool switchover_check(SERVER* new_master, SERVER* current_master, diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index 81419fd8c..459c1832e 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -448,6 +448,11 @@ bool MariaDBServer::is_relay_server() const (SERVER_RUNNING | SERVER_MASTER | SERVER_SLAVE); } +bool MariaDBServer::is_read_only() const +{ + return m_read_only; +} + const char* MariaDBServer::name() const { return m_server_base->server->name; diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh index 8f9c4cbb6..f491f47df 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.hh +++ b/server/modules/monitor/mariadbmon/mariadbserver.hh @@ -261,6 +261,13 @@ public: */ bool is_relay_server() const; + /** + * Getter for m_read_only. 
+ * + * @return True if server is in read_only mode + */ + bool is_read_only() const; + /** * Returns the server name. *