MXS-1845 Find strongly connected components with multiple slave connections

Rewrote the algorithm for clarity.
This commit is contained in:
Esa Korhonen
2018-05-22 14:17:39 +03:00
parent 32c7ae2f9f
commit 4d7aff4ab9
5 changed files with 201 additions and 143 deletions

View File

@ -219,109 +219,131 @@ MXS_MONITORED_SERVER* MariaDBMonitor::getSlaveOfNodeId(long node_id, slave_down_
return NULL; return NULL;
} }
/**
* @brief A node in a graph
*/
struct graph_node
{
int index;
int lowest_index;
int cycle;
bool active;
struct graph_node *parent;
MariaDBServer *info;
MXS_MONITORED_SERVER *db;
};
/** /**
* @brief Visit a node in the graph * @brief Visit a node in the graph
* *
* This function is the main function used to determine whether the node is a * This function is the main function used to determine whether the node is a part of a cycle. It is
* part of a cycle. It is an implementation of the Tarjan's strongly connected * an implementation of the Tarjan's strongly connected component algorithm. All one node cycles are
* component algorithm. All one node cycles are ignored since normal * ignored since normal master-slave monitoring handles that.
* master-slave monitoring handles that.
* *
* Tarjan's strongly connected component algorithm: * Tarjan's strongly connected component algorithm:
* https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
* *
* https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm * @param node Target server/node
* @param stack The stack used by the algorithm, contains nodes which have not yet been assigned a cycle
* @param index Visitation index of next node
* @param cycle Index of next found cycle
*/ */
static void visit_node(struct graph_node *node, struct graph_node **stack, void MariaDBMonitor::tarjan_ssc_visit_node(MariaDBServer *node, ServerArray* stack,
int *stacksize, int *index, int *cycle) int *index, int *cycle)
{ {
/** Assign an index to this node */ /** Assign an index to this node */
node->lowest_index = node->index = *index; NodeData& node_info = node->m_node;
node->active = true; auto ind = *index;
*index += 1; node_info.index = ind;
node_info.lowest_index = ind;
*index = ind + 1;
stack[*stacksize] = node; if (node_info.parents.empty())
*stacksize += 1;
if (node->parent == NULL)
{ {
/** This node does not connect to another node, it can't be a part of a cycle */ /* This node/server does not replicate from any node, it can't be a part of a cycle. Don't even
node->lowest_index = -1; * bother pushing it to the stack. */
}
else if (node->parent->index == 0)
{
/** Node has not been visited */
visit_node(node->parent, stack, stacksize, index, cycle);
if (node->parent->lowest_index < node->lowest_index)
{
/** The parent connects to a node with a lower index, this node
could be a part of a cycle. */
node->lowest_index = node->parent->lowest_index;
}
}
else if (node->parent->active)
{
/** This node could be a root node of the cycle */
if (node->parent->index < node->lowest_index)
{
/** Root node found */
node->lowest_index = node->parent->index;
}
} }
else else
{ {
/** Node connects to an already connected cycle, it can't be a part of it */ // Has master servers, need to investigate.
node->lowest_index = -1; stack->push_back(node);
} node_info.in_stack = true;
if (node->active && node->parent && node->lowest_index > 0) for (auto iter = node_info.parents.begin(); iter != node_info.parents.end(); iter++)
{
if (node->lowest_index == node->index &&
node->lowest_index == node->parent->lowest_index)
{ {
/** NodeData& parent_node = (*iter)->m_node;
* Found a multi-node cycle from the graph. The cycle is formed from the if (parent_node.index == NodeData::INDEX_NOT_VISITED)
* nodes with a lowest_index value equal to the lowest_index value of the
* current node. Rest of the nodes on the stack are not part of a cycle
* and can be discarded.
*/
*cycle += 1;
while (*stacksize > 0)
{ {
struct graph_node *top = stack[(*stacksize) - 1]; /** Node has not been visited, so recurse. */
top->active = false; tarjan_ssc_visit_node((*iter), stack, index, cycle);
node_info.lowest_index = MXS_MIN(node_info.lowest_index, parent_node.lowest_index);
}
else if (parent_node.in_stack)
{
/* The parent node has been visited and is still on the stack. We have a cycle. */
node_info.lowest_index = MXS_MIN(node_info.lowest_index, parent_node.index);
}
if (top->lowest_index == node->lowest_index) /* If parent_node.active==false, the parent has been visited, but is not in the current stack.
* This means that while there is a route from this node to the parent, there is no route
* from the parent to this node. No cycle. */
}
/* At the end of a visit to node, leave this node on the stack if it has a path to a node earlier
* on the stack (index > lowest_index). Otherwise, start popping elements. */
if (node_info.index == node_info.lowest_index)
{
int cycle_size = 0; // Keep track of cycle size since we don't mark one-node cycles.
while (true)
{
ss_dassert(!stack->empty());
NodeData& cycle_node = stack->back()->m_node;
stack->pop_back();
cycle_node.in_stack = false;
cycle_size++;
if (cycle_node.index == node_info.index) // Last node in cycle
{ {
top->cycle = *cycle; if (cycle_size > 1)
{
cycle_node.cycle = *cycle;
// All cycle elements popped. Next cycle...
*cycle = *cycle + 1;
}
break;
}
else
{
cycle_node.cycle = *cycle; // Has more nodes, mark cycle.
} }
*stacksize -= 1;
} }
} }
} }
else }
void MariaDBMonitor::build_replication_graph()
{
/* Here, all slave connections are added to the graph, even if the IO thread cannot connect. Strictly
* speaking, building the parents-array is not required as the data already exists. This construction
* is more for convenience and faster access. */
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
{ {
/** Pop invalid nodes off the stack */ /* All servers are accepted in this loop, even if the server is [Down] or [Maintenance]. For these
node->active = false; * servers, we just use the latest available information. Not adding such servers could suddenly
if (*stacksize > 0) * change the topology quite a bit and all it would take is a momentarily network failure. */
MariaDBServer* server = *iter;
server->m_node.reset();
for (auto iter_ss = server->m_slave_status.begin(); iter_ss != server->m_slave_status.end();
iter_ss++)
{ {
*stacksize -= 1; SlaveStatus& slave_conn = *iter_ss;
/* We always trust the "Master_Server_Id"-field of the SHOW SLAVE STATUS output, as long as
* the id is > 0 (server uses 0 for default). This means that the graph constructed is faulty if
* an old "Master_Server_Id"- value is read from a slave which is still trying to connect to
* a new master. However, a server is only designated [Slave] if both IO- and SQL-threads are
* running fine, so the faulty graph does not cause wrong status settings. */
auto master_id = slave_conn.master_server_id;
if (slave_conn.slave_io_running != SlaveStatus::SLAVE_IO_NO && master_id > 0)
{
// Valid slave connection, find the MariaDBServer with this id.
auto found = get_server(master_id);
if (found != NULL)
{
server->m_node.parents.push_back(found);
found->m_node.children.push_back(server);
}
else
{
// This is an external master connection. Save just the master id for now.
server->m_node.external_masters.push_back(master_id);
}
}
} }
} }
} }
@ -345,74 +367,47 @@ static void visit_node(struct graph_node *node, struct graph_node **stack,
*/ */
void MariaDBMonitor::find_graph_cycles() void MariaDBMonitor::find_graph_cycles()
{ {
const int nservers = m_servers.size(); build_replication_graph();
struct graph_node graph[nservers]; ServerArray stack;
struct graph_node *stack[nservers]; // The next items need to be passed around in the recursive calls to keep track of algorithm state.
int nodes = 0; int index = 1; // Node visit index.
int cycle = 1; // If cycles are found, the nodes in the cycle are given an identical cycle index.
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
{ {
MariaDBServer* server = *iter; /** Index is 0, this node has not yet been visited. */
graph[nodes].info = server; if ((*iter)->m_node.index == NodeData::INDEX_NOT_VISITED)
graph[nodes].db = server->m_server_base;
graph[nodes].index = graph[nodes].lowest_index = 0;
graph[nodes].cycle = 0;
graph[nodes].active = false;
graph[nodes].parent = NULL;
nodes++;
}
/** Build the graph */
for (int i = 0; i < nservers; i++)
{
if (!graph[i].info->m_slave_status.empty() && graph[i].info->m_slave_status[0].master_server_id > 0)
{ {
/** Found a connected node */ tarjan_ssc_visit_node(*iter, &stack, &index, &cycle);
for (int k = 0; k < nservers; k++)
{
if (graph[k].info->m_server_id == graph[i].info->m_slave_status[0].master_server_id)
{
graph[i].parent = &graph[k];
break;
}
}
} }
} }
int index = 1; assign_cycle_roles(cycle);
int cycle = 0; }
int stacksize = 0;
for (int i = 0; i < nservers; i++) void MariaDBMonitor::assign_cycle_roles(int cycle)
{
// TODO: This part needs to be rewritten as it's faulty. And moved elsewhere.
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
{ {
if (graph[i].index == 0) MariaDBServer& server = **iter;
{ MXS_MONITORED_SERVER* mon_srv = server.m_server_base;
/** Index is 0, this node has not yet been visited */ if (server.m_node.cycle > 0)
visit_node(&graph[i], stack, &stacksize, &index, &cycle);
}
}
for (int i = 0; i < nservers; i++)
{
graph[i].info->m_group = graph[i].cycle;
if (graph[i].cycle > 0)
{ {
/** We have at least one cycle in the graph */ /** We have at least one cycle in the graph */
if (graph[i].info->m_read_only) if (server.m_read_only)
{ {
monitor_set_pending_status(graph[i].db, SERVER_SLAVE | SERVER_STALE_SLAVE); monitor_set_pending_status(mon_srv, SERVER_SLAVE | SERVER_STALE_SLAVE);
monitor_clear_pending_status(graph[i].db, SERVER_MASTER); monitor_clear_pending_status(mon_srv, SERVER_MASTER);
} }
else else
{ {
monitor_set_pending_status(graph[i].db, SERVER_MASTER); monitor_set_pending_status(mon_srv, SERVER_MASTER);
monitor_clear_pending_status(graph[i].db, SERVER_SLAVE | SERVER_STALE_SLAVE); monitor_clear_pending_status(mon_srv, SERVER_SLAVE | SERVER_STALE_SLAVE);
} }
} }
else if (m_detect_stale_master && cycle == 0 && else if (m_detect_stale_master && cycle == 1 && mon_srv->mon_prev_status & SERVER_MASTER &&
graph[i].db->mon_prev_status & SERVER_MASTER && (mon_srv->pending_status & SERVER_MASTER) == 0)
(graph[i].db->pending_status & SERVER_MASTER) == 0)
{ {
/** /**
* Stale master detection is handled here for multi-master mode. * Stale master detection is handled here for multi-master mode.
@ -424,16 +419,16 @@ void MariaDBMonitor::find_graph_cycles()
* slave in this case can be either a normal slave or another * slave in this case can be either a normal slave or another
* master. * master.
*/ */
if (graph[i].info->m_read_only) if (server.m_read_only)
{ {
/** The master is in read-only mode, set it into Slave state */ /** The master is in read-only mode, set it into Slave state */
monitor_set_pending_status(graph[i].db, SERVER_SLAVE | SERVER_STALE_SLAVE); monitor_set_pending_status(mon_srv, SERVER_SLAVE | SERVER_STALE_SLAVE);
monitor_clear_pending_status(graph[i].db, SERVER_MASTER | SERVER_STALE_STATUS); monitor_clear_pending_status(mon_srv, SERVER_MASTER | SERVER_STALE_STATUS);
} }
else else
{ {
monitor_set_pending_status(graph[i].db, SERVER_MASTER | SERVER_STALE_STATUS); monitor_set_pending_status(mon_srv, SERVER_MASTER | SERVER_STALE_STATUS);
monitor_clear_pending_status(graph[i].db, SERVER_SLAVE | SERVER_STALE_SLAVE); monitor_clear_pending_status(mon_srv, SERVER_SLAVE | SERVER_STALE_SLAVE);
} }
} }
} }
@ -784,7 +779,7 @@ void MariaDBMonitor::assign_relay_master(MariaDBServer& candidate)
if (ptr->server->node_id > 0 && ptr->server->master_id > 0 && if (ptr->server->node_id > 0 && ptr->server->master_id > 0 &&
getSlaveOfNodeId(ptr->server->node_id, REJECT_DOWN) && getSlaveOfNodeId(ptr->server->node_id, REJECT_DOWN) &&
getServerByNodeId(ptr->server->master_id) && getServerByNodeId(ptr->server->master_id) &&
(!m_detect_multimaster || candidate.m_group == 0)) (!m_detect_multimaster || candidate.m_node.cycle == 0))
{ {
/** This server is both a slave and a master i.e. a relay master */ /** This server is both a slave and a master i.e. a relay master */
monitor_set_pending_status(ptr, SERVER_RELAY_MASTER); monitor_set_pending_status(ptr, SERVER_RELAY_MASTER);

View File

@ -94,6 +94,7 @@ void MariaDBMonitor::clear_server_info()
// All MariaDBServer*:s are now invalid, as well as any dependant data. // All MariaDBServer*:s are now invalid, as well as any dependant data.
m_servers.clear(); m_servers.clear();
m_server_info.clear(); m_server_info.clear();
m_servers_by_id.clear();
m_excluded_servers.clear(); m_excluded_servers.clear();
m_master = NULL; m_master = NULL;
m_master_gtid_domain = GTID_DOMAIN_UNKNOWN; m_master_gtid_domain = GTID_DOMAIN_UNKNOWN;
@ -114,6 +115,12 @@ MariaDBServer* MariaDBMonitor::get_server_info(MXS_MONITORED_SERVER* db)
return m_server_info[db]; return m_server_info[db];
} }
MariaDBServer* MariaDBMonitor::get_server(int64_t id)
{
auto found = m_servers_by_id.find(id);
return (found != m_servers_by_id.end()) ? (*found).second : NULL;
}
bool MariaDBMonitor::set_replication_credentials(const MXS_CONFIG_PARAMETER* params) bool MariaDBMonitor::set_replication_credentials(const MXS_CONFIG_PARAMETER* params)
{ {
bool rval = false; bool rval = false;
@ -319,11 +326,18 @@ void MariaDBMonitor::main()
* all the time. */ * all the time. */
release_monitor_servers(m_monitor); release_monitor_servers(m_monitor);
// Query all servers for their status. // Query all servers for their status. Update the server id array.
m_servers_by_id.clear();
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
{ {
MariaDBServer* server = *iter; MariaDBServer* server = *iter;
update_server_status(server->m_server_base); update_server_status(server->m_server_base);
if (server->m_server_id != SERVER_ID_UNKNOWN)
{
IdToServerMap::value_type new_val(server->m_server_id, server);
m_servers_by_id.insert(new_val);
}
} }
// Use the information to find the so far best master server. // Use the information to find the so far best master server.

View File

@ -30,8 +30,8 @@ class MariaDBMonitor;
// Map of base struct to MariaDBServer. Does not own the server objects. May not be needed at the end. // Map of base struct to MariaDBServer. Does not own the server objects. May not be needed at the end.
typedef std::tr1::unordered_map<MXS_MONITORED_SERVER*, MariaDBServer*> ServerInfoMap; typedef std::tr1::unordered_map<MXS_MONITORED_SERVER*, MariaDBServer*> ServerInfoMap;
// Server pointer array // Map of server id:s to MariaDBServer. Useful when constructing the replication graph.
typedef std::vector<MariaDBServer*> ServerArray; typedef std::tr1::unordered_map<int64_t, MariaDBServer*> IdToServerMap;
// MariaDB Monitor instance data // MariaDB Monitor instance data
class MariaDBMonitor : public maxscale::MonitorInstance class MariaDBMonitor : public maxscale::MonitorInstance
@ -105,6 +105,7 @@ private:
// Values updated by monitor // Values updated by monitor
MariaDBServer* m_master; /**< Master server for Master/Slave replication */ MariaDBServer* m_master; /**< Master server for Master/Slave replication */
IdToServerMap m_servers_by_id; /**< Map from server id:s to MariaDBServer */
int64_t m_master_gtid_domain; /**< gtid_domain_id most recently seen on the master */ int64_t m_master_gtid_domain; /**< gtid_domain_id most recently seen on the master */
std::string m_external_master_host; /**< External master host, for fail/switchover */ std::string m_external_master_host; /**< External master host, for fail/switchover */
int m_external_master_port; /**< External master port */ int m_external_master_port; /**< External master port */
@ -154,6 +155,7 @@ private:
bool configure(const MXS_CONFIG_PARAMETER* params); bool configure(const MXS_CONFIG_PARAMETER* params);
bool set_replication_credentials(const MXS_CONFIG_PARAMETER* params); bool set_replication_credentials(const MXS_CONFIG_PARAMETER* params);
MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db); MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db);
MariaDBServer* get_server(int64_t id);
// Cluster discovery and status assignment methods // Cluster discovery and status assignment methods
MariaDBServer* find_root_master(); MariaDBServer* find_root_master();
@ -173,6 +175,10 @@ private:
void check_maxscale_schema_replication(); void check_maxscale_schema_replication();
MXS_MONITORED_SERVER* getServerByNodeId(long); MXS_MONITORED_SERVER* getServerByNodeId(long);
MXS_MONITORED_SERVER* getSlaveOfNodeId(long, slave_down_setting_t); MXS_MONITORED_SERVER* getSlaveOfNodeId(long, slave_down_setting_t);
void build_replication_graph();
void tarjan_ssc_visit_node(MariaDBServer *node, ServerArray* stack, int *index,
int *cycle);
void assign_cycle_roles(int cycle);
// Switchover methods // Switchover methods
bool switchover_check(SERVER* new_master, SERVER* current_master, bool switchover_check(SERVER* new_master, SERVER* current_master,

View File

@ -43,7 +43,6 @@ MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server)
, m_print_update_errormsg(true) , m_print_update_errormsg(true)
, m_version(version::UNKNOWN) , m_version(version::UNKNOWN)
, m_server_id(SERVER_ID_UNKNOWN) , m_server_id(SERVER_ID_UNKNOWN)
, m_group(0)
, m_read_only(false) , m_read_only(false)
, m_n_slaves_running(0) , m_n_slaves_running(0)
, m_n_slave_heartbeats(0) , m_n_slave_heartbeats(0)
@ -54,6 +53,24 @@ MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server)
ss_dassert(monitored_server); ss_dassert(monitored_server);
} }
NodeData::NodeData()
: index(INDEX_NOT_VISITED)
, lowest_index(INDEX_NOT_VISITED)
, cycle(INDEX_NOT_VISITED)
, in_stack(false)
{}
void NodeData::reset()
{
index = INDEX_NOT_VISITED;
lowest_index = INDEX_NOT_VISITED;
cycle = INDEX_NOT_VISITED;
in_stack = false;
parents.clear();
children.clear();
external_masters.clear();
}
int64_t MariaDBServer::relay_log_events() int64_t MariaDBServer::relay_log_events()
{ {
/* The events_ahead-call below ignores domains where current_pos is ahead of io_pos. This situation is /* The events_ahead-call below ignores domains where current_pos is ahead of io_pos. This situation is
@ -452,7 +469,7 @@ string MariaDBServer::diagnostics(bool multimaster) const
} }
if (multimaster) if (multimaster)
{ {
ss << "Master group: " << m_group << "\n"; ss << "Master group: " << m_node.cycle << "\n";
} }
return ss.str(); return ss.str();
} }
@ -486,7 +503,7 @@ json_t* MariaDBServer::diagnostics_json(bool multimaster) const
} }
if (multimaster) if (multimaster)
{ {
json_object_set_new(srv, "master_group", json_integer(m_group)); json_object_set_new(srv, "master_group", json_integer(m_node.cycle));
} }
return srv; return srv;
} }

View File

@ -25,6 +25,9 @@ enum print_repl_warnings_t
}; };
class QueryResult; class QueryResult;
class MariaDBServer;
// Server pointer array
typedef std::vector<MariaDBServer*> ServerArray;
// Contains data returned by one row of SHOW ALL SLAVES STATUS // Contains data returned by one row of SHOW ALL SLAVES STATUS
class SlaveStatus class SlaveStatus
@ -67,6 +70,29 @@ public:
{} {}
}; };
/**
* Data required for checking replication topology cycles. Not all of the listed data is used yet.
*/
struct NodeData
{
static const int INDEX_NOT_VISITED = 0;
int index; /* Marks the order in which this node was visited. */
int lowest_index; /* The lowest index node this node has in its subtree. */
int cycle; /* Which cycle is this node part of, if any. */
bool in_stack; /* Is this node currently is the search stack. */
ServerArray parents; /* Which nodes is this node replicating from. External masters excluded. */
ServerArray children;/* Which nodes are replicating from this node. */
std::vector<int64_t> external_masters; /* Server id:s of external masters. */
NodeData();
/**
* Reset the data to default values
*/
void reset();
};
/** /**
* Monitor specific information about a server. Eventually, this will be the primary data structure handled * Monitor specific information about a server. Eventually, this will be the primary data structure handled
* by the monitor. These are initialized in @c init_server_info. * by the monitor. These are initialized in @c init_server_info.
@ -91,8 +117,7 @@ public:
bool m_print_update_errormsg;/**< Should an update error be printed. */ bool m_print_update_errormsg;/**< Should an update error be printed. */
version m_version; /**< Server version/type. */ version m_version; /**< Server version/type. */
int64_t m_server_id; /**< Value of @@server_id. Valid values are 32bit unsigned. */ int64_t m_server_id; /**< Value of @@server_id. Valid values are 32bit unsigned. */
int m_group; /**< Multi-master group where this server belongs,
* 0 for servers not in groups */
bool m_read_only; /**< Value of @@read_only */ bool m_read_only; /**< Value of @@read_only */
size_t m_n_slaves_running; /**< Number of running slave connections */ size_t m_n_slaves_running; /**< Number of running slave connections */
int m_n_slave_heartbeats; /**< Number of received heartbeats */ int m_n_slave_heartbeats; /**< Number of received heartbeats */
@ -105,6 +130,7 @@ public:
SlaveStatusArray m_slave_status; /**< Data returned from SHOW SLAVE STATUS */ SlaveStatusArray m_slave_status; /**< Data returned from SHOW SLAVE STATUS */
ReplicationSettings m_rpl_settings; /**< Miscellaneous replication related settings */ ReplicationSettings m_rpl_settings; /**< Miscellaneous replication related settings */
NodeData m_node; /**< Replication topology data */
MariaDBServer(MXS_MONITORED_SERVER* monitored_server); MariaDBServer(MXS_MONITORED_SERVER* monitored_server);
/** /**