From 6bf10904d73e00b8f40c7f9fd1e7692c01b42190 Mon Sep 17 00:00:00 2001
From: Esa Korhonen <esa.korhonen@mariadb.com>
Date: Wed, 20 Jun 2018 15:26:27 +0300
Subject: [PATCH] MXS-1845 Only rebuild topology when required

The monitor now detects when a server has changed such that a replication
graph rebuild is needed and only then rebuilds the graph and detects
cycles and master.

Also, some old code is no longer called in the monitor cycle. It will be
removed in later commits. Refactored some of the related functions.
---
 .../monitor/mariadbmon/cluster_discovery.cc   |  54 ++--
 .../modules/monitor/mariadbmon/mariadbmon.cc  | 230 +++++++++---------
 .../modules/monitor/mariadbmon/mariadbmon.hh  |   6 +-
 .../monitor/mariadbmon/mariadbserver.cc       |  87 +++++--
 .../monitor/mariadbmon/mariadbserver.hh       |   9 +-
 5 files changed, 224 insertions(+), 162 deletions(-)

diff --git a/server/modules/monitor/mariadbmon/cluster_discovery.cc b/server/modules/monitor/mariadbmon/cluster_discovery.cc
index 5332fbe48..75a888551 100644
--- a/server/modules/monitor/mariadbmon/cluster_discovery.cc
+++ b/server/modules/monitor/mariadbmon/cluster_discovery.cc
@@ -27,8 +27,6 @@ static bool check_replicate_wild_do_table(MXS_MONITORED_SERVER* database);
 static bool check_replicate_wild_ignore_table(MXS_MONITORED_SERVER* database);
 
 static const char HB_TABLE_NAME[] = "maxscale_schema.replication_heartbeat";
-static const char SERVER_DISQUALIFIED[] = "Server '%s' was disqualified from new master selection because "
-                                          "it is %s.";
 static const int64_t MASTER_BITS = SERVER_MASTER | SERVER_WAS_MASTER;
 static const int64_t SLAVE_BITS = SERVER_SLAVE | SERVER_WAS_SLAVE;
 
@@ -383,6 +381,9 @@ void MariaDBMonitor::build_replication_graph()
              * an old "Master_Server_Id"- value is read from a slave which is still trying to connect to
              * a new master. However, a server is only designated [Slave] if both IO- and SQL-threads are
              * running fine, so the faulty graph does not cause wrong status settings. */
+
+            /* IF THIS PART IS CHANGED, CHANGE THE COMPARISON IN 'sstatus_arrays_topology_equal'
+             * (in MariaDBServer) accordingly so that any possible topology changes are detected. */
             auto master_id = slave_conn.master_server_id;
             if (slave_conn.slave_io_running != SlaveStatus::SLAVE_IO_NO && master_id > 0)
             {
@@ -977,15 +978,20 @@ static string disqualify_reasons_to_string(MariaDBServer* disqualified)
  * the current master making it unsuitable. Because of this, the method can be quite vocal and not
  * consider the previous master.
  *
+ * @param msg_out Message output. Includes explanations on why potential candidates were not selected.
  * @return The master with most slaves
  */
-MariaDBServer* MariaDBMonitor::find_topology_master_server()
+MariaDBServer* MariaDBMonitor::find_topology_master_server(string* msg_out)
 {
     /* Finding the best master server may get somewhat tricky if the graph is complicated. The general
      * criteria for the best master is that it reaches the most slaves (possibly in multiple layers and
      * cycles). To avoid having to calculate this reachability (doable by a recursive search) to all nodes,
      * let's use the knowledge that the best master is either a server with no masters (external ones don't
-     * count) or is part of a cycle. The server must be running and writable to be eligible. */
+     * count) or is part of a cycle with no out-cycle masters. The server must be running and writable
+     * to be eligible. */
+    string messages;
+    string separator;
+    const char disq[] = "is not a valid master candidate because it is ";
     ServerArray master_candidates;
     for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
     {
@@ -999,7 +1005,8 @@ MariaDBServer* MariaDBMonitor::find_topology_master_server()
             else
             {
                 string reasons = disqualify_reasons_to_string(server);
-                MXS_WARNING(SERVER_DISQUALIFIED, server->name(), reasons.c_str());
+                messages += separator + "'" + server->name() + "' " + disq + reasons + ".";
+                separator = "\n";
             }
         }
     }
@@ -1021,31 +1028,25 @@ MariaDBServer* MariaDBMonitor::find_topology_master_server()
             else
             {
                 // No single server in the cycle was viable.
-                const char WARN_MSG[] = "No valid master server could be found  in the cycle with "
-                                        "servers '%s'.";
+                const char no_valid_servers[] = "No valid master server could be found in the cycle with "
+                                                "servers";
                 string server_names = monitored_servers_to_string(cycle_members);
-                MXS_WARNING(WARN_MSG, server_names.c_str());
+                messages += separator + no_valid_servers + " '" + server_names + "'.";
+                separator = "\n";
 
                 for (auto iter2 = cycle_members.begin(); iter2 != cycle_members.end(); iter2++)
                 {
                     MariaDBServer* disqualified_server = *iter2;
                     string reasons = disqualify_reasons_to_string(disqualified_server);
-                    MXS_WARNING(SERVER_DISQUALIFIED, disqualified_server->name(), reasons.c_str());
+                    messages += separator + "'" + disqualified_server->name() + "' " + disq + reasons + ".";
+                    separator = "\n";
                 }
             }
         }
     }
 
-    MariaDBServer* found_master = NULL;
-    if (!master_candidates.empty())
-    {
-        found_master = find_best_reach_server(master_candidates);
-    }
-    else
-    {
-        MXS_WARNING("No valid master servers in the cluster.");
-    }
-    return found_master;
+    *msg_out = messages;
+    return master_candidates.empty() ? NULL : find_best_reach_server(master_candidates);
 }
 
 static void node_reach_visit(MariaDBServer* node, int* reach)
@@ -1176,7 +1177,7 @@ void MariaDBMonitor::assign_slave_and_relay_master(MariaDBServer* node)
     {
         MariaDBServer* slave = *iter;
         // If the node has an index, it has already been labeled master/slave and visited. Even when this
-        // is the case, the slave has to be checked to get correct [Relay Master] labels.
+        // is the case, the node has to be checked to get correct [Relay Master] labels.
         if (slave->m_node.index == NodeData::INDEX_NOT_VISITED)
         {
             slave->clear_status(MASTER_BITS);
@@ -1214,10 +1215,15 @@ void MariaDBMonitor::assign_slave_and_relay_master(MariaDBServer* node)
     }
 
     // Finally, if the node itself is a slave and has slaves of its own, label it as relay slave.
-    if ((node->m_server_base->pending_status & SERVER_SLAVE) && slaves > 0)
+    if (node->has_status(SERVER_SLAVE) && slaves > 0)
     {
         node->set_status(SERVER_RELAY_MASTER);
     }
+    // If the node is a binlog relay, remove any slave bits that may have been set. Relay master bit can stay.
+    if (node->m_version == MariaDBServer::version::BINLOG_ROUTER)
+    {
+        node->clear_status(SERVER_SLAVE);
+    }
 }
 
 /**
@@ -1248,7 +1254,7 @@ bool MariaDBMonitor::master_no_longer_valid(std::string* reason_out)
         if (!m_master->m_node.parents.empty())
         {
             rval = true;
-            *reason_out = "it has started replicating from another server in the cluster.";
+            *reason_out = "it has started replicating from another server in the cluster";
         }
     }
     // 4) The master was part of a cycle but is no longer, or one of the servers in the cycle is
@@ -1265,7 +1271,7 @@ bool MariaDBMonitor::master_no_longer_valid(std::string* reason_out)
             rval = true;
             ServerArray& old_members = m_master_cycle_status.cycle_members;
             string server_names_old = monitored_servers_to_string(old_members);
-            *reason_out = "it is no longer in the multimaster group (" + server_names_old + ").";
+            *reason_out = "it is no longer in the multimaster group (" + server_names_old + ")";
         }
         // 4b) The master is still in a cycle but the cycle has gained a master outside of the cycle.
         else
@@ -1276,7 +1282,7 @@ bool MariaDBMonitor::master_no_longer_valid(std::string* reason_out)
                 rval = true;
                 string server_names_current = monitored_servers_to_string(current_members);
                 *reason_out = "a server in the master's multimaster group (" + server_names_current +
-                    ") is replicating from a server not in the group.";
+                    ") is replicating from a server not in the group";
             }
         }
     }
diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc
index 890503a28..25efbbc63 100644
--- a/server/modules/monitor/mariadbmon/mariadbmon.cc
+++ b/server/modules/monitor/mariadbmon/mariadbmon.cc
@@ -293,6 +293,7 @@ json_t* MariaDBMonitor::diagnostics_json() const
  */
 void MariaDBMonitor::update_server(MariaDBServer& server)
 {
+    server.m_topology_changed = false;
     MXS_MONITORED_SERVER* mon_srv = server.m_server_base;
     /* Monitor server if not in maintenance. */
     bool in_maintenance = server.is_in_maintenance();
@@ -358,11 +359,22 @@ void MariaDBMonitor::update_server(MariaDBServer& server)
 
 void MariaDBMonitor::pre_loop()
 {
-    // MonitorInstance loaded from the journal the current master into its
-    // m_master member variable, we want the corresponding MariaDBServer into
-    // our own m_master varaible.
+    // MonitorInstance read the journal and has the last known master in its m_master member variable.
+    // Write the corresponding MariaDBServer into the class-specific m_master variable.
     m_master = MonitorInstance::m_master ? get_server_info(MonitorInstance::m_master) : NULL;
 
+    /* It's possible (e.g. after switchover) that the MXS_MONITORED_SERVER-objects have live connections
+     * from the last time the monitor was active. Close them to avoid confusing the monitor and to make
+     * it clear that this is a new start. This can be removed once monitor pause/resume is implemented. */
+    for (MariaDBServer* server : m_servers)
+    {
+        if (server->m_server_base->con)
+        {
+            mysql_close(server->m_server_base->con);
+            server->m_server_base->con = NULL;
+        }
+    }
+
     if (m_detect_replication_lag)
     {
         check_maxscale_schema_replication();
@@ -382,68 +394,115 @@ void MariaDBMonitor::tick()
         mon_srv->mon_prev_status = status;
     }
 
-    // Query all servers for their status. Update the server id array.
-    m_servers_by_id.clear();
+    // Query all servers for their status.
+    bool topology_changed = false;
     for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
     {
         MariaDBServer* server = *iter;
         update_server(*server);
-
-        if (server->m_server_id != SERVER_ID_UNKNOWN)
+        if (server->m_topology_changed)
         {
-            IdToServerMap::value_type new_val(server->m_server_id, server);
-            m_servers_by_id.insert(new_val);
+            topology_changed = true;
         }
     }
 
-    build_replication_graph();
-    find_graph_cycles();
-    string reason;
-    MariaDBServer* root_master = m_master; // TODO: Refactor this out by reducing use of root_master
-    if (master_no_longer_valid(&reason))
+    if (topology_changed)
     {
-        if (m_master && !reason.empty())
+        // This means that a server id, a slave connection or the read_only setting has changed.
+        // Update the server id array, rebuild the replication graph and re-check the master.
+        m_servers_by_id.clear();
+        for (auto server : m_servers)
         {
-            MXS_WARNING("The previous master server '%s' is no longer a valid master because %s",
-                        m_master->name(), reason.c_str());
+            m_servers_by_id[server->m_server_id] = server;
         }
+        build_replication_graph();
+        find_graph_cycles();
+        // Find the server that looks like it would be the best master. It does not yet overwrite the
+        // current master.
+        string topology_messages;
+        MariaDBServer* root_master = find_topology_master_server(&topology_messages);
 
-        // The current master is no longer ok (or it never was). Find another. Master changes are logged
-        // by the log_master_changes()-method.
-        root_master = find_topology_master_server();
-        if (root_master)
+        // Check if current master is still valid.
+        string reason;
+        if (master_no_longer_valid(&reason))
         {
-            m_master = root_master;
-            // A new master has been set. Save some data regarding the type of the master.
-            int new_cycle_id = m_master->m_node.cycle;
-            m_master_cycle_status.cycle_id = new_cycle_id;
-            if (new_cycle_id == NodeData::CYCLE_NONE)
+            if (m_master && !reason.empty())
             {
-                m_master_cycle_status.cycle_members.clear();
+                MXS_WARNING("The previous master server '%s' is no longer a valid master because %s. "
+                            "Selecting new master.", m_master->name(), reason.c_str());
             }
             else
             {
-                m_master_cycle_status.cycle_members = m_cycles[new_cycle_id];
+                MXS_NOTICE("Selecting master server.");
             }
+
+            // The current master is no longer ok (or it never was). Change the master, even though this may
+            // break replication. Master changes are logged by the log_master_changes()-method.
+            if (!topology_messages.empty())
+            {
+                MXS_WARNING("%s", topology_messages.c_str());
+            }
+
+            m_master = root_master;
+            if (m_master)
+            {
+                // A new master has been set. Save some data regarding the type of the master.
+                int new_cycle_id = m_master->m_node.cycle;
+                m_master_cycle_status.cycle_id = new_cycle_id;
+                if (new_cycle_id == NodeData::CYCLE_NONE)
+                {
+                    m_master_cycle_status.cycle_members.clear();
+                }
+                else
+                {
+                    m_master_cycle_status.cycle_members = m_cycles[new_cycle_id];
+                }
+                MXS_NOTICE("'%s' is the best master candidate.", m_master->name());
+            }
+            else
+            {
+                // The current master cannot be used and no proper candidate exists.
+                m_master_cycle_status.cycle_id = NodeData::CYCLE_NONE;
+                m_master_cycle_status.cycle_members.clear();
+                MXS_WARNING("No valid master servers found.");
+            }
+        }
+        else if (root_master && m_master != root_master)
+        {
+            // Master is still valid but it is no longer the best master. Print a warning.
+            MXS_WARNING("'%s' is a better master candidate than the current master '%s'. "
+                        "Master will change if '%s' is no longer a valid master.",
+                        root_master->name(), m_master->name(), m_master->name());
         }
     }
 
+    // Always re-assign master, slave etc bits as these depend on other factors outside topology
+    // (e.g. slave sql state).
     assign_master_and_slave();
 
     if (!m_ignore_external_masters)
     {
-        // Do a sweep through all the nodes in the cluster (even the master) and mark other states.
-        for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
+        // Do a sweep through all the nodes in the cluster (even the master) and mark external slaves.
+        for (MariaDBServer* server : m_servers)
         {
-            MariaDBServer* server = *iter;
             if (!server->m_node.external_masters.empty())
             {
                 server->set_status(SERVER_SLAVE_OF_EXT_MASTER);
             }
-            else
-            {
-                server->clear_status(SERVER_SLAVE_OF_EXT_MASTER);
-            }
+        }
+    }
+
+    /* Check whether a standalone master is needed. TODO: Rewrite these methods. */
+    if (m_detect_standalone_master)
+    {
+        if (standalone_master_required())
+        {
+            // Other servers have died, set last remaining server as master
+            set_standalone_master();
+        }
+        else
+        {
+            m_warn_set_standalone_master = true;
         }
     }
 
@@ -454,67 +513,13 @@ void MariaDBMonitor::tick()
         update_external_master();
     }
 
-    // Clear SERVER_SLAVE from binlog relays
-    for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
-    {
-        /* Remove SLAVE status if this server is a Binlog Server relay */
-        if ((*iter)->m_version == MariaDBServer::version::BINLOG_ROUTER)
-        {
-            monitor_clear_pending_status((*iter)->m_server_base, SERVER_SLAVE);
-        }
-    }
-
-    /* Update server status from monitor pending status on that server*/
-    for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
-    {
-        update_server_states(**iter, root_master);
-    }
-
-    /** Now that all servers have their status correctly set, we can check
-        if we need to use standalone master. */
-    if (m_detect_standalone_master)
-    {
-        if (standalone_master_required())
-        {
-            // Other servers have died, set last remaining server as master
-            if (set_standalone_master())
-            {
-                // Update the root_master to point to the standalone master
-                root_master = m_master;
-            }
-        }
-        else
-        {
-            m_warn_set_standalone_master = true;
-        }
-    }
-
-    if (root_master && root_master->is_master())
-    {
-        // Clear slave and stale slave status bits from current master
-        root_master->clear_status(SERVER_SLAVE | SERVER_WAS_SLAVE);
-
-        /**
-         * Clear external slave status from master if configured to do so.
-         * This allows parts of a multi-tiered replication setup to be used
-         * in MaxScale.
-         */
-        if (m_ignore_external_masters)
-        {
-            root_master->clear_status(SERVER_SLAVE_OF_EXT_MASTER);
-        }
-    }
-
-    ss_dassert(root_master == NULL || root_master == m_master);
-    ss_dassert(root_master == NULL ||
-               ((root_master->m_server_base->pending_status & (SERVER_SLAVE | SERVER_MASTER)) !=
-                (SERVER_SLAVE | SERVER_MASTER)));
+    // Sanity check. Master may not be both slave and master.
+    ss_dassert(m_master == NULL || !m_master->has_status(SERVER_SLAVE | SERVER_MASTER));
 
     /* Generate the replication heartbeat event by performing an update */
-    if (m_detect_replication_lag && root_master &&
-        (root_master->is_master() || root_master->is_relay_server()))
+    if (m_detect_replication_lag && m_master && m_master->is_master())
     {
-        measure_replication_lag(root_master);
+        measure_replication_lag();
     }
 
     // Update shared status. The next functions read the shared status. TODO: change the following
@@ -524,8 +529,7 @@ void MariaDBMonitor::tick()
         mon_srv->server->status = mon_srv->pending_status;
     }
 
-    /* log master detection failure of first master becomes available after failure */
-    log_master_changes(root_master);
+    log_master_changes();
 
     // Before exiting, we need to store the current master into the m_master
     // member variable of MonitorInstance so that the right server will be
@@ -540,7 +544,11 @@ void MariaDBMonitor::process_state_changes()
     m_cluster_modified = false;
     if (m_auto_failover)
     {
-        m_cluster_modified = handle_auto_failover();
+        if ((m_cluster_modified = handle_auto_failover()))
+        {
+            // Force a master selection on next monitor loop, otherwise the old master would stay.
+            m_master = NULL;
+        }
     }
 
     // Do not auto-join servers on this monitor loop if a failover (or any other cluster modification)
@@ -608,35 +616,29 @@ void MariaDBMonitor::update_external_master()
     }
 }
 
-void MariaDBMonitor::measure_replication_lag(MariaDBServer* root_master)
+void MariaDBMonitor::measure_replication_lag()
 {
-    ss_dassert(root_master);
-    MXS_MONITORED_SERVER* mon_root_master = root_master->m_server_base;
-    set_master_heartbeat(root_master);
-    for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
+    ss_dassert(m_master && m_master->is_master());
+    set_master_heartbeat(m_master);
+    for (MariaDBServer* slave : m_servers)
     {
-        MariaDBServer* server = *iter;
-        MXS_MONITORED_SERVER* ptr = server->m_server_base;
-        if ((!server->is_in_maintenance()) && server->is_running())
+        // No lag measurement for Binlog Server
+        if (slave->is_slave() &&
+            (slave->m_version == MariaDBServer::version::MARIADB_MYSQL_55 ||
+             slave->m_version == MariaDBServer::version::MARIADB_100))
         {
-            if (ptr->server->node_id != mon_root_master->server->node_id &&
-                (server->is_slave() || server->is_relay_server()) &&
-                (server->m_version == MariaDBServer::version::MARIADB_MYSQL_55 ||
-                 server->m_version == MariaDBServer::version::MARIADB_100)) // No select lag for Binlog Server
-            {
-                set_slave_heartbeat(server);
-            }
+            set_slave_heartbeat(slave);
         }
     }
 }
 
-void MariaDBMonitor::log_master_changes(MariaDBServer* root_master_server)
+void MariaDBMonitor::log_master_changes()
 {
-    MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->m_server_base : NULL;
+    MXS_MONITORED_SERVER* root_master = m_master ? m_master->m_server_base : NULL;
     if (root_master && mon_status_changed(root_master) &&
         !(root_master->pending_status & SERVER_WAS_MASTER))
     {
-        if ((root_master->pending_status & SERVER_MASTER) && root_master_server->is_running())
+        if ((root_master->pending_status & SERVER_MASTER) && m_master->is_running())
         {
             if (!(root_master->mon_prev_status & SERVER_WAS_MASTER) &&
                 !(root_master->pending_status & SERVER_MAINT))
diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh
index 389866e3b..87c70d533 100644
--- a/server/modules/monitor/mariadbmon/mariadbmon.hh
+++ b/server/modules/monitor/mariadbmon/mariadbmon.hh
@@ -187,19 +187,19 @@ private:
     bool standalone_master_required();
     bool set_standalone_master();
     void assign_relay_master(MariaDBServer& serv_info);
-    void log_master_changes(MariaDBServer* root_master);
+    void log_master_changes();
     void update_gtid_domain();
     void update_external_master();
     void set_master_heartbeat(MariaDBServer*);
     void set_slave_heartbeat(MariaDBServer*);
-    void measure_replication_lag(MariaDBServer* root_master);
+    void measure_replication_lag();
     void check_maxscale_schema_replication();
     MXS_MONITORED_SERVER* getServerByNodeId(long);
     MXS_MONITORED_SERVER* getSlaveOfNodeId(long, slave_down_setting_t);
     void build_replication_graph();
     void tarjan_scc_visit_node(MariaDBServer *node, ServerArray* stack, int *index, int *cycle);
     void assign_cycle_roles(int cycle);
-    MariaDBServer* find_topology_master_server();
+    MariaDBServer* find_topology_master_server(std::string* msg_out);
     MariaDBServer* find_best_reach_server(const ServerArray& candidates);
     void calculate_node_reach(MariaDBServer* node);
     int calc_reach_visit_node(MariaDBServer* node);
diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc
index 895895d94..bef24160b 100644
--- a/server/modules/monitor/mariadbmon/mariadbserver.cc
+++ b/server/modules/monitor/mariadbmon/mariadbserver.cc
@@ -41,7 +41,6 @@ SlaveStatus::SlaveStatus()
 MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index)
     : m_server_base(monitored_server)
     , m_config_index(config_index)
-    , m_print_update_errormsg(true)
     , m_version(version::UNKNOWN)
     , m_server_id(SERVER_ID_UNKNOWN)
     , m_read_only(false)
@@ -50,6 +49,7 @@ MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_
     , m_heartbeat_period(0)
     , m_latest_event(0)
     , m_gtid_domain_id(GTID_DOMAIN_UNKNOWN)
+    , m_print_update_errormsg(true)
 {
     ss_dassert(monitored_server);
 }
@@ -174,34 +174,31 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out)
         }
     }
 
-    m_slave_status.clear();
+    SlaveStatusArray slave_status_new;
     int nrunning = 0;
     while (result->next_row())
     {
-        SlaveStatus sstatus;
-        sstatus.master_host = result->get_string(i_master_host);
-        sstatus.master_port = result->get_uint(i_master_port);
+        SlaveStatus sstatus_row;
+        sstatus_row.master_host = result->get_string(i_master_host);
+        sstatus_row.master_port = result->get_uint(i_master_port);
         string last_io_error = result->get_string(i_last_io_error);
         string last_sql_error = result->get_string(i_last_sql_error);
-        sstatus.last_error = !last_io_error.empty() ? last_io_error : last_sql_error;
+        sstatus_row.last_error = !last_io_error.empty() ? last_io_error : last_sql_error;
 
-        sstatus.slave_io_running =
+        sstatus_row.slave_io_running =
             SlaveStatus::slave_io_from_string(result->get_string(i_slave_io_running));
-        sstatus.slave_sql_running = (result->get_string(i_slave_sql_running) == "Yes");
+        sstatus_row.slave_sql_running = (result->get_string(i_slave_sql_running) == "Yes");
+        sstatus_row.master_server_id = result->get_uint(i_master_server_id);
 
-        if (sstatus.slave_io_running == SlaveStatus::SLAVE_IO_YES)
+        if (sstatus_row.slave_io_running == SlaveStatus::SLAVE_IO_YES && sstatus_row.slave_sql_running)
         {
+            nrunning++;
             // TODO: Fix for multisource replication, check changes to IO_Pos here and save somewhere.
-            sstatus.master_server_id = result->get_uint(i_master_server_id);
-            if (sstatus.slave_sql_running)
-            {
-                nrunning++;
-            }
         }
 
         if (all_slaves_status)
         {
-            sstatus.name = result->get_string(i_connection_name);
+            sstatus_row.name = result->get_string(i_connection_name);
             auto heartbeats = result->get_uint(i_slave_rec_hbs);
             if (m_n_slave_heartbeats < heartbeats) // TODO: Fix for multisource replication
             {
@@ -214,12 +211,21 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out)
             if (!gtid_io_pos.empty() &&
                 (using_gtid == "Current_Pos" || using_gtid == "Slave_Pos"))
             {
-                sstatus.gtid_io_pos = GtidList::from_string(gtid_io_pos);
+                sstatus_row.gtid_io_pos = GtidList::from_string(gtid_io_pos);
             }
         }
-        m_slave_status.push_back(sstatus);
+        slave_status_new.push_back(sstatus_row);
     }
 
+    if (!sstatus_arrays_topology_equal(slave_status_new, m_slave_status))
+    {
+        m_topology_changed = true;
+    }
+
+    // Always write to m_slave_status. Even if the new status is equal by topology,
+    // gtid:s etc may have changed.
+    m_slave_status = std::move(slave_status_new);
+
     if (m_slave_status.empty())
     {
         /** Query returned no rows, replication is not configured */
@@ -308,9 +314,20 @@ bool MariaDBServer::read_server_variables(string* errmsg_out)
             server_id_parsed = SERVER_ID_UNKNOWN;
             rval = false;
         }
+        if (server_id_parsed != m_server_id)
+        {
+            m_server_id = server_id_parsed;
+            m_topology_changed = true;
+        }
         database->server->node_id = server_id_parsed;
-        m_server_id = server_id_parsed;
-        m_read_only = result->get_bool(i_ro);
+
+        bool read_only_parsed = result->get_bool(i_ro);
+        if (read_only_parsed != m_read_only)
+        {
+            m_read_only = read_only_parsed;
+            m_topology_changed = true;
+        }
+
         if (columns == 3)
         {
             int64_t domain_id_parsed = result->get_uint(i_domain);
@@ -894,6 +911,38 @@ void MariaDBServer::set_status(uint64_t bits)
     monitor_set_pending_status(m_server_base, bits);
 }
 
+/**
+ * Check whether two slave status arrays are equal with respect to replication topology. Only the master
+ * server id and slave io state of each connection are compared, position by position.
+ *
+ * @param lhs Left hand side
+ * @param rhs Right hand side
+ * @return True if equal
+ */
+bool MariaDBServer::sstatus_arrays_topology_equal(const SlaveStatusArray& lhs, const SlaveStatusArray& rhs)
+{
+    bool rval = true;
+    if (lhs.size() != rhs.size())
+    {
+        rval = false;
+    }
+    else
+    {
+        for (size_t i = 0; i < lhs.size(); i++)
+        {
+            // It's enough to check just the following two items, as these are used in
+            // 'build_replication_graph'.
+            if (lhs[i].slave_io_running != rhs[i].slave_io_running ||
+                lhs[i].master_server_id != rhs[i].master_server_id)
+            {
+                rval = false;
+                break;
+            }
+        }
+    }
+    return rval;
+}
+
 string SlaveStatus::to_string() const
 {
     using std::setw;
diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh
index 20a0739a3..24ff72420 100644
--- a/server/modules/monitor/mariadbmon/mariadbserver.hh
+++ b/server/modules/monitor/mariadbmon/mariadbserver.hh
@@ -132,7 +132,7 @@ public:
                                               *  own the struct, it is not freed (or connection closed) when
                                               *  a MariaDBServer is destroyed. Can be const on gcc 4.8 */
     int             m_config_index;         /**< What position this server has in the monitor config */
-    bool            m_print_update_errormsg;/**< Should an update error be printed. */
+
     version         m_version;              /**< Server version/type. */
     int64_t         m_server_id;            /**< Value of @@server_id. Valid values are 32bit unsigned. */
     bool            m_read_only;            /**< Value of @@read_only */
@@ -144,9 +144,13 @@ public:
                                               *  new non-replicated events. */
     GtidList        m_gtid_current_pos;     /**< Gtid of latest event. */
     GtidList        m_gtid_binlog_pos;      /**< Gtid of latest event written to binlog. */
+    bool            m_topology_changed;     /**< Has anything that could affect replication topology changed
+                                              *  this iteration? Causes: server id, slave connections,
+                                              *  read-only. */
+    NodeData        m_node;                 /**< Replication topology data */
     SlaveStatusArray m_slave_status;        /**< Data returned from SHOW SLAVE STATUS */
     ReplicationSettings m_rpl_settings;     /**< Miscellaneous replication related settings */
-    NodeData        m_node;                 /**< Replication topology data */
+    bool            m_print_update_errormsg;/**< Should an update error be printed. */
 
     MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index);
 
@@ -388,6 +392,7 @@ public:
 
 private:
     bool update_slave_status(std::string* errmsg_out = NULL);
+    static bool sstatus_arrays_topology_equal(const SlaveStatusArray& lhs, const SlaveStatusArray& rhs);
 };
 
 /**