From 9525d3507b5319a7b0e3471f1c5e430edb9aaa15 Mon Sep 17 00:00:00 2001
From: Esa Korhonen <esa.korhonen@mariadb.com>
Date: Thu, 21 Jun 2018 18:34:27 +0300
Subject: [PATCH] Run manual commands without stopping the monitor

The command is saved in a function object which is read by the monitor
thread. This way, manual and automatic cluster modification commands are
run in the same step of a monitor cycle.

This update required several modifications in related code.
---
 .../monitor/mariadbmon/cluster_discovery.cc   |  19 +-
 .../mariadbmon/cluster_manipulation.cc        |  70 +-----
 .../modules/monitor/mariadbmon/mariadbmon.cc  | 215 ++++++++++++++----
 .../modules/monitor/mariadbmon/mariadbmon.hh  |  58 +++--
 .../monitor/mariadbmon/mariadbserver.cc       |   1 +
 5 files changed, 231 insertions(+), 132 deletions(-)

diff --git a/server/modules/monitor/mariadbmon/cluster_discovery.cc b/server/modules/monitor/mariadbmon/cluster_discovery.cc
index 75a888551..a16959534 100644
--- a/server/modules/monitor/mariadbmon/cluster_discovery.cc
+++ b/server/modules/monitor/mariadbmon/cluster_discovery.cc
@@ -1227,24 +1227,25 @@ void MariaDBMonitor::assign_slave_and_relay_master(MariaDBServer* node)
 }
 
 /**
- * Should a new master server be selected?
+ * Is the current master server still valid or should a new one be selected?
  *
  * @param reason_out Output for a text description
- * @return True, if the current master has changed in a way that a new master should be selected.
+ * @return True, if master is ok. False if the current master has changed in a way that
+ * a new master should be selected.
  */
-bool MariaDBMonitor::master_no_longer_valid(std::string* reason_out)
+bool MariaDBMonitor::master_is_valid(std::string* reason_out)
 {
     // The master server of the cluster needs to be re-calculated in the following four cases:
-    bool rval = false;
+    bool rval = true;
     // 1) There is no master.
     if (m_master == NULL)
     {
-        rval = true;
+        rval = false;
     }
     // 2) read_only has been activated on the master.
     else if (m_master->is_read_only())
     {
-        rval = true;
+        rval = false;
         *reason_out = "it is in read-only mode";
     }
     // 3) The master was a non-replicating master (not in a cycle) but now has a slave connection.
@@ -1253,7 +1254,7 @@ bool MariaDBMonitor::master_no_longer_valid(std::string* reason_out)
         // The master should not have a master of its own.
         if (!m_master->m_node.parents.empty())
         {
-            rval = true;
+            rval = false;
             *reason_out = "it has started replicating from another server in the cluster";
         }
     }
@@ -1268,7 +1269,7 @@ bool MariaDBMonitor::master_no_longer_valid(std::string* reason_out)
         // 4a) The master is no longer in a cycle.
         if (current_cycle_id == NodeData::CYCLE_NONE)
         {
-            rval = true;
+            rval = false;
             ServerArray& old_members = m_master_cycle_status.cycle_members;
             string server_names_old = monitored_servers_to_string(old_members);
             *reason_out = "it is no longer in the multimaster group (" + server_names_old + ")";
@@ -1279,7 +1280,7 @@ bool MariaDBMonitor::master_no_longer_valid(std::string* reason_out)
             ServerArray& current_members = m_cycles[current_cycle_id];
             if (cycle_has_master_server(current_members))
             {
-                rval = true;
+                rval = false;
                 string server_names_current = monitored_servers_to_string(current_members);
                 *reason_out = "a server in the master's multimaster group (" + server_names_current +
                     ") is replicating from a server not in the group";
diff --git a/server/modules/monitor/mariadbmon/cluster_manipulation.cc b/server/modules/monitor/mariadbmon/cluster_manipulation.cc
index be7a6f4a7..d844f15c3 100644
--- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc
+++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc
@@ -25,17 +25,6 @@ static void print_redirect_errors(MariaDBServer* first_server, const ServerArray
 
 bool MariaDBMonitor::manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out)
 {
-    bool running = is_running();
-    if (running)
-    {
-        stop();
-        MXS_NOTICE("Stopped the monitor %s for the duration of switchover.", m_monitor->name);
-    }
-    else
-    {
-        MXS_NOTICE("Monitor %s already stopped, switchover can proceed.", m_monitor->name);
-    }
-
     /* It's possible for either current_master, or both new_master & current_master to be NULL, which means
      * autoselect. Only autoselecting new_master is not possible. Autoselection will happen at the actual
      * switchover function. */
@@ -70,27 +59,11 @@ bool MariaDBMonitor::manual_switchover(SERVER* new_master, SERVER* current_maste
         }
     }
 
-    if (running)
-    {
-        // TODO: What if this fails?
-        start(m_monitor->parameters);
-    }
     return rval;
 }
 
 bool MariaDBMonitor::manual_failover(json_t** output)
 {
-    bool running = is_running();
-    if (running)
-    {
-        stop();
-        MXS_NOTICE("Stopped monitor %s for the duration of failover.", m_monitor->name);
-    }
-    else
-    {
-        MXS_NOTICE("Monitor %s already stopped, failover can proceed.", m_monitor->name);
-    }
-
     bool rv = true;
     string failover_error;
     rv = failover_check(&failover_error);
@@ -112,27 +85,11 @@ bool MariaDBMonitor::manual_failover(json_t** output)
             failover_error.c_str());
     }
 
-    if (running)
-    {
-        // TODO: What if this fails?
-        start(m_monitor->parameters);
-    }
     return rv;
 }
 
 bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
 {
-    bool running = is_running();
-    if (running)
-    {
-        stop();
-        MXS_NOTICE("Stopped monitor %s for the duration of rejoin.", m_monitor->name);
-    }
-    else
-    {
-        MXS_NOTICE("Monitor %s already stopped, rejoin can proceed.", m_monitor->name);
-    }
-
     bool rval = false;
     if (cluster_can_be_joined())
     {
@@ -188,11 +145,6 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
         PRINT_MXS_JSON_ERROR(output, BAD_CLUSTER, m_monitor->name);
     }
 
-    if (running)
-    {
-        // TODO: What if this fails?
-        start(m_monitor->parameters);
-    }
     return rval;
 }
 
@@ -351,6 +303,7 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t**
             if (op_success)
             {
                 servers_joined++;
+                m_cluster_modified = true;
             }
         }
     }
@@ -587,6 +540,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer
     // Step 2: Set read-only to on, flush logs, update master gtid:s
     if (switchover_demote_master(demotion_target, err_out))
     {
+        m_cluster_modified = true;
         bool catchup_and_promote_success = false;
         time_t step2_time = time(NULL);
         seconds_remaining -= difftime(step2_time, start_time);
@@ -606,6 +560,8 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer
             if (promote_new_master(promotion_target, err_out))
             {
                 catchup_and_promote_success = true;
+                m_next_master = promotion_target;
+
                 // Step 5: Redirect slaves and start replication on old master.
                 ServerArray redirected_slaves;
                 bool start_ok = switchover_start_slave(demotion_target, promotion_target);
@@ -706,6 +662,8 @@ bool MariaDBMonitor::do_failover(json_t** err_out)
         // Step 3: Stop and reset slave, set read-only to 0.
         if (promote_new_master(new_master, err_out))
         {
+            m_next_master = new_master;
+            m_cluster_modified = true;
             // Step 4: Redirect slaves.
             ServerArray redirected_slaves;
             int redirects = redirect_slaves(new_master, redirectable_slaves, &redirected_slaves);
@@ -1378,17 +1336,14 @@ bool MariaDBMonitor::failover_check(string* error_out)
  * If a master failure has occurred and MaxScale is configured with failover functionality, this fuction
  * executes failover to select and promote a new master server. This function should be called immediately
  * after @c mon_process_state_changes. If an error occurs, this method disables automatic failover.
- *
- * @return True if failover was performed, or at least attempted
 */
-bool MariaDBMonitor::handle_auto_failover()
+void MariaDBMonitor::handle_auto_failover()
 {
     const char RE_ENABLE_FMT[] = "%s To re-enable failover, manually set '%s' to 'true' for monitor "
                                  "'%s' via MaxAdmin or the REST API, or restart MaxScale.";
-    bool cluster_modified = false;
-    if (config_get_global_options()->passive || (m_master && m_master->is_master()))
+    if (m_master && m_master->is_master())
     {
-        return cluster_modified;
+        return;
     }
 
     if (failover_not_possible())
@@ -1400,14 +1355,14 @@ bool MariaDBMonitor::handle_auto_failover()
         MXS_ERROR(RE_ENABLE_FMT, PROBLEMS, CN_AUTO_FAILOVER, m_monitor->name);
         m_auto_failover = false;
         disable_setting(CN_AUTO_FAILOVER);
-        return cluster_modified;
+        return;
     }
 
     // If master seems to be down, check if slaves are receiving events.
     if (m_verify_master_failure && m_master && m_master->is_down() && slave_receiving_events())
     {
         MXS_INFO("Master failure not yet confirmed by slaves, delaying failover.");
-        return cluster_modified;
+        return;
     }
 
     MariaDBServer* failed_master = NULL;
@@ -1463,7 +1418,6 @@ bool MariaDBMonitor::handle_auto_failover()
                     m_auto_failover = false;
                     disable_setting(CN_AUTO_FAILOVER);
                 }
-                cluster_modified = true;
             }
             else
             {
@@ -1482,8 +1436,6 @@ bool MariaDBMonitor::handle_auto_failover()
     {
         m_warn_failover_precond = true;
     }
-
-    return cluster_modified;
 }
 
 bool MariaDBMonitor::failover_not_possible()
diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc
index 25efbbc63..279b67d94 100644
--- a/server/modules/monitor/mariadbmon/mariadbmon.cc
+++ b/server/modules/monitor/mariadbmon/mariadbmon.cc
@@ -55,7 +55,8 @@ MariaDBMonitor::MariaDBMonitor(MXS_MONITOR* monitor)
     , m_id(config_get_global_options()->id)
     , m_master_gtid_domain(GTID_DOMAIN_UNKNOWN)
     , m_external_master_port(PORT_UNKNOWN)
-    , m_cluster_modified(true)
+    , m_cluster_topology_changed(true)
+    , m_cluster_modified(false)
     , m_switchover_on_low_disk_space(false)
     , m_warn_set_standalone_master(true)
     , m_log_no_master(true)
@@ -102,6 +103,7 @@ void MariaDBMonitor::clear_server_info()
     m_servers_by_id.clear();
     m_excluded_servers.clear();
     m_master = NULL;
+    m_next_master = NULL;
     m_master_gtid_domain = GTID_DOMAIN_UNKNOWN;
     m_external_master_host.clear();
     m_external_master_port = PORT_UNKNOWN;
@@ -293,7 +295,6 @@ json_t* MariaDBMonitor::diagnostics_json() const
  */
 void MariaDBMonitor::update_server(MariaDBServer& server)
 {
-    server.m_topology_changed = false;
     MXS_MONITORED_SERVER* mon_srv = server.m_server_base;
     /* Monitor server if not in maintenance. */
     bool in_maintenance = server.is_in_maintenance();
@@ -395,18 +396,18 @@ void MariaDBMonitor::tick()
     }
 
     // Query all servers for their status.
-    bool topology_changed = false;
     for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
     {
         MariaDBServer* server = *iter;
         update_server(*server);
         if (server->m_topology_changed)
         {
-            topology_changed = true;
+            m_cluster_topology_changed = true;
+            server->m_topology_changed = false;
         }
     }
 
-    if (topology_changed)
+    if (m_cluster_topology_changed)
     {
         // This means that a server id or a slave connection has changed, or read_only was set.
         // Update the server id array and check various things.
@@ -417,6 +418,16 @@ void MariaDBMonitor::tick()
         }
         build_replication_graph();
         find_graph_cycles();
+
+        /* Check if a failover/switchover was performed last loop and the master should change.
+         * In this case, update the master and its cycle info here. */
+        if (m_next_master)
+        {
+            m_master = m_next_master;
+            update_master_cycle_info();
+            m_next_master = NULL;
+        }
+
         // Find the server that looks like it would be the best master. It does not yet overwrite the
         // current master.
         string topology_messages;
@@ -424,7 +435,19 @@ void MariaDBMonitor::tick()
 
         // Check if current master is still valid.
         string reason;
-        if (master_no_longer_valid(&reason))
+        if (master_is_valid(&reason))
+        {
+            // Update master cycle info in case it has changed
+            update_master_cycle_info();
+            if (root_master && m_master != root_master)
+            {
+                // Master is still valid but it is no longer the best master. Print a warning.
+                MXS_WARNING("'%s' is a better master candidate than the current master '%s'. "
+                            "Master will change if '%s' is no longer a valid master.",
+                            root_master->name(), m_master->name(), m_master->name());
+            }
+        }
+        else
         {
             if (m_master && !reason.empty())
             {
@@ -444,36 +467,17 @@ void MariaDBMonitor::tick()
             }
 
             m_master = root_master;
+            update_master_cycle_info();
             if (m_master)
             {
-                // A new master has been set. Save some data regarding the type of the master.
-                int new_cycle_id = m_master->m_node.cycle;
-                m_master_cycle_status.cycle_id = new_cycle_id;
-                if (new_cycle_id == NodeData::CYCLE_NONE)
-                {
-                    m_master_cycle_status.cycle_members.clear();
-                }
-                else
-                {
-                    m_master_cycle_status.cycle_members = m_cycles[new_cycle_id];
-                }
                 MXS_NOTICE("'%s' is the best master candidate.", m_master->name());
             }
             else
             {
-                // The current master cannot be used and no proper candidate exists.
-                m_master_cycle_status.cycle_id = NodeData::CYCLE_NONE;
-                m_master_cycle_status.cycle_members.clear();
                 MXS_WARNING("No valid master servers found.");
             }
         }
-        else if (root_master && m_master != root_master)
-        {
-            // Master is still valid but it is no longer the best master. Print a warning.
-            MXS_WARNING("'%s' is a better master candidate than the current master '%s'. "
-                        "Master will change if '%s' is no longer a valid master.",
-                        root_master->name(), m_master->name(), m_master->name());
-        }
+        m_cluster_topology_changed = false;
     }
 
     // Always re-assign master, slave etc bits as these depend on other factors outside topology
@@ -542,30 +546,76 @@ void MariaDBMonitor::process_state_changes()
     MonitorInstance::process_state_changes();
 
     m_cluster_modified = false;
-    if (m_auto_failover)
+    // Check for manual commands
+    if (m_manual_cmd.command_waiting_exec)
     {
-        if ((m_cluster_modified = handle_auto_failover()))
+        // Looks like a command is waiting. Lock mutex, check again and wait for the condition variable.
+        std::unique_lock<std::mutex> lock(m_manual_cmd.mutex);
+        if (m_manual_cmd.command_waiting_exec)
         {
-            // Force a master selection on next monitor loop, otherwise the old master would stay.
-            m_master = NULL;
+            m_manual_cmd.has_command.wait(lock, [this]{return m_manual_cmd.command_waiting_exec;});
+            m_manual_cmd.method();
+            m_manual_cmd.command_waiting_exec = false;
+            m_manual_cmd.result_waiting = true;
+            // Manual command ran, signal the sender to continue.
+            lock.unlock();
+            m_manual_cmd.has_result.notify_one();
+        }
+        else
+        {
+            // There was no command after all.
+            lock.unlock();
         }
     }
 
-    // Do not auto-join servers on this monitor loop if a failover (or any other cluster modification)
-    // has been performed, as server states have not been updated yet. It will happen next iteration.
-    if (!config_get_global_options()->passive && m_auto_rejoin && !m_cluster_modified &&
-        cluster_can_be_joined())
+    if (!config_get_global_options()->passive)
     {
-        // Check if any servers should be autojoined to the cluster and try to join them.
-        handle_auto_rejoin();
-    }
+        if (m_auto_failover && !m_cluster_modified)
+        {
+            handle_auto_failover();
+        }
 
-    /* Check if any slave servers have read-only off and turn it on if user so wishes. Again, do not
-     * perform this if cluster has been modified this loop since it may not be clear which server
-     * should be a slave. */
-    if (!config_get_global_options()->passive && m_enforce_read_only_slaves && !m_cluster_modified)
+        // Do not auto-join servers on this monitor loop if a failover (or any other cluster modification)
+        // has been performed, as server states have not been updated yet. It will happen next iteration.
+        if (m_auto_rejoin && !m_cluster_modified && cluster_can_be_joined())
+        {
+            // Check if any servers should be autojoined to the cluster and try to join them.
+            handle_auto_rejoin();
+        }
+
+        /* Check if any slave servers have read-only off and turn it on if user so wishes. Again, do not
+         * perform this if cluster has been modified this loop since it may not be clear which server
+         * should be a slave. */
+        if (m_enforce_read_only_slaves && !m_cluster_modified)
+        {
+            enforce_read_only_on_slaves();
+        }
+    }
+}
+
+/**
+ * Save info on the master server's multimaster group, if any. This is required when checking for changes
+ * in the topology.
+ */
+void MariaDBMonitor::update_master_cycle_info()
+{
+    if (m_master)
     {
-        enforce_read_only_on_slaves();
+        int new_cycle_id = m_master->m_node.cycle;
+        m_master_cycle_status.cycle_id = new_cycle_id;
+        if (new_cycle_id == NodeData::CYCLE_NONE)
+        {
+            m_master_cycle_status.cycle_members.clear();
+        }
+        else
+        {
+            m_master_cycle_status.cycle_members = m_cycles[new_cycle_id];
+        }
+    }
+    else
+    {
+        m_master_cycle_status.cycle_id = NodeData::CYCLE_NONE;
+        m_master_cycle_status.cycle_members.clear();
     }
 }
 
@@ -675,7 +725,6 @@ void MariaDBMonitor::handle_auto_rejoin()
         if (joins > 0)
         {
             MXS_NOTICE("%d server(s) redirected or rejoined the cluster.", joins);
-            m_cluster_modified = true;
         }
     }
     else
@@ -968,6 +1017,78 @@ bool MariaDBMonitor::check_sql_files()
     return rval;
 }
 
+/**
+ * Schedule a manual command for execution. It will be run during the next monitor loop. This method waits
+ * for the command to have finished running.
+ *
+ * @param command Function object containing the method the monitor should execute: switchover, failover or
+ * rejoin.
+ * @param error_out Json error output
+ * @return True if command execution was attempted. False if monitor was in an invalid state
+ * to run the command.
+ */
+bool MariaDBMonitor::execute_manual_command(std::function<void (void)> command, json_t** error_out)
+{
+    bool rval = false;
+    if (state() != MXS_MONITOR_RUNNING)
+    {
+        PRINT_MXS_JSON_ERROR(error_out, "The monitor is not running, cannot execute manual command.");
+    }
+    else if (m_manual_cmd.command_waiting_exec)
+    {
+        PRINT_MXS_JSON_ERROR(error_out,
+                             "Previous command has not been executed, cannot send another command.");
+        ss_dassert(!true);
+    }
+    else
+    {
+        rval = true;
+        // Write the command.
+        std::unique_lock<std::mutex> lock(m_manual_cmd.mutex);
+        m_manual_cmd.method = command;
+        m_manual_cmd.command_waiting_exec = true;
+        // Signal the monitor thread to start running the command.
+        lock.unlock();
+        m_manual_cmd.has_command.notify_one();
+
+        // Wait for the result.
+        lock.lock();
+        m_manual_cmd.has_result.wait(lock, [this]{return m_manual_cmd.result_waiting;});
+        m_manual_cmd.result_waiting = false;
+    }
+    return rval;
+}
+
+bool MariaDBMonitor::run_manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out)
+{
+    bool rval = false;
+    bool send_ok = execute_manual_command([this, &rval, new_master, current_master, error_out]()
+    {
+        rval = manual_switchover(new_master, current_master, error_out);
+    }, error_out);
+    return send_ok && rval;
+}
+
+bool MariaDBMonitor::run_manual_failover(json_t** error_out)
+{
+    bool rval = false;
+    bool send_ok = execute_manual_command([this, &rval, error_out]()
+    {
+        rval = manual_failover(error_out);
+    }, error_out);
+    return send_ok && rval;
+}
+
+bool MariaDBMonitor::run_manual_rejoin(SERVER* rejoin_server, json_t** error_out)
+{
+    bool rval = false;
+    bool send_ok = execute_manual_command([this, &rval, rejoin_server, error_out]()
+    {
+        rval = manual_rejoin(rejoin_server, error_out);
+    }, error_out);
+    return send_ok && rval;
+}
+
 /**
  * Command handler for 'switchover'
  *
@@ -995,7 +1116,7 @@ bool handle_manual_switchover(const MODULECMD_ARG* args, json_t** error_out)
         auto handle = static_cast<MariaDBMonitor*>(mon->instance);
         SERVER* new_master = (args->argc >= 2) ? args->argv[1].value.server : NULL;
         SERVER* current_master = (args->argc == 3) ? args->argv[2].value.server : NULL;
-        rval = handle->manual_switchover(new_master, current_master, error_out);
+        rval = handle->run_manual_switchover(new_master, current_master, error_out);
     }
     return rval;
 }
@@ -1021,7 +1142,7 @@ bool handle_manual_failover(const MODULECMD_ARG* args, json_t** output)
     {
         MXS_MONITOR* mon = args->argv[0].value.monitor;
         auto handle = static_cast<MariaDBMonitor*>(mon->instance);
-        rv = handle->manual_failover(output);
+        rv = handle->run_manual_failover(output);
     }
     return rv;
 }
@@ -1049,7 +1170,7 @@ bool handle_manual_rejoin(const MODULECMD_ARG* args, json_t** output)
         MXS_MONITOR* mon = args->argv[0].value.monitor;
         SERVER* server = args->argv[1].value.server;
         auto handle = static_cast<MariaDBMonitor*>(mon->instance);
-        rv = handle->manual_rejoin(server, output);
+        rv = handle->run_manual_rejoin(server, output);
     }
     return rv;
 }
diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh
index 87c70d533..7fd7ee5db 100644
--- a/server/modules/monitor/mariadbmon/mariadbmon.hh
+++ b/server/modules/monitor/mariadbmon/mariadbmon.hh
@@ -13,6 +13,8 @@
  * Public License.
  */
 #include "mariadbmon_common.hh"
+#include <condition_variable>
+#include <functional>
 #include <string>
 #include <tr1/unordered_map>
 #include <vector>
@@ -69,32 +71,31 @@ public:
     static MariaDBMonitor* create(MXS_MONITOR *monitor);
 
     /**
-     * Handle switchover
+     * Perform user-activated switchover.
      *
-     * @new_master      The specified new master
-     * @current_master  The specified current master. If NULL, monitor will autoselect.
-     * @output          Pointer where to place output object
-     *
-     * @return True, if switchover was performed, false otherwise.
+     * @param new_master      The specified new master. If NULL, monitor will autoselect.
+     * @param current_master  The specified current master. If NULL, monitor will autoselect.
+     * @param error_out       Json error output
+     * @return True if switchover was performed
      */
-    bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
+    bool run_manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
 
     /**
      * Perform user-activated failover.
      *
-     * @param output  Json error output
-     * @return True on success
+     * @param error_out Json error output
+     * @return True if failover was performed
      */
-    bool manual_failover(json_t** output);
+    bool run_manual_failover(json_t** error_out);
 
     /**
      * Perform user-activated rejoin
      *
-     * @param rejoin_server     Server to join
-     * @param output            Json error output
-     * @return True on success
+     * @param rejoin_server Server to join
+     * @param error_out Json error output
+     * @return True if rejoin was performed
      */
-    bool manual_rejoin(SERVER* rejoin_server, json_t** output);
+    bool run_manual_rejoin(SERVER* rejoin_server, json_t** error_out);
 
 protected:
     void pre_loop();
@@ -109,17 +110,35 @@ private:
         ServerArray cycle_members;
     };
 
+    /* Structure used to communicate commands and results between the MaxAdmin and monitor threads.
+     * The monitor can only process one manual command at a time, which is already enforced by
+     * the admin thread. */
+    struct ManualCommand
+    {
+    public:
+        std::mutex mutex;                    /**< Mutex used by the condition variables */
+        std::condition_variable has_command; /**< Notified when a command is waiting execution */
+        bool command_waiting_exec = false;   /**< Guard variable for the above */
+        std::function<void (void)> method;   /**< The method to run when executing the command */
+        std::condition_variable has_result;  /**< Notified when the command has run */
+        bool result_waiting = false;         /**< Guard variable for the above */
+    };
+
     unsigned long m_id;                  /**< Monitor ID */
     ServerArray m_servers;               /**< Servers of the monitor */
     ServerInfoMap m_server_info;         /**< Map from server base struct to MariaDBServer */
+    ManualCommand m_manual_cmd;          /**< Communicates manual commands and results */
 
     // Values updated by monitor
     MariaDBServer* m_master;             /**< Master server for Master/Slave replication */
+    MariaDBServer* m_next_master;        /**< When master changes because of a failover/switchover, the new
+                                           *  master is written here so the next monitor loop picks it up. */
     IdToServerMap m_servers_by_id;       /**< Map from server id:s to MariaDBServer */
     int64_t m_master_gtid_domain;        /**< gtid_domain_id most recently seen on the master  */
     std::string m_external_master_host;  /**< External master host, for fail/switchover */
     int m_external_master_port;          /**< External master port */
-    bool m_cluster_modified;             /**< Has an automatic failover/rejoin been performed this loop? */
+    bool m_cluster_topology_changed;     /**< Has cluster topology changed since last monitor loop? */
+    bool m_cluster_modified;             /**< Has a failover/switchover/rejoin been performed this loop? */
     CycleMap m_cycles;                   /**< Map from cycle number to cycle member servers */
     CycleInfo m_master_cycle_status;     /**< Info about master server cycle from previous round */
 
@@ -176,6 +195,7 @@ private:
     bool set_replication_credentials(const MXS_CONFIG_PARAMETER* params);
     MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db);
     MariaDBServer* get_server(int64_t id);
+    bool execute_manual_command(std::function<void (void)> command, json_t** error_out);
 
     // Cluster discovery and status assignment methods
     void update_server(MariaDBServer& server);
@@ -206,10 +226,12 @@ private:
     MariaDBServer* find_master_inside_cycle(ServerArray& cycle_servers);
     void assign_master_and_slave();
     void assign_slave_and_relay_master(MariaDBServer* node);
-    bool master_no_longer_valid(std::string* reason_out);
+    bool master_is_valid(std::string* reason_out);
     bool cycle_has_master_server(ServerArray& cycle_servers);
+    void update_master_cycle_info();
 
     // Switchover methods
+    bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
     bool switchover_check(SERVER* new_master, SERVER* current_master,
                           MariaDBServer** new_master_out, MariaDBServer** current_master_out,
                           json_t** error_out);
@@ -225,13 +247,15 @@ private:
     bool switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master);
 
     // Failover methods
-    bool handle_auto_failover();
+    bool manual_failover(json_t** output);
+    void handle_auto_failover();
     bool failover_not_possible();
     bool slave_receiving_events();
     bool failover_check(std::string* error_out);
     bool do_failover(json_t** err_out);
 
     // Rejoin methods
+    bool manual_rejoin(SERVER* rejoin_server, json_t** output);
     bool cluster_can_be_joined();
     void handle_auto_rejoin();
     bool get_joinable_servers(ServerArray* output);
diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc
index bef24160b..0e336f3f4 100644
--- a/server/modules/monitor/mariadbmon/mariadbserver.cc
+++ b/server/modules/monitor/mariadbmon/mariadbserver.cc
@@ -49,6 +49,7 @@ MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_
     , m_heartbeat_period(0)
     , m_latest_event(0)
     , m_gtid_domain_id(GTID_DOMAIN_UNKNOWN)
+    , m_topology_changed(true)
     , m_print_update_errormsg(true)
 {
     ss_dassert(monitored_server);