Run manual commands without stopping the monitor

The command is saved in a function object which is read by the monitor thread. This way, manual and automatic cluster modification commands are run in the same step of a monitor cycle. This update required several modifications to related code.
2018-06-21 18:34:27 +03:00
parent 6bf10904d7
commit 9525d3507b
5 changed files with 231 additions and 132 deletions
--- a/server/modules/monitor/mariadbmon/cluster_discovery.cc
+++ b/server/modules/monitor/mariadbmon/cluster_discovery.cc
@ -1227,24 +1227,25 @@ void MariaDBMonitor::assign_slave_and_relay_master(MariaDBServer* node)
 }

 /**
- * Should a new master server be selected?
+ * Is the current master server still valid or should a new one be selected?
 *
 * @param reason_out Output for a text description
- * @return True, if the current master has changed in a way that a new master should be selected.
+ * @return True, if master is ok. False if the current master has changed in a way that
+ * a new master should be selected.
 */
-bool MariaDBMonitor::master_no_longer_valid(std::string* reason_out)
+bool MariaDBMonitor::master_is_valid(std::string* reason_out)
 {
    // The master server of the cluster needs to be re-calculated in the following four cases:
-    bool rval = false;
+    bool rval = true;
    // 1) There is no master.
    if (m_master == NULL)
    {
-        rval = true;
+        rval = false;
    }
    // 2) read_only has been activated on the master.
    else if (m_master->is_read_only())
    {
-        rval = true;
+        rval = false;
        *reason_out = "it is in read-only mode";
    }
    // 3) The master was a non-replicating master (not in a cycle) but now has a slave connection.
@ -1253,7 +1254,7 @@ bool MariaDBMonitor::master_no_longer_valid(std::string* reason_out)
        // The master should not have a master of its own.
        if (!m_master->m_node.parents.empty())
        {
-            rval = true;
+            rval = false;
            *reason_out = "it has started replicating from another server in the cluster";
        }
    }
@ -1268,7 +1269,7 @@ bool MariaDBMonitor::master_no_longer_valid(std::string* reason_out)
        // 4a) The master is no longer in a cycle.
        if (current_cycle_id == NodeData::CYCLE_NONE)
        {
-            rval = true;
+            rval = false;
            ServerArray& old_members = m_master_cycle_status.cycle_members;
            string server_names_old = monitored_servers_to_string(old_members);
            *reason_out = "it is no longer in the multimaster group (" + server_names_old + ")";
@ -1279,7 +1280,7 @@ bool MariaDBMonitor::master_no_longer_valid(std::string* reason_out)
            ServerArray& current_members = m_cycles[current_cycle_id];
            if (cycle_has_master_server(current_members))
            {
-                rval = true;
+                rval = false;
                string server_names_current = monitored_servers_to_string(current_members);
                *reason_out = "a server in the master's multimaster group (" + server_names_current +
                    ") is replicating from a server not in the group";
--- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc
+++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc
@ -25,17 +25,6 @@ static void print_redirect_errors(MariaDBServer* first_server, const ServerArray

 bool MariaDBMonitor::manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out)
 {
-    bool running = is_running();
-    if (running)
-    {
-        stop();
-        MXS_NOTICE("Stopped the monitor %s for the duration of switchover.", m_monitor->name);
-    }
-    else
-    {
-        MXS_NOTICE("Monitor %s already stopped, switchover can proceed.", m_monitor->name);
-    }
-
    /* It's possible for either current_master, or both new_master & current_master to be NULL, which means
     * autoselect. Only autoselecting new_master is not possible. Autoselection will happen at the actual
     * switchover function. */
@ -70,27 +59,11 @@ bool MariaDBMonitor::manual_switchover(SERVER* new_master, SERVER* current_maste
        }
    }

-    if (running)
-    {
-        // TODO: What if this fails?
-        start(m_monitor->parameters);
-    }
    return rval;
 }

 bool MariaDBMonitor::manual_failover(json_t** output)
 {
-    bool running = is_running();
-    if (running)
-    {
-        stop();
-        MXS_NOTICE("Stopped monitor %s for the duration of failover.", m_monitor->name);
-    }
-    else
-    {
-        MXS_NOTICE("Monitor %s already stopped, failover can proceed.", m_monitor->name);
-    }
-
    bool rv = true;
    string failover_error;
    rv = failover_check(&failover_error);
@ -112,27 +85,11 @@ bool MariaDBMonitor::manual_failover(json_t** output)
            failover_error.c_str());
    }

-    if (running)
-    {
-        // TODO: What if this fails?
-        start(m_monitor->parameters);
-    }
    return rv;
 }

 bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
 {
-    bool running = is_running();
-    if (running)
-    {
-        stop();
-        MXS_NOTICE("Stopped monitor %s for the duration of rejoin.", m_monitor->name);
-    }
-    else
-    {
-        MXS_NOTICE("Monitor %s already stopped, rejoin can proceed.", m_monitor->name);
-    }
-
    bool rval = false;
    if (cluster_can_be_joined())
    {
@ -188,11 +145,6 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
        PRINT_MXS_JSON_ERROR(output, BAD_CLUSTER, m_monitor->name);
    }

-    if (running)
-    {
-        // TODO: What if this fails?
-        start(m_monitor->parameters);
-    }
    return rval;
 }

@ -351,6 +303,7 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t**
            if (op_success)
            {
                servers_joined++;
+                m_cluster_modified = true;
            }
        }
    }
@ -587,6 +540,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer
    // Step 2: Set read-only to on, flush logs, update master gtid:s
    if (switchover_demote_master(demotion_target, err_out))
    {
+        m_cluster_modified = true;
        bool catchup_and_promote_success = false;
        time_t step2_time = time(NULL);
        seconds_remaining -= difftime(step2_time, start_time);
@ -606,6 +560,8 @@ bool MariaDBMonitor::do_switchover(MariaDBServer** current_master, MariaDBServer
            if (promote_new_master(promotion_target, err_out))
            {
                catchup_and_promote_success = true;
+                m_next_master = promotion_target;
+
                // Step 5: Redirect slaves and start replication on old master.
                ServerArray redirected_slaves;
                bool start_ok = switchover_start_slave(demotion_target, promotion_target);
@ -706,6 +662,8 @@ bool MariaDBMonitor::do_failover(json_t** err_out)
        // Step 3: Stop and reset slave, set read-only to 0.
        if (promote_new_master(new_master, err_out))
        {
+            m_next_master = new_master;
+            m_cluster_modified = true;
            // Step 4: Redirect slaves.
            ServerArray redirected_slaves;
            int redirects = redirect_slaves(new_master, redirectable_slaves, &redirected_slaves);
@ -1378,17 +1336,14 @@ bool MariaDBMonitor::failover_check(string* error_out)
 * If a master failure has occurred and MaxScale is configured with failover functionality, this fuction
 * executes failover to select and promote a new master server. This function should be called immediately
 * after @c mon_process_state_changes. If an error occurs, this method disables automatic failover.
- *
- * @return True if failover was performed, or at least attempted
 */
-bool MariaDBMonitor::handle_auto_failover()
+void MariaDBMonitor::handle_auto_failover()
 {
    const char RE_ENABLE_FMT[] = "%s To re-enable failover, manually set '%s' to 'true' for monitor "
                                 "'%s' via MaxAdmin or the REST API, or restart MaxScale.";
-    bool cluster_modified = false;
-    if (config_get_global_options()->passive || (m_master && m_master->is_master()))
+    if (m_master && m_master->is_master())
    {
-        return cluster_modified;
+        return;
    }

    if (failover_not_possible())
@ -1400,14 +1355,14 @@ bool MariaDBMonitor::handle_auto_failover()
        MXS_ERROR(RE_ENABLE_FMT, PROBLEMS, CN_AUTO_FAILOVER, m_monitor->name);
        m_auto_failover = false;
        disable_setting(CN_AUTO_FAILOVER);
-        return cluster_modified;
+        return;
    }

    // If master seems to be down, check if slaves are receiving events.
    if (m_verify_master_failure && m_master && m_master->is_down() && slave_receiving_events())
    {
        MXS_INFO("Master failure not yet confirmed by slaves, delaying failover.");
-        return cluster_modified;
+        return;
    }

    MariaDBServer* failed_master = NULL;
@ -1463,7 +1418,6 @@ bool MariaDBMonitor::handle_auto_failover()
                    m_auto_failover = false;
                    disable_setting(CN_AUTO_FAILOVER);
                }
-                cluster_modified = true;
            }
            else
            {
@ -1482,8 +1436,6 @@ bool MariaDBMonitor::handle_auto_failover()
    {
        m_warn_failover_precond = true;
    }
-
-    return cluster_modified;
 }

 bool MariaDBMonitor::failover_not_possible()
--- a/server/modules/monitor/mariadbmon/mariadbmon.cc
+++ b/server/modules/monitor/mariadbmon/mariadbmon.cc
@ -55,7 +55,8 @@ MariaDBMonitor::MariaDBMonitor(MXS_MONITOR* monitor)
    , m_id(config_get_global_options()->id)
    , m_master_gtid_domain(GTID_DOMAIN_UNKNOWN)
    , m_external_master_port(PORT_UNKNOWN)
-    , m_cluster_modified(true)
+    , m_cluster_topology_changed(true)
+    , m_cluster_modified(false)
    , m_switchover_on_low_disk_space(false)
    , m_warn_set_standalone_master(true)
    , m_log_no_master(true)
@ -102,6 +103,7 @@ void MariaDBMonitor::clear_server_info()
    m_servers_by_id.clear();
    m_excluded_servers.clear();
    m_master = NULL;
+    m_next_master = NULL;
    m_master_gtid_domain = GTID_DOMAIN_UNKNOWN;
    m_external_master_host.clear();
    m_external_master_port = PORT_UNKNOWN;
@ -293,7 +295,6 @@ json_t* MariaDBMonitor::diagnostics_json() const
 */
 void MariaDBMonitor::update_server(MariaDBServer& server)
 {
-    server.m_topology_changed = false;
    MXS_MONITORED_SERVER* mon_srv = server.m_server_base;
    /* Monitor server if not in maintenance. */
    bool in_maintenance = server.is_in_maintenance();
@ -395,18 +396,18 @@ void MariaDBMonitor::tick()
    }

    // Query all servers for their status.
-    bool topology_changed = false;
    for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
    {
        MariaDBServer* server = *iter;
        update_server(*server);
        if (server->m_topology_changed)
        {
-            topology_changed = true;
+            m_cluster_topology_changed = true;
+            server->m_topology_changed = false;
        }
    }

-    if (topology_changed)
+    if (m_cluster_topology_changed)
    {
        // This means that a server id or a slave connection has changed, or read_only was set.
        // Update the server id array and check various things.
@ -417,6 +418,16 @@ void MariaDBMonitor::tick()
        }
        build_replication_graph();
        find_graph_cycles();
+
+        /* Check if a failover/switchover was performed last loop and the master should change.
+         * In this case, update the master and its cycle info here. */
+        if (m_next_master)
+        {
+            m_master = m_next_master;
+            update_master_cycle_info();
+            m_next_master = NULL;
+        }
+
        // Find the server that looks like it would be the best master. It does not yet overwrite the
        // current master.
        string topology_messages;
@ -424,7 +435,19 @@ void MariaDBMonitor::tick()

        // Check if current master is still valid.
        string reason;
-        if (master_no_longer_valid(&reason))
+        if (master_is_valid(&reason))
+        {
+            // Update master cycle info in case it has changed
+            update_master_cycle_info();
+            if (root_master && m_master != root_master)
+            {
+                // Master is still valid but it is no longer the best master. Print a warning.
+                MXS_WARNING("'%s' is a better master candidate than the current master '%s'. "
+                            "Master will change if '%s' is no longer a valid master.",
+                            root_master->name(), m_master->name(), m_master->name());
+            }
+        }
+        else
        {
            if (m_master && !reason.empty())
            {
@ -444,36 +467,17 @@ void MariaDBMonitor::tick()
            }

            m_master = root_master;
+            update_master_cycle_info();
            if (m_master)
            {
-                // A new master has been set. Save some data regarding the type of the master.
-                int new_cycle_id = m_master->m_node.cycle;
-                m_master_cycle_status.cycle_id = new_cycle_id;
-                if (new_cycle_id == NodeData::CYCLE_NONE)
-                {
-                    m_master_cycle_status.cycle_members.clear();
-                }
-                else
-                {
-                    m_master_cycle_status.cycle_members = m_cycles[new_cycle_id];
-                }
                MXS_NOTICE("'%s' is the best master candidate.", m_master->name());
            }
            else
            {
-                // The current master cannot be used and no proper candidate exists.
-                m_master_cycle_status.cycle_id = NodeData::CYCLE_NONE;
-                m_master_cycle_status.cycle_members.clear();
                MXS_WARNING("No valid master servers found.");
            }
        }
-        else if (root_master && m_master != root_master)
-        {
-            // Master is still valid but it is no longer the best master. Print a warning.
-            MXS_WARNING("'%s' is a better master candidate than the current master '%s'. "
-                        "Master will change if '%s' is no longer a valid master.",
-                        root_master->name(), m_master->name(), m_master->name());
-        }
+        m_cluster_topology_changed = false;
    }

    // Always re-assign master, slave etc bits as these depend on other factors outside topology
@ -542,30 +546,76 @@ void MariaDBMonitor::process_state_changes()
    MonitorInstance::process_state_changes();

    m_cluster_modified = false;
-    if (m_auto_failover)
+    // Check for manual commands
+    if (m_manual_cmd.command_waiting_exec)
    {
-        if ((m_cluster_modified = handle_auto_failover()))
+        // Looks like a command is waiting. Lock mutex, check again and wait for the condition variable.
+        std::unique_lock<std::mutex> lock(m_manual_cmd.mutex);
+        if (m_manual_cmd.command_waiting_exec)
        {
-            // Force a master selection on next monitor loop, otherwise the old master would stay.
-            m_master = NULL;
+            m_manual_cmd.has_command.wait(lock, [this]{return m_manual_cmd.command_waiting_exec;});
+            m_manual_cmd.method();
+            m_manual_cmd.command_waiting_exec = false;
+            m_manual_cmd.result_waiting = true;
+            // Manual command ran, signal the sender to continue.
+            lock.unlock();
+            m_manual_cmd.has_result.notify_one();
+        }
+        else
+        {
+            // There was no command after all.
+            lock.unlock();
        }
    }

-    // Do not auto-join servers on this monitor loop if a failover (or any other cluster modification)
-    // has been performed, as server states have not been updated yet. It will happen next iteration.
-    if (!config_get_global_options()->passive && m_auto_rejoin && !m_cluster_modified &&
-        cluster_can_be_joined())
+    if (!config_get_global_options()->passive)
    {
-        // Check if any servers should be autojoined to the cluster and try to join them.
-        handle_auto_rejoin();
-    }
+        if (m_auto_failover && !m_cluster_modified)
+        {
+            handle_auto_failover();
+        }

-    /* Check if any slave servers have read-only off and turn it on if user so wishes. Again, do not
-     * perform this if cluster has been modified this loop since it may not be clear which server
-     * should be a slave. */
-    if (!config_get_global_options()->passive && m_enforce_read_only_slaves && !m_cluster_modified)
+        // Do not auto-join servers on this monitor loop if a failover (or any other cluster modification)
+        // has been performed, as server states have not been updated yet. It will happen next iteration.
+        if (m_auto_rejoin && !m_cluster_modified && cluster_can_be_joined())
+        {
+            // Check if any servers should be autojoined to the cluster and try to join them.
+            handle_auto_rejoin();
+        }
+
+        /* Check if any slave servers have read-only off and turn it on if user so wishes. Again, do not
+         * perform this if cluster has been modified this loop since it may not be clear which server
+         * should be a slave. */
+        if (m_enforce_read_only_slaves && !m_cluster_modified)
+        {
+            enforce_read_only_on_slaves();
+        }
+    }
+}
+
+/**
+ * Save info on the master server's multimaster group, if any. This is required when checking for changes
+ * in the topology.
+ */
+void MariaDBMonitor::update_master_cycle_info()
+{
+    if (m_master)
    {
-        enforce_read_only_on_slaves();
+        int new_cycle_id = m_master->m_node.cycle;
+        m_master_cycle_status.cycle_id = new_cycle_id;
+        if (new_cycle_id == NodeData::CYCLE_NONE)
+        {
+            m_master_cycle_status.cycle_members.clear();
+        }
+        else
+        {
+            m_master_cycle_status.cycle_members = m_cycles[new_cycle_id];
+        }
+    }
+    else
+    {
+        m_master_cycle_status.cycle_id = NodeData::CYCLE_NONE;
+        m_master_cycle_status.cycle_members.clear();
    }
 }

@ -675,7 +725,6 @@ void MariaDBMonitor::handle_auto_rejoin()
        if (joins > 0)
        {
            MXS_NOTICE("%d server(s) redirected or rejoined the cluster.", joins);
-            m_cluster_modified = true;
        }
    }
    else
@ -968,6 +1017,78 @@ bool MariaDBMonitor::check_sql_files()
    return rval;
 }

+/**
+ * Schedule a manual command for execution. It will be ran during the next monitor loop. This method waits
+ * for the command to have finished running.
+ *
+ * @param command Function object containing the method the monitor should execute: switchover, failover or
+ * rejoin.
+ * @param error_out Json error output
+ * @return True if command execution was attempted. False if monitor was in an invalid state
+ * to run the command.
+ */
+bool MariaDBMonitor::execute_manual_command(std::function<void (void)> command, json_t** error_out)
+{
+    bool rval = false;
+    if (state() != MXS_MONITOR_RUNNING)
+    {
+        PRINT_MXS_JSON_ERROR(error_out, "The monitor is not running, cannot execute manual command.");
+    }
+    else if (m_manual_cmd.command_waiting_exec)
+    {
+        PRINT_MXS_JSON_ERROR(error_out,
+                             "Previous command has not been executed, cannot send another command.");
+        ss_dassert(!true);
+    }
+    else
+    {
+        rval = true;
+        // Write the command.
+        std::unique_lock<std::mutex> lock(m_manual_cmd.mutex);
+        m_manual_cmd.method = command;
+        m_manual_cmd.command_waiting_exec = true;
+        // Signal the monitor thread to start running the command.
+        lock.unlock();
+        m_manual_cmd.has_command.notify_one();
+
+        // Wait for the result.
+        lock.lock();
+        m_manual_cmd.has_result.wait(lock, [this]{return m_manual_cmd.result_waiting;});
+        m_manual_cmd.result_waiting = false;
+    }
+    return rval;
+}
+
+bool MariaDBMonitor::run_manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out)
+{
+    bool rval = false;
+    bool send_ok = execute_manual_command([this, &rval, new_master, current_master, error_out]()
+    {
+        rval = manual_switchover(new_master, current_master, error_out);
+    }, error_out);
+    return send_ok && rval;
+}
+
+bool MariaDBMonitor::run_manual_failover(json_t** error_out)
+{
+    bool rval = false;
+    bool send_ok = execute_manual_command([this, &rval, error_out]()
+    {
+        rval = manual_failover(error_out);
+    }, error_out);
+    return send_ok && rval;
+}
+
+bool MariaDBMonitor::run_manual_rejoin(SERVER* rejoin_server, json_t** error_out)
+{
+    bool rval = false;
+    bool send_ok = execute_manual_command([this, &rval, rejoin_server, error_out]()
+    {
+        rval = manual_rejoin(rejoin_server, error_out);
+    }, error_out);
+    return send_ok && rval;
+}
+
 /**
 * Command handler for 'switchover'
 *
@ -995,7 +1116,7 @@ bool handle_manual_switchover(const MODULECMD_ARG* args, json_t** error_out)
        auto handle = static_cast<MariaDBMonitor*>(mon->instance);
        SERVER* new_master = (args->argc >= 2) ? args->argv[1].value.server : NULL;
        SERVER* current_master = (args->argc == 3) ? args->argv[2].value.server : NULL;
-        rval = handle->manual_switchover(new_master, current_master, error_out);
+        rval = handle->run_manual_switchover(new_master, current_master, error_out);
    }
    return rval;
 }
@ -1021,7 +1142,7 @@ bool handle_manual_failover(const MODULECMD_ARG* args, json_t** output)
    {
        MXS_MONITOR* mon = args->argv[0].value.monitor;
        auto handle = static_cast<MariaDBMonitor*>(mon->instance);
-        rv = handle->manual_failover(output);
+        rv = handle->run_manual_failover(output);
    }
    return rv;
 }
@ -1049,7 +1170,7 @@ bool handle_manual_rejoin(const MODULECMD_ARG* args, json_t** output)
        MXS_MONITOR* mon = args->argv[0].value.monitor;
        SERVER* server = args->argv[1].value.server;
        auto handle = static_cast<MariaDBMonitor*>(mon->instance);
-        rv = handle->manual_rejoin(server, output);
+        rv = handle->run_manual_rejoin(server, output);
    }
    return rv;
 }
--- a/server/modules/monitor/mariadbmon/mariadbmon.hh
+++ b/server/modules/monitor/mariadbmon/mariadbmon.hh
@ -13,6 +13,8 @@
 * Public License.
 */
 #include "mariadbmon_common.hh"
+#include <condition_variable>
+#include <functional>
 #include <string>
 #include <tr1/unordered_map>
 #include <vector>
@ -69,32 +71,31 @@ public:
    static MariaDBMonitor* create(MXS_MONITOR *monitor);

    /**
-     * Handle switchover
+     * Perform user-activated switchover.
     *
-     * @new_master      The specified new master
-     * @current_master  The specified current master. If NULL, monitor will autoselect.
-     * @output          Pointer where to place output object
-     *
-     * @return True, if switchover was performed, false otherwise.
+     * @param new_master      The specified new master. If NULL, monitor will autoselect.
+     * @param current_master  The specified current master. If NULL, monitor will autoselect.
+     * @param error_out       Json error output
+     * @return True if switchover was performed
     */
-    bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
+    bool run_manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);

    /**
     * Perform user-activated failover.
     *
-     * @param output  Json error output
-     * @return True on success
+     * @param error_out Json error output
+     * @return True if failover was performed
     */
-    bool manual_failover(json_t** output);
+    bool run_manual_failover(json_t** error_out);

    /**
     * Perform user-activated rejoin
     *
-     * @param rejoin_server     Server to join
-     * @param output            Json error output
-     * @return True on success
+     * @param rejoin_server Server to join
+     * @param error_out Json error output
+     * @return True if rejoin was performed
     */
-    bool manual_rejoin(SERVER* rejoin_server, json_t** output);
+    bool run_manual_rejoin(SERVER* rejoin_server, json_t** error_out);

 protected:
    void pre_loop();
@ -109,17 +110,35 @@ private:
        ServerArray cycle_members;
    };

+    /* Structure used to communicate commands and results between the MaxAdmin and monitor threads.
+     * The monitor can only process one manual command at a time, which is already enforced by
+     * the admin thread. */
+    struct ManualCommand
+    {
+    public:
+        std::mutex mutex;                    /**< Mutex used by the condition variables */
+        std::condition_variable has_command; /**< Notified when a command is waiting execution */
+        bool command_waiting_exec = false;   /**< Guard variable for the above */
+        std::function<void (void)> method;   /**< The method to run when executing the command */
+        std::condition_variable has_result;  /**< Notified when the command has ran */
+        bool result_waiting = false;         /**< Guard variable for the above */
+    };
+
    unsigned long m_id;                  /**< Monitor ID */
    ServerArray m_servers;               /**< Servers of the monitor */
    ServerInfoMap m_server_info;         /**< Map from server base struct to MariaDBServer */
+    ManualCommand m_manual_cmd;          /**< Communicates manual commands and results */

    // Values updated by monitor
    MariaDBServer* m_master;             /**< Master server for Master/Slave replication */
+    MariaDBServer* m_next_master;        /**< When master changes because of a failover/switchover, the new
+                                           *  master is written here so the next monitor loop picks it up. */
    IdToServerMap m_servers_by_id;       /**< Map from server id:s to MariaDBServer */
    int64_t m_master_gtid_domain;        /**< gtid_domain_id most recently seen on the master  */
    std::string m_external_master_host;  /**< External master host, for fail/switchover */
    int m_external_master_port;          /**< External master port */
-    bool m_cluster_modified;             /**< Has an automatic failover/rejoin been performed this loop? */
+    bool m_cluster_topology_changed;     /**< Has cluster topology changed since last monitor loop? */
+    bool m_cluster_modified;             /**< Has a failover/switchover/rejoin been performed this loop? */
    CycleMap m_cycles;                   /**< Map from cycle number to cycle member servers */
    CycleInfo m_master_cycle_status;     /**< Info about master server cycle from previous round */

@ -176,6 +195,7 @@ private:
    bool set_replication_credentials(const MXS_CONFIG_PARAMETER* params);
    MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db);
    MariaDBServer* get_server(int64_t id);
+    bool execute_manual_command(std::function<void (void)> command, json_t** error_out);

    // Cluster discovery and status assignment methods
    void update_server(MariaDBServer& server);
@ -206,10 +226,12 @@ private:
    MariaDBServer* find_master_inside_cycle(ServerArray& cycle_servers);
    void assign_master_and_slave();
    void assign_slave_and_relay_master(MariaDBServer* node);
-    bool master_no_longer_valid(std::string* reason_out);
+    bool master_is_valid(std::string* reason_out);
    bool cycle_has_master_server(ServerArray& cycle_servers);
+    void update_master_cycle_info();

    // Switchover methods
+    bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
    bool switchover_check(SERVER* new_master, SERVER* current_master,
                          MariaDBServer** new_master_out, MariaDBServer** current_master_out,
                          json_t** error_out);
@ -225,13 +247,15 @@ private:
    bool switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master);

    // Failover methods
-    bool handle_auto_failover();
+    bool manual_failover(json_t** output);
+    void handle_auto_failover();
    bool failover_not_possible();
    bool slave_receiving_events();
    bool failover_check(std::string* error_out);
    bool do_failover(json_t** err_out);

    // Rejoin methods
+    bool manual_rejoin(SERVER* rejoin_server, json_t** output);
    bool cluster_can_be_joined();
    void handle_auto_rejoin();
    bool get_joinable_servers(ServerArray* output);
--- a/server/modules/monitor/mariadbmon/mariadbserver.cc
+++ b/server/modules/monitor/mariadbmon/mariadbserver.cc
@ -49,6 +49,7 @@ MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_
    , m_heartbeat_period(0)
    , m_latest_event(0)
    , m_gtid_domain_id(GTID_DOMAIN_UNKNOWN)
+    , m_topology_changed(true)
    , m_print_update_errormsg(true)
 {
    ss_dassert(monitored_server);