MXS-1845 Add redirection code

Should work with multimaster replication.
2018-09-28 16:18:38 +03:00
parent 75f9921ca2
commit 1ca5d02abb
4 changed files with 112 additions and 12 deletions
--- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc
+++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc
@ -22,6 +22,7 @@
 using std::string;
 using std::unique_ptr;
 using maxscale::string_printf;
+using maxbase::StopWatch;

 static const char RE_ENABLE_FMT[] = "To re-enable automatic %s, manually set '%s' to 'true' "
                                    "for monitor '%s' via MaxAdmin or the REST API, or restart MaxScale.";
@ -417,6 +418,46 @@ int MariaDBMonitor::redirect_slaves(MariaDBServer* new_master,
    return successes;
 }

+/**
+ * Redirect slaves to replicate from the promotion target.
+ *
+ * @param op Operation descriptor
+ * @param slaves An array of slaves to redirect
+ * @param redirected_slaves A vector where to insert successfully redirected slaves
+ * @return The number of slaves successfully redirected
+ */
+int MariaDBMonitor::redirect_slaves_ex(ClusterOperation& op, const ServerArray& slaves,
+                                       ServerArray* redirected_slaves)
+{
+    mxb_assert(redirected_slaves != NULL);
+    if (slaves.empty())
+    {
+        // This is ok, nothing to do.
+        return 0;
+    }
+
+    string slave_names = monitored_servers_to_string(slaves);
+    MXS_NOTICE("Redirecting %s to replicate from %s instead of %s.",
+               slave_names.c_str(), op.promotion_target->name(), op.demotion_target->name());
+    int successes = 0;
+    for (MariaDBServer* redirectable : slaves)
+    {
+        if (redirectable->redirect_existing_slave_conn(op))
+        {
+            successes++;
+            redirected_slaves->push_back(redirectable);
+        }
+    }
+    if (size_t(successes) == slaves.size())
+    {
+        MXS_NOTICE("All redirects successful.");
+    }
+    else
+    {
+        MXS_WARNING("%lu out of %lu redirects failed.", slaves.size() - successes, slaves.size());
+    }
+    return successes;
+}
 /**
 * Set the new master to replicate from the cluster external master.
 *
@ -723,13 +764,11 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
                {
                    redirected_slaves.push_back(demotion_target);
                }
-                int redirects = redirect_slaves(promotion_target, redirectable_slaves, &redirected_slaves);
+                int redirects = redirect_slaves_ex(op, redirectable_slaves, &redirected_slaves);

                bool success = redirectable_slaves.empty() ? start_ok : start_ok || redirects > 0;
                if (success)
                {
-                    op.time_remaining -= timer.restart();
-
                    // Step 6: Finally, add an event to the new master to advance gtid and wait for the slaves
                    // to receive it. If using external replication, skip this step. Come up with an
                    // alternative later.
@ -793,7 +832,6 @@ bool MariaDBMonitor::failover_perform(ClusterOperation& op)
 {
    mxb_assert(op.promotion_target && op.demotion_target);
    MariaDBServer* const promotion_target = op.promotion_target;
-    maxbase::StopWatch timer;

    // Step 1: Populate a vector with all slaves not the selected master.
    ServerArray redirectable_slaves = get_redirectables(promotion_target, op.demotion_target);
@ -802,17 +840,17 @@ bool MariaDBMonitor::failover_perform(ClusterOperation& op)
    // Step 2: Stop and reset slave, set read-only to 0.
    if (promotion_target->promote(op))
    {
+        // Point of no return. Even if following steps fail, do not try to undo.
        m_next_master = promotion_target;
        m_cluster_modified = true;

        // Step 3: Redirect slaves.
        ServerArray redirected_slaves;
-        int redirects = redirect_slaves(promotion_target, redirectable_slaves, &redirected_slaves);
+        int redirects = redirect_slaves_ex(op, redirectable_slaves, &redirected_slaves);
        bool success = redirectable_slaves.empty() ? true : redirects > 0;
        if (success)
        {
-            op.time_remaining -= timer.restart();
-
+            StopWatch timer;
            // Step 4: Finally, add an event to the new master to advance gtid and wait for the slaves
            // to receive it. seconds_remaining can be 0 or less at this point. Even in such a case
            // wait_cluster_stabilization() may succeed if replication is fast enough. If using external
@ -1679,9 +1717,9 @@ void MariaDBMonitor::check_cluster_operations_support()
 * @return The first connected slave or NULL if none found
 */
 const MariaDBServer* MariaDBMonitor::slave_receiving_events(const MariaDBServer* demotion_target,
-                                                            maxbase::Duration* event_age_out)
+                                                            maxbase::Duration*   event_age_out)
 {
-    auto time_now =  maxbase::Clock::now();
+    auto time_now = maxbase::Clock::now();
    maxbase::Clock::time_point alive_after = time_now - std::chrono::seconds(m_master_failure_timeout);

    const MariaDBServer* connected_slave = NULL;