MXS-1845 Add demotion code

The master demotion in switchover now uses query retrying with the switchover time limit.
2018-10-02 11:08:12 +03:00
parent a438a6df34
commit 49e85d9a28
3 changed files with 125 additions and 10 deletions
--- a/server/modules/monitor/mariadbmon/cluster_manipulation.cc
+++ b/server/modules/monitor/mariadbmon/cluster_manipulation.cc
@ -727,8 +727,6 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
    json_t** const error_out = op.error_out;
    mxb_assert(promotion_target && demotion_target);

-    maxbase::StopWatch timer;
-
    // Step 1: Save all slaves except promotion target to an array.
    // Try to redirect even disconnected slaves.
    // TODO: 'switchover_wait_slaves_catchup' needs to be smarter and not bother with such slaves.
@ -736,12 +734,11 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)

    bool rval = false;
    // Step 2: Set read-only to on, flush logs, update master gtid:s
-    if (switchover_demote_master(demotion_target, error_out))
+    if (demotion_target->demote(op))
    {
        m_cluster_modified = true;
        bool catchup_and_promote_success = false;
-        op.time_remaining -= timer.restart();
-
+        maxbase::StopWatch timer;
        // Step 3: Wait for the slaves (including promotion target) to catch up with master.
        ServerArray catchup_slaves = redirectable_slaves;
        catchup_slaves.push_back(promotion_target);
@ -750,7 +747,7 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
                                           op.time_remaining.secs(),
                                           error_out))
        {
-            auto step3_duration = timer.restart();
+            auto step3_duration = timer.lap();
            MXS_DEBUG("Switchover: slave catchup took %.1f seconds.", step3_duration.secs());
            op.time_remaining -= step3_duration;

@ -759,7 +756,7 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
            {
                catchup_and_promote_success = true;
                m_next_master = promotion_target;
-
+                timer.restart();
                // Step 5: Redirect slaves and start replication on old master.
                ServerArray redirected_slaves;
                bool start_ok = switchover_start_slave(demotion_target, promotion_target);
@ -772,6 +769,7 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
                bool success = redirectable_slaves.empty() ? start_ok : start_ok || redirects > 0;
                if (success)
                {
+                    op.time_remaining -= timer.lap();
                    // Step 6: Finally, add an event to the new master to advance gtid and wait for the slaves
                    // to receive it. If using external replication, skip this step. Come up with an
                    // alternative later.
@ -785,7 +783,7 @@ bool MariaDBMonitor::switchover_perform(ClusterOperation& op)
                                                        op.time_remaining.secs()))
                    {
                        rval = true;
-                        auto step6_duration = timer.restart();
+                        auto step6_duration = timer.lap();
                        op.time_remaining -= step6_duration;
                        MXS_DEBUG("Switchover: slave replication confirmation took %.1f seconds with "
                                  "%.1f seconds to spare.",
--- a/server/modules/monitor/mariadbmon/mariadbserver.cc
+++ b/server/modules/monitor/mariadbmon/mariadbserver.cc
@ -178,7 +178,8 @@ bool MariaDBServer::execute_cmd_no_retry(const std::string& cmd,
 *
 * @param cmd The query to execute. Should be a query with a predictable effect even when retried or
 * ran several times.
- * @param time_limit How long to retry
+ * @param time_limit How long to retry. This does not override the connector-c timeouts, which are always
+ * respected.
 * @param errmsg_out Error output
 * @return True, if successful.
 */
@ -1546,6 +1547,114 @@ bool MariaDBServer::promote(ClusterOperation& op)
    return success;
 }

+/**
+ * Demote this server as part of a switchover.
+ *
+ * Stops and resets every slave connection of this server. If the operation marks this server as the
+ * master, also enables read_only, disables scheduled events (when op.handle_events is set), flushes the
+ * binary logs, updates gtid:s and runs the demotion sql file (when one is configured). The time spent in
+ * each step is deducted from op.time_remaining. If a step fails after read_only was enabled, an attempt
+ * is made to disable read_only again.
+ *
+ * @param op Cluster operation descriptor. Must be a switchover with this server as the demotion target.
+ * Errors are written to op.error_out.
+ * @return True if successful
+ */
+bool MariaDBServer::demote(ClusterOperation& op)
+{
+    mxb_assert(op.type == OperationType::SWITCHOVER && op.demotion_target == this);
+    json_t** error_out = op.error_out;
+    bool success = false;
+    // Every lap() below is charged against the remaining time budget of the whole operation.
+    StopWatch timer;
+
+    // Step 1: Stop & reset slave connections. The promotion target will copy them. The server object
+    // must not be updated before the connections have been copied.
+    bool stop_slave_error = false;
+    for (size_t i = 0; !stop_slave_error && i < m_slave_status.size(); i++)
+    {
+        if (!stop_slave_conn(&m_slave_status[i], StopMode::RESET_ALL, op.time_remaining, error_out))
+        {
+            stop_slave_error = true;
+        }
+        op.time_remaining -= timer.lap();
+    }
+
+    if (!stop_slave_error)
+    {
+        // Step 2: If this server is master, disable writes and scheduled events.
+        // Flush logs, update gtid:s, run demotion_sql_file.
+        bool demotion_error = false;
+        if (op.demotion_target_is_master)
+        {
+            mxb_assert(is_master());
+            // Step 2a: Enabling read-only can take time if writes are on or table locks taken.
+            bool ro_enabled = set_read_only(ReadOnlySetting::ENABLE, op.time_remaining, error_out);
+            op.time_remaining -= timer.lap();
+            if (!ro_enabled)
+            {
+                demotion_error = true;
+            }
+            else
+            {
+                if (op.handle_events)
+                {
+                    // TODO: Add query retrying to disable_events (it does not take a time limit yet).
+                    // Step 2b: Using BINLOG_OFF to avoid adding any gtid events,
+                    // which could break external replication.
+                    bool events_disabled = disable_events(BinlogMode::BINLOG_OFF, error_out);
+                    op.time_remaining -= timer.lap();
+                    if (!events_disabled)
+                    {
+                        demotion_error = true;
+                        PRINT_MXS_JSON_ERROR(error_out, "Failed to disable events on %s.", name());
+                    }
+                }
+
+                if (!demotion_error)
+                {
+                    // Step 2c: FLUSH LOGS to ensure that all events have been written to binlog,
+                    // then update gtid:s.
+                    string error_msg;
+                    bool logs_flushed = execute_cmd_time_limit("FLUSH LOGS;", op.time_remaining, &error_msg);
+                    op.time_remaining -= timer.lap();
+                    if (logs_flushed)
+                    {
+                        bool gtids_updated = update_gtids(&error_msg);
+                        // Charge the gtid update here. Otherwise its duration would only be deducted
+                        // if the demotion sql file step below happens to run.
+                        op.time_remaining -= timer.lap();
+                        if (!gtids_updated)
+                        {
+                            demotion_error = true;
+                            PRINT_MXS_JSON_ERROR(error_out,
+                                                 "Failed to update gtid:s of %s during demotion: %s.",
+                                                 name(), error_msg.c_str());
+                        }
+                    }
+                    else
+                    {
+                        demotion_error = true;
+                        PRINT_MXS_JSON_ERROR(error_out,
+                                             "Failed to flush binary logs of %s during demotion: %s.",
+                                             name(), error_msg.c_str());
+                    }
+                }
+
+                // Step 2d: Run demotion_sql_file if no errors so far.
+                if (!demotion_error && !op.demotion_sql_file.empty())
+                {
+                    bool file_ran_ok = run_sql_from_file(op.demotion_sql_file, error_out);
+                    op.time_remaining -= timer.lap();
+                    if (!file_ran_ok)
+                    {
+                        demotion_error = true;
+                        PRINT_MXS_JSON_ERROR(error_out,
+                                             "Execution of file '%s' failed during demotion of server %s.",
+                                             op.demotion_sql_file.c_str(), name());
+                    }
+                }
+
+                if (demotion_error)
+                {
+                    // Read_only was enabled but a later step failed. Disable read_only. Connection is
+                    // likely broken so use a short time limit. The result is deliberately ignored and
+                    // no error output is given: this is a best-effort undo.
+                    // TODO: add smarter undo
+                    set_read_only(ReadOnlySetting::DISABLE, Duration(0.0), nullptr);
+                }
+            }
+        }
+
+        if (!demotion_error)
+        {
+            success = true;
+        }
+    }
+    return success;
+}
+
 bool MariaDBServer::stop_slave_conn(SlaveStatus* slave_conn, StopMode mode, Duration time_limit,
                                    json_t** error_out)
 {
--- a/server/modules/monitor/mariadbmon/mariadbserver.hh
+++ b/server/modules/monitor/mariadbmon/mariadbserver.hh
@ -509,7 +509,15 @@ public:
     * @param op Cluster operation descriptor
     * @return True if successful
     */
-    bool promote(ClusterOperation& operation);
+    bool promote(ClusterOperation& op);
+
+    /**
+     * Demote this server. Removes all slave connections. If server was master, sets read_only.
+     *
+     * Only valid for a switchover operation where this server is the demotion target (checked with
+     * mxb_assert). Every slave connection is stopped and reset. If the operation marks this server as
+     * the master (op.demotion_target_is_master), read_only is enabled, scheduled events are disabled
+     * when op.handle_events is set, binary logs are flushed, gtid:s are updated and
+     * op.demotion_sql_file is run if one is configured. If a step fails after read_only was enabled,
+     * an attempt is made to disable read_only again.
+     *
+     * The time spent in the individual steps is deducted from op.time_remaining and error messages are
+     * written to op.error_out.
+     *
+     * @param op Cluster operation descriptor
+     * @return True if successful
+     */
+    bool demote(ClusterOperation& op);

    /**
     * Redirect the slave connection going to demotion target to replicate from promotion target.