From 00feb61b2349cfa03151369c6962e8a18dd22455 Mon Sep 17 00:00:00 2001
From: Esa Korhonen <esa.korhonen@mariadb.com>
Date: Fri, 30 Aug 2019 18:14:04 +0300
Subject: [PATCH 1/2] MXS-2652 Do not clear maintenance flag when a server goes
 down

The set of flags to clear should be well-defined.
---
 include/maxscale/monitor.hh                     | 4 ++++
 server/core/monitor.cc                          | 8 +-------
 server/modules/monitor/mariadbmon/mariadbmon.cc | 6 +++---
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/include/maxscale/monitor.hh b/include/maxscale/monitor.hh
index 5359b09b4..49de3c0c4 100644
--- a/include/maxscale/monitor.hh
+++ b/include/maxscale/monitor.hh
@@ -118,6 +118,10 @@ public:
     static int64_t get_time_ms();
 
 protected:
+    // When a monitor detects that a server is down, these bits should be cleared.
+    static constexpr uint64_t SERVER_DOWN_CLEAR_BITS {SERVER_RUNNING | SERVER_AUTH_ERROR | SERVER_MASTER
+        | SERVER_SLAVE | SERVER_SLAVE_OF_EXT_MASTER | SERVER_RELAY | SERVER_JOINED | SERVER_NDB};
+
     MonitorInstance(MXS_MONITOR* pMonitor);
 
     /**
diff --git a/server/core/monitor.cc b/server/core/monitor.cc
index 5c85a5138..3cded8eca 100644
--- a/server/core/monitor.cc
+++ b/server/core/monitor.cc
@@ -2839,18 +2839,12 @@ void MonitorInstanceSimple::tick()
                  * the server state bits. This would allow clearing the state by
                  * zeroing it out.
                  */
-                const uint64_t bits_to_clear = ~SERVER_WAS_MASTER;
-
-                monitor_clear_pending_status(pMs, bits_to_clear);
+                monitor_clear_pending_status(pMs, SERVER_DOWN_CLEAR_BITS);
 
                 if (mysql_errno(pMs->con) == ER_ACCESS_DENIED_ERROR)
                 {
                     monitor_set_pending_status(pMs, SERVER_AUTH_ERROR);
                 }
-                else
-                {
-                    monitor_clear_pending_status(pMs, SERVER_AUTH_ERROR);
-                }
 
                 if (mon_status_changed(pMs) && mon_print_fail_status(pMs))
                 {
diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc
index d0baf04e1..704360b6d 100644
--- a/server/modules/monitor/mariadbmon/mariadbmon.cc
+++ b/server/modules/monitor/mariadbmon/mariadbmon.cc
@@ -379,9 +379,9 @@ void MariaDBMonitor::update_server(MariaDBServer* server)
     }
     else
     {
-        /* The current server is not running. Clear all but the stale master bit as it is used to detect
-         * masters that went down but came up. */
-        server->clear_status(~SERVER_WAS_MASTER);
+        // The server is not running. Clear some of the bits. User-set bits and some long-term bits
+        // can stay.
+        server->clear_status(SERVER_DOWN_CLEAR_BITS);
         auto conn_errno = mysql_errno(conn);
         if (conn_errno == ER_ACCESS_DENIED_ERROR || conn_errno == ER_ACCESS_DENIED_NO_PASSWORD_ERROR)
         {

From ff2048625bab144930fa8dcc3a99c677f6f0dbf9 Mon Sep 17 00:00:00 2001
From: Esa Korhonen <esa.korhonen@mariadb.com>
Date: Thu, 5 Sep 2019 09:57:44 +0300
Subject: [PATCH 2/2] MXS-2652 Add test

Test case added to an existing test.
---
 maxscale-system-test/CMakeLists.txt           |  2 +-
 .../mysqlmon_failover_no_slaves.cpp           | 81 +++++++++++++++++--
 2 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/maxscale-system-test/CMakeLists.txt b/maxscale-system-test/CMakeLists.txt
index 49fa2c6e5..3440f5f70 100644
--- a/maxscale-system-test/CMakeLists.txt
+++ b/maxscale-system-test/CMakeLists.txt
@@ -246,7 +246,7 @@ add_test_executable(mysqlmon_switchover.cpp mysqlmon_switchover mysqlmon_switcho
 # MySQL Monitor switchover with bad master
 add_test_executable(mysqlmon_switchover_bad_master.cpp mysqlmon_switchover_bad_master mysqlmon_switchover_bad_master LABELS mysqlmon REPL_BACKEND)
 
-# MySQL Monitor manual failover with no valid slaves, uses config of mysqlmon_failover_auto
+# MySQL Monitor manual failover with no valid slaves, uses config of mysqlmon_failover_auto. Also MXS-2652.
 add_test_executable(mysqlmon_failover_no_slaves.cpp mysqlmon_failover_no_slaves mysqlmon_failover_auto LABELS mysqlmon REPL_BACKEND)
 
 # MySQL Monitor Rejoin (good) Test
diff --git a/maxscale-system-test/mysqlmon_failover_no_slaves.cpp b/maxscale-system-test/mysqlmon_failover_no_slaves.cpp
index 97c22aad9..95f9755b5 100644
--- a/maxscale-system-test/mysqlmon_failover_no_slaves.cpp
+++ b/maxscale-system-test/mysqlmon_failover_no_slaves.cpp
@@ -11,8 +11,21 @@
  * Public License.
  */
 
+// MXS-2652: https://jira.mariadb.org/browse/MXS-2652
+
 #include "testconnections.h"
 #include "fail_switch_rejoin_common.cpp"
+using std::string;
+
+namespace
+{
+void expect_maintenance(TestConnections& test, std::string server_name, bool value);
+void expect_running(TestConnections& test, std::string server_name, bool value);
+
+const string running = "Running";
+const string down = "Down";
+const string maint = "Maintenance";
+}
 
 int main(int argc, char** argv)
 {
@@ -38,13 +51,33 @@ int main(int argc, char** argv)
     test.repl->stash_server_settings(2);
     test.repl->disable_server_setting(2, "log-bin");
     test.repl->start_node(2, (char*) "");
-    // Slave 3. Stop this slave as well. The monitor is quick to turn failover off it a slave has another
-    // slave connection or if a slave connection is not using gtid, so those situations are hard to test.
-    // This may change later when failover support for such situations is added, so add a more interesting
-    // test for node 3 then.
-    test.try_query(nodes[3], "STOP SLAVE");
-
     test.maxscales->wait_for_monitor();
+
+    // Slave 3. Set node to maintenance, then shut it down. Simultaneously check issue
+    // MXS-2652: Maintenance flag should persist when server goes down & comes back up.
+    int server_ind = 3;
+    int server_num = server_ind + 1;
+    string server_name = "server" + std::to_string(server_num);
+    expect_maintenance(test, server_name, false);
+
+    if (test.ok())
+    {
+        test.maxscales->ssh_node_f(0, true, "maxadmin set server %s maintenance", server_name.c_str());
+        test.maxscales->wait_for_monitor();
+        expect_running(test, server_name, true);
+        expect_maintenance(test, server_name, true);
+
+        test.repl->stop_node(server_ind);
+        test.maxscales->wait_for_monitor();
+        expect_running(test, server_name, false);
+        expect_maintenance(test, server_name, true);
+
+        test.repl->start_node(server_ind);
+        test.maxscales->wait_for_monitor();
+        expect_running(test, server_name, true);
+        expect_maintenance(test, server_name, true);
+    }
+
     get_output(test);
 
     test.tprintf(LINE);
@@ -62,7 +95,41 @@ int main(int argc, char** argv)
     test.repl->stop_node(2);
     test.repl->restore_server_settings(2);
     test.repl->start_node(2, (char*) "");
-    test.try_query(nodes[3], "START SLAVE;");
+    test.maxscales->ssh_node_f(0, true, "maxadmin clear server %s maintenance", server_name.c_str());
+
     test.repl->fix_replication();
     return test.global_result;
 }
+
+namespace
+{
+void expect_running(TestConnections& test, std::string server_name, bool value)
+{
+    auto states = test.get_server_status(server_name.c_str());
+    if (value)
+    {
+        test.expect(states.count(running) == 1, "'%s' is not running when it should be.",
+                    server_name.c_str());
+    }
+    else
+    {
+        test.expect(states.count(down) == 1, "'%s' is not down when it should be.",
+                    server_name.c_str());
+    }
+}
+
+void expect_maintenance(TestConnections& test, std::string server_name, bool value)
+{
+    auto states = test.get_server_status(server_name.c_str());
+    if (value)
+    {
+        test.expect(states.count(maint) == 1, "'%s' is not in maintenance when it should be.",
+                    server_name.c_str());
+    }
+    else
+    {
+        test.expect(states.count(maint) == 0, "'%s' is in maintenance when it should not be.",
+                    server_name.c_str());
+    }
+}
+}