diff --git a/include/maxscale/monitor.hh b/include/maxscale/monitor.hh index b5804f102..31a8ba51d 100644 --- a/include/maxscale/monitor.hh +++ b/include/maxscale/monitor.hh @@ -151,6 +151,10 @@ public: static const int BEING_DRAINED_OFF = 3; static const int BEING_DRAINED_ON = 4; + // When a monitor detects that a server is down, these bits should be cleared. + static constexpr uint64_t SERVER_DOWN_CLEAR_BITS {SERVER_RUNNING | SERVER_AUTH_ERROR | SERVER_MASTER + | SERVER_SLAVE | SERVER_SLAVE_OF_EXT_MASTER | SERVER_RELAY | SERVER_JOINED}; + MonitorServer(SERVER* server, const SERVER::DiskSpaceLimits& monitor_limits); ~MonitorServer(); diff --git a/maxscale-system-test/CMakeLists.txt b/maxscale-system-test/CMakeLists.txt index 64d9507fb..02d19231e 100644 --- a/maxscale-system-test/CMakeLists.txt +++ b/maxscale-system-test/CMakeLists.txt @@ -241,7 +241,7 @@ add_test_executable(mysqlmon_switchover.cpp mysqlmon_switchover mysqlmon_switcho # MySQL Monitor switchover with bad master add_test_executable(mysqlmon_switchover_bad_master.cpp mysqlmon_switchover_bad_master mysqlmon_switchover_bad_master LABELS mysqlmon REPL_BACKEND) -# MySQL Monitor manual failover with no valid slaves, uses config of mysqlmon_failover_auto +# MySQL Monitor manual failover with no valid slaves, uses config of mysqlmon_failover_auto. Also MXS-2652. add_test_executable(mysqlmon_failover_no_slaves.cpp mysqlmon_failover_no_slaves mysqlmon_failover_auto LABELS mysqlmon REPL_BACKEND) # MySQL Monitor Rejoin (good) Test diff --git a/maxscale-system-test/mysqlmon_failover_no_slaves.cpp b/maxscale-system-test/mysqlmon_failover_no_slaves.cpp index baa11b51a..34b99fc2b 100644 --- a/maxscale-system-test/mysqlmon_failover_no_slaves.cpp +++ b/maxscale-system-test/mysqlmon_failover_no_slaves.cpp @@ -11,8 +11,21 @@ * Public License. */ +// MXS-2652: https://jira.mariadb.org/browse/MXS-2652 + #include "testconnections.h" #include "fail_switch_rejoin_common.cpp" +using std::string; + +namespace +{ +void expect_maintenance(TestConnections& test, std::string server_name, bool value); +void expect_running(TestConnections& test, std::string server_name, bool value); + +const string running = "Running"; +const string down = "Down"; +const string maint = "Maintenance"; +} int main(int argc, char** argv) { @@ -38,13 +51,33 @@ int main(int argc, char** argv) test.repl->stash_server_settings(2); test.repl->disable_server_setting(2, "log-bin"); test.repl->start_node(2, (char*) ""); - // Slave 3. Stop this slave as well. The monitor is quick to turn failover off it a slave has another - // slave connection or if a slave connection is not using gtid, so those situations are hard to test. - // This may change later when failover support for such situations is added, so add a more interesting - // test for node 3 then. - test.try_query(nodes[3], "STOP SLAVE"); - test.maxscales->wait_for_monitor(); + + // Slave 3. Set node to maintenance, then shut it down. Simultaneously check issue + // MXS-2652: Maintenance flag should persist when server goes down & comes back up. + int server_ind = 3; + int server_num = server_ind + 1; + string server_name = "server" + std::to_string(server_num); + expect_maintenance(test, server_name, false); + + if (test.ok()) + { + test.maxscales->ssh_node_f(0, true, "maxadmin set server %s maintenance", server_name.c_str()); + test.maxscales->wait_for_monitor(); + expect_running(test, server_name, true); + expect_maintenance(test, server_name, true); + + test.repl->stop_node(server_ind); + test.maxscales->wait_for_monitor(); + expect_running(test, server_name, false); + expect_maintenance(test, server_name, true); + + test.repl->start_node(server_ind); + test.maxscales->wait_for_monitor(); + expect_running(test, server_name, true); + expect_maintenance(test, server_name, true); + } + get_output(test); test.tprintf(LINE); @@ -62,7 +95,41 @@ int main(int argc, char** argv) test.repl->stop_node(2); test.repl->restore_server_settings(2); test.repl->start_node(2, (char*) ""); - test.try_query(nodes[3], "START SLAVE;"); + test.maxscales->ssh_node_f(0, true, "maxadmin clear server %s maintenance", server_name.c_str()); + test.repl->fix_replication(); return test.global_result; } + +namespace +{ +void expect_running(TestConnections& test, std::string server_name, bool value) +{ + auto states = test.get_server_status(server_name.c_str()); + if (value) + { + test.expect(states.count(running) == 1, "'%s' is not running when it should be.", + server_name.c_str()); + } + else + { + test.expect(states.count(down) == 1, "'%s' is not down when it should be.", + server_name.c_str()); + } +} + +void expect_maintenance(TestConnections& test, std::string server_name, bool value) +{ + auto states = test.get_server_status(server_name.c_str()); + if (value) + { + test.expect(states.count(maint) == 1, "'%s' is not in maintenance when it should be.", + server_name.c_str()); + } + else + { + test.expect(states.count(maint) == 0, "'%s' is in maintenance when it should not be.", + server_name.c_str()); + } +} +} diff --git a/server/core/monitor.cc b/server/core/monitor.cc index c6635ac12..9a7b74b63 100644 --- a/server/core/monitor.cc +++ b/server/core/monitor.cc @@ -2105,18 +2105,12 @@ void MonitorWorkerSimple::tick() * the server state bits. This would allow clearing the state by * zeroing it out. */ - const uint64_t bits_to_clear = ~SERVER_WAS_MASTER; - - pMs->clear_pending_status(bits_to_clear); + pMs->clear_pending_status(MonitorServer::SERVER_DOWN_CLEAR_BITS); if (mysql_errno(pMs->con) == ER_ACCESS_DENIED_ERROR) { pMs->set_pending_status(SERVER_AUTH_ERROR); } - else - { - pMs->clear_pending_status(SERVER_AUTH_ERROR); - } if (pMs->status_changed() && pMs->should_print_fail_status()) { diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index 247f22e20..a838b7177 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -2231,9 +2231,9 @@ void MariaDBServer::update_server(bool time_to_update_disk_space, } else { - /* The current server is not running. Clear all but the stale master bit as it is used to detect - * masters that went down but came up. */ - server->clear_status(~SERVER_WAS_MASTER); + /* The current server is not running. Clear some of the bits. User-set bits and some long-term bits + * can stay. */ + server->clear_status(MonitorServer::SERVER_DOWN_CLEAR_BITS); auto conn_errno = mysql_errno(conn); if (conn_errno == ER_ACCESS_DENIED_ERROR || conn_errno == ER_ACCESS_DENIED_NO_PASSWORD_ERROR) {