diff --git a/maxscale-system-test/.gitignore b/maxscale-system-test/.gitignore index 17c1ca033..c67b0e42f 100644 --- a/maxscale-system-test/.gitignore +++ b/maxscale-system-test/.gitignore @@ -167,6 +167,7 @@ mxs957 mysqlmon_failover_auto mysqlmon_failover_manual mysqlmon_rejoin_good +mysqlmon_rejoin_bad namedserverfilter no_password non_native_setup diff --git a/maxscale-system-test/CMakeLists.txt b/maxscale-system-test/CMakeLists.txt index 6101e179b..bc82b8ec3 100644 --- a/maxscale-system-test/CMakeLists.txt +++ b/maxscale-system-test/CMakeLists.txt @@ -266,9 +266,12 @@ add_test_executable(mysqlmon_failover_manual2.cpp mysqlmon_failover_manual2_2 my # MySQL Monitor manual failover with bad master add_test_executable(mysqlmon_failover_bad_master.cpp mysqlmon_failover_bad_master mysqlmon_failover_bad_master LABELS mysqlmon REPL_BACKEND) -# MySQL Monitor Rejoin Test +# MySQL Monitor Rejoin (good) Test add_test_executable(mysqlmon_rejoin_good.cpp mysqlmon_rejoin_good mysqlmon_rejoin_good LABELS mysqlmon REPL_BACKEND) +# MySQL Monitor Rejoin (bad) Test, use template for Rejoin (good) +add_test_executable(mysqlmon_rejoin_bad.cpp mysqlmon_rejoin_bad mysqlmon_rejoin_good LABELS mysqlmon REPL_BACKEND) + # MySQL Monitor rolling master add_test_executable(mysqlmon_failover_rolling_master.cpp mysqlmon_failover_rolling_master mysqlmon_failover_rolling_master LABELS mysqlmon REPL_BACKEND) diff --git a/maxscale-system-test/mysqlmon_failover_common.cpp b/maxscale-system-test/fail_switch_rejoin_common.cpp similarity index 50% rename from maxscale-system-test/mysqlmon_failover_common.cpp rename to maxscale-system-test/fail_switch_rejoin_common.cpp index 7eef7e689..afb641383 100644 --- a/maxscale-system-test/mysqlmon_failover_common.cpp +++ b/maxscale-system-test/fail_switch_rejoin_common.cpp @@ -3,6 +3,13 @@ int inserts = 0; bool interactive = false; +const char LINE[] = "------------------------------------------"; +const char PRINT_ID[] = "Master server id is %d."; +const char 
WRONG_SLAVE[] = "Wrong slave was promoted or promotion failed."; +const char GTID_QUERY[] = "SELECT @@gtid_current_pos;"; +const char GTID_FIELD[] = "@@gtid_current_pos"; +const int bufsize = 512; + void get_output(TestConnections& test) { int ec; @@ -101,15 +108,12 @@ void fix_replication_create_table(TestConnections& test) void delete_slave_binlogs(TestConnections& test) { const char RESET[] = "RESET MASTER;"; + execute_query(test.repl->nodes[0], "SET GLOBAL gtid_slave_pos='0-1-0';"); execute_query(test.repl->nodes[1], RESET); execute_query(test.repl->nodes[2], RESET); execute_query(test.repl->nodes[3], RESET); } -const char LINE[] = "------------------------------------------"; -const char PRINT_ID[] = "Master server id is %d."; -const char WRONG_SLAVE[] = "Wrong slave was promoted or promotion failed."; - void basic_test(TestConnections& test) { test.tprintf("Creating table and inserting data."); @@ -121,102 +125,72 @@ void basic_test(TestConnections& test) check(test); get_output(test); } -int prepare_test_1(TestConnections& test) + +/** + * Do inserts, check that results are as expected. + * + * @param test Test connections + * @param insert_count + */ +void generate_traffic_and_check(TestConnections& test, MYSQL* conn, int insert_count) { - delete_slave_binlogs(test); - test.tprintf("Test 1: Stopping master and waiting for failover. Check that another server is promoted.\n" - "%s", LINE); - get_input(); - int node0_id = test.repl->get_server_id(0); // Read master id now before shutdown. 
- test.repl->stop_node(0); - return node0_id; + const char INSERT[] = "INSERT INTO test.t1 VALUES (%d);"; + const char SELECT[] = "SELECT * FROM test.t1 ORDER BY id ASC;"; + for (int i = 0; i < insert_count; i++) + { + test.try_query(conn, INSERT, inserts++); + timespec time; + time.tv_sec = 0; + time.tv_nsec = 100000000; + nanosleep(&time, NULL); + } + + mysql_query(conn, SELECT); + MYSQL_RES *res = mysql_store_result(conn); + test.assert(res != NULL, "Query did not return a result set"); + + if (res) + { + MYSQL_ROW row; + // Check all values, they should go from 0 to 'inserts' + int expected_val = 0; + while ((row = mysql_fetch_row(res))) + { + int value_read = strtol(row[0], NULL, 0); + if (value_read != expected_val) + { + test.assert(false, "Query returned %d when %d was expected", value_read, expected_val); + break; + } + expected_val++; + } + int num_rows = expected_val; + test.assert(num_rows == inserts, "Query returned %d rows when %d rows were expected", + num_rows, inserts); + mysql_free_result(res); + } + // mysql_close(conn); } -void check_test_1(TestConnections& test, int node0_id) +void print_gtids(TestConnections& test) { - check(test); - get_output(test); - int master_id = get_master_server_id(test); - test.tprintf(PRINT_ID, master_id); - test.add_result(master_id < 1 && master_id == node0_id, "Master did not change or no master detected."); - fix_replication_create_table(test); + MYSQL* maxconn = test.maxscales->open_rwsplit_connection(0); + if (maxconn) + { + char result_tmp[bufsize]; + if (find_field(maxconn, GTID_QUERY, GTID_FIELD, result_tmp) == 0) + { + test.tprintf("MaxScale gtid: %s", result_tmp); + } + } + mysql_close(maxconn); test.repl->connect(); -} - -void prepare_test_2(TestConnections& test) -{ - delete_slave_binlogs(test); - test.tprintf("Test 2: Disable replication on server 2 and kill master, check that server 3 or 4 is " - "promoted.\n%s", LINE); - get_input(); - execute_query(test.repl->nodes[1], "STOP SLAVE; RESET SLAVE 
ALL;"); - sleep(2); - test.repl->stop_node(0); -} - -void check_test_2(TestConnections& test) -{ - check(test); - get_output(test); - - int master_id = get_master_server_id(test); - test.tprintf(PRINT_ID, master_id); - test.add_result(master_id < 1 || - (master_id != test.repl->get_server_id(2) && master_id != test.repl->get_server_id(3)), - WRONG_SLAVE); - fix_replication_create_table(test); - test.repl->connect(); -} -void prepare_test_3(TestConnections& test) -{ - delete_slave_binlogs(test); - test.tprintf("Test3: Shutdown two slaves (servers 2 and 4). Disable log_bin on server 2, making it " - "invalid for promotion. Enable log-slave-updates on servers 2 and 4. Check that server 4 is " - "promoted on master failure.\n%s", LINE); - get_input(); - - test.repl->stop_node(1); - test.repl->stop_node(3); - test.repl->stash_server_settings(1); - test.repl->stash_server_settings(3); - test.repl->disable_server_setting(1, "log-bin"); - const char* log_slave = "log_slave_updates=1"; - test.repl->add_server_setting(1, log_slave); - test.repl->add_server_setting(3, log_slave); - test.repl->start_node(1, (char *) ""); - test.repl->start_node(3, (char *) ""); - sleep(4); - test.tprintf("Settings changed."); - get_output(test); - test.tprintf("Stopping master."); - test.repl->stop_node(0); -} - -void check_test_3(TestConnections& test) -{ - check(test); - get_output(test); - - int master_id = get_master_server_id(test); - // Because servers have been restarted, redo connections. - test.repl->connect(); - sleep(2); - test.tprintf(PRINT_ID, master_id); - test.add_result(master_id < 1 || master_id != test.repl->get_server_id(3), WRONG_SLAVE); - // Restore server 2 and 4 settings. Because server 4 is now the master, shutting it down causes - // another failover. Prevent this by stopping maxscale. 
- test.tprintf("Restoring server settings."); - test.maxscales->stop_maxscale(0); - test.repl->stop_node(1); - test.repl->stop_node(3); - sleep(4); - test.repl->restore_server_settings(1); - test.repl->restore_server_settings(3); - test.repl->start_node(0, (char *) ""); - test.repl->start_node(1, (char *) ""); - test.repl->start_node(3, (char *) ""); - sleep(4); - test.maxscales->start_maxscale(0); - sleep(2); - get_output(test); + for (int i = 0; i < test.repl->N; i++) + { + char result_tmp[bufsize]; + if (find_field(test.repl->nodes[i], GTID_QUERY, GTID_FIELD, result_tmp) == 0) + { + test.tprintf("Node %d gtid: %s", i, result_tmp); + } + } } diff --git a/maxscale-system-test/failover_common.cpp b/maxscale-system-test/failover_common.cpp new file mode 100644 index 000000000..980d59004 --- /dev/null +++ b/maxscale-system-test/failover_common.cpp @@ -0,0 +1,101 @@ +#include "fail_switch_rejoin_common.cpp" + +int prepare_test_1(TestConnections& test) +{ + delete_slave_binlogs(test); + test.tprintf("Test 1: Stopping master and waiting for failover. Check that another server is promoted.\n" + "%s", LINE); + get_input(); + int node0_id = test.repl->get_server_id(0); // Read master id now before shutdown. 
+ test.repl->stop_node(0); + return node0_id; +} + +void check_test_1(TestConnections& test, int node0_id) +{ + check(test); + get_output(test); + int master_id = get_master_server_id(test); + test.tprintf(PRINT_ID, master_id); + test.add_result(master_id < 1 && master_id == node0_id, "Master did not change or no master detected."); + fix_replication_create_table(test); + test.repl->connect(); +} + +void prepare_test_2(TestConnections& test) +{ + delete_slave_binlogs(test); + test.tprintf("Test 2: Disable replication on server 2 and kill master, check that server 3 or 4 is " + "promoted.\n%s", LINE); + get_input(); + execute_query(test.repl->nodes[1], "STOP SLAVE; RESET SLAVE ALL;"); + sleep(2); + test.repl->stop_node(0); +} + +void check_test_2(TestConnections& test) +{ + check(test); + get_output(test); + + int master_id = get_master_server_id(test); + test.tprintf(PRINT_ID, master_id); + test.add_result(master_id < 1 || + (master_id != test.repl->get_server_id(2) && master_id != test.repl->get_server_id(3)), + WRONG_SLAVE); + fix_replication_create_table(test); + test.repl->connect(); +} +void prepare_test_3(TestConnections& test) +{ + delete_slave_binlogs(test); + test.tprintf("Test3: Shutdown two slaves (servers 2 and 4). Disable log_bin on server 2, making it " + "invalid for promotion. Enable log-slave-updates on servers 2 and 4. 
Check that server 4 is " + "promoted on master failure.\n%s", LINE); + get_input(); + + test.repl->stop_node(1); + test.repl->stop_node(3); + test.repl->stash_server_settings(1); + test.repl->stash_server_settings(3); + test.repl->disable_server_setting(1, "log-bin"); + const char* log_slave = "log_slave_updates=1"; + test.repl->add_server_setting(1, log_slave); + test.repl->add_server_setting(3, log_slave); + test.repl->start_node(1, (char *) ""); + test.repl->start_node(3, (char *) ""); + sleep(4); + test.tprintf("Settings changed."); + get_output(test); + test.tprintf("Stopping master."); + test.repl->stop_node(0); +} + +void check_test_3(TestConnections& test) +{ + check(test); + get_output(test); + + int master_id = get_master_server_id(test); + // Because servers have been restarted, redo connections. + test.repl->connect(); + sleep(2); + test.tprintf(PRINT_ID, master_id); + test.add_result(master_id < 1 || master_id != test.repl->get_server_id(3), WRONG_SLAVE); + // Restore server 2 and 4 settings. Because server 4 is now the master, shutting it down causes + // another failover. Prevent this by stopping maxscale. 
+ test.tprintf("Restoring server settings."); + test.maxscales->stop_maxscale(0); + test.repl->stop_node(1); + test.repl->stop_node(3); + sleep(4); + test.repl->restore_server_settings(1); + test.repl->restore_server_settings(3); + test.repl->start_node(0, (char *) ""); + test.repl->start_node(1, (char *) ""); + test.repl->start_node(3, (char *) ""); + sleep(4); + test.maxscales->start_maxscale(0); + sleep(2); + get_output(test); +} diff --git a/maxscale-system-test/mysqlmon_failover_auto.cpp b/maxscale-system-test/mysqlmon_failover_auto.cpp index 86a600105..6de5915dd 100644 --- a/maxscale-system-test/mysqlmon_failover_auto.cpp +++ b/maxscale-system-test/mysqlmon_failover_auto.cpp @@ -3,7 +3,7 @@ */ #include "testconnections.h" -#include "mysqlmon_failover_common.cpp" +#include "failover_common.cpp" int main(int argc, char** argv) { diff --git a/maxscale-system-test/mysqlmon_failover_manual.cpp b/maxscale-system-test/mysqlmon_failover_manual.cpp index 744f07e3f..cc3fbe70e 100644 --- a/maxscale-system-test/mysqlmon_failover_manual.cpp +++ b/maxscale-system-test/mysqlmon_failover_manual.cpp @@ -3,7 +3,7 @@ */ #include "testconnections.h" -#include "mysqlmon_failover_common.cpp" +#include "failover_common.cpp" int main(int argc, char** argv) { diff --git a/maxscale-system-test/mysqlmon_rejoin_bad.cpp b/maxscale-system-test/mysqlmon_rejoin_bad.cpp new file mode 100644 index 000000000..2ddf81735 --- /dev/null +++ b/maxscale-system-test/mysqlmon_rejoin_bad.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2020-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. 
+ */ + +#include <string> + +#include "testconnections.h" +#include "fail_switch_rejoin_common.cpp" + +using std::string; + +int main(int argc, char** argv) +{ + char result_tmp[bufsize]; + interactive = strcmp(argv[argc - 1], "interactive") == 0; + TestConnections test(argc, argv); + MYSQL* maxconn = test.maxscales->open_rwsplit_connection(0); + + // Set up test table + basic_test(test); + // Delete binlogs to sync gtid:s + delete_slave_binlogs(test); + // Advance gtid:s a bit so gtid variables are updated. + generate_traffic_and_check(test, maxconn, 10); + test.repl->sync_slaves(0); + + test.tprintf(LINE); + print_gtids(test); + test.tprintf(LINE); + string gtid_begin; + if (find_field(maxconn, GTID_QUERY, GTID_FIELD, result_tmp) == 0) + { + gtid_begin = result_tmp; + } + mysql_close(maxconn); + test.tprintf("Stopping MaxScale..."); + // Mess with the slaves to fix situation such that only one slave can be rejoined. Stop maxscale. + if (test.stop_maxscale(0)) + { + test.assert(false, "Could not stop MaxScale."); + return test.global_result; + } + + // Leave first of three slaves connected so it's clear which one is the master server. + const char STOP_SLAVE[] = "STOP SLAVE;"; + const char RESET_SLAVE[] = "RESET SLAVE ALL;"; + const char READ_ONLY_OFF[] = "SET GLOBAL read_only=0;"; + test.repl->connect(); + const int FIRST_MOD_NODE = 2; // Modify nodes 2 & 3 + const int NODE_COUNT = test.repl->N; + MYSQL** nodes = test.repl->nodes; + + for (int i = FIRST_MOD_NODE; i < NODE_COUNT; i++) + { + if (mysql_query(nodes[i], STOP_SLAVE) != 0 || + mysql_query(nodes[i], RESET_SLAVE) != 0 || + mysql_query(nodes[i], READ_ONLY_OFF) != 0) + { + test.assert(false, "Could not stop slave connections and/or disable read_only for node %d.", i); + return test.global_result; + } + } + + // Add more events to node3. 
+ string gtid_node2, gtid_node3; + test.tprintf("Sending more inserts to server 4."); + generate_traffic_and_check(test, nodes[3], 10); + // Save gtids + if (find_field(nodes[2], GTID_QUERY, GTID_FIELD, result_tmp) == 0) + { + gtid_node2 = result_tmp; + } + if (find_field(nodes[3], GTID_QUERY, GTID_FIELD, result_tmp) == 0) + { + gtid_node3 = result_tmp; + } + print_gtids(test); + bool gtids_ok = (gtid_begin == gtid_node2 && gtid_node2 < gtid_node3); + test.assert(gtids_ok, "Gtid:s have not advanced correctly."); + if (!gtids_ok) + { + return test.global_result; + } + test.tprintf("Restarting MaxScale. Server 4 should not rejoin the cluster."); + test.tprintf(LINE); + if (test.start_maxscale(0)) + { + test.assert(false, "Could not start MaxScale."); + return test.global_result; + } + sleep(5); + get_output(test); + + StringSet node2_states = test.get_server_status("server3"); + StringSet node3_states = test.get_server_status("server4"); + bool states_n2_ok = (node2_states.find("Slave") != node2_states.end()); + bool states_n3_ok = (node3_states.find("Slave") == node3_states.end()); + test.assert(states_n2_ok, "Node 2 has not rejoined when it should have."); + test.assert(states_n3_ok, "Node 3 rejoined when it shouldn't have."); + if (!states_n2_ok || !states_n3_ok) + { + return test.global_result; + } + // Finally, fix replication by telling the current master to replicate from server4 + test.tprintf("Setting server 1 to replicate from server 4. 
Auto-rejoin should redirect servers 2 and 3."); + const char CHANGE_CMD_FMT[] = "CHANGE MASTER TO MASTER_HOST = '%s', MASTER_PORT = %d, " + "MASTER_USE_GTID = current_pos, MASTER_USER='repl', MASTER_PASSWORD = 'repl';"; + char cmd[256]; + snprintf(cmd, sizeof(cmd), CHANGE_CMD_FMT, test.repl->IP[3], test.repl->port[3]); + mysql_query(nodes[0], cmd); + mysql_query(nodes[0], "START SLAVE;"); + sleep(5); + get_output(test); + int master_id = get_master_server_id(test); + test.assert(master_id == 4, "Server 4 should be the cluster master."); + StringSet node0_states = test.get_server_status("server1"); + bool states_n0_ok = (node0_states.find("Slave") != node0_states.end() && + node0_states.find("Relay Master") == node0_states.end()); + test.assert(states_n0_ok, "Server 1 is not a slave when it should be."); + if (states_n0_ok) + { + int ec; + test.maxscales->ssh_node_output(0, + "maxadmin call command mysqlmon switchover MySQL-Monitor server1 server4" , true, &ec); + sleep(1); + master_id = get_master_server_id(test); + test.assert(master_id == 1, "Server 1 should be the cluster master."); + get_output(test); + } + + test.repl->fix_replication(); + return test.global_result; +} diff --git a/maxscale-system-test/mysqlmon_rejoin_good.cpp b/maxscale-system-test/mysqlmon_rejoin_good.cpp index c692459ab..1b3a16961 100644 --- a/maxscale-system-test/mysqlmon_rejoin_good.cpp +++ b/maxscale-system-test/mysqlmon_rejoin_good.cpp @@ -11,100 +11,22 @@ * Public License. */ -#include - -#include "testconnections.h" -#include "mysqlmon_failover_common.cpp" +#include "fail_switch_rejoin_common.cpp" using std::string; -typedef std::vector StringVector; - -const char GTID_QUERY[] = "SELECT @@gtid_current_pos;"; -const char GTID_FIELD[] = "@@gtid_current_pos"; -const int bufsize = 512; -/** - * Do inserts, check that results are as expected. 
- * - * @param test Test connections - * @paran insert_count - */ -void generate_traffic_and_check(TestConnections& test, int insert_count) -{ - MYSQL *conn = test.maxscales->open_rwsplit_connection(0); - const char INSERT[] = "INSERT INTO test.t1 VALUES (%d);"; - const char SELECT[] = "SELECT * FROM test.t1 ORDER BY id ASC;"; - for (int i = 0; i < insert_count; i++) - { - test.try_query(conn, INSERT, inserts++); - timespec time; - time.tv_sec = 0; - time.tv_nsec = 100000000; - nanosleep(&time, NULL); - } - - mysql_query(conn, SELECT); - MYSQL_RES *res = mysql_store_result(conn); - test.assert(res != NULL, "Query did not return a result set"); - - if (res) - { - MYSQL_ROW row; - // Check all values, they should go from 0 to 'inserts' - int expected_val = 0; - while ((row = mysql_fetch_row(res))) - { - int value_read = strtol(row[0], NULL, 0); - if (value_read != expected_val) - { - test.assert(false, "Query returned %d when %d was expected", value_read, expected_val); - break; - } - expected_val++; - } - int num_rows = expected_val; - test.assert(num_rows == inserts, "Query returned %d rows when %d rows were expected", - num_rows, inserts); - mysql_free_result(res); - } - mysql_close(conn); -} - -void print_gtids(TestConnections& test) -{ - MYSQL* maxconn = test.maxscales->open_rwsplit_connection(0); - if (maxconn) - { - char result_tmp[bufsize]; - if (find_field(maxconn, GTID_QUERY, GTID_FIELD, result_tmp) == 0) - { - test.tprintf("MaxScale gtid: %s", result_tmp); - } - } - mysql_close(maxconn); - test.repl->connect(); - for (int i = 0; i < test.repl->N; i++) - { - char result_tmp[bufsize]; - if (find_field(test.repl->nodes[i], GTID_QUERY, GTID_FIELD, result_tmp) == 0) - { - test.tprintf("Node %d gtid: %s", i, result_tmp); - } - } -} int main(int argc, char** argv) { interactive = strcmp(argv[argc - 1], "interactive") == 0; TestConnections test(argc, argv); MYSQL* maxconn = test.maxscales->open_rwsplit_connection(0); - // Set up test table basic_test(test); // 
Delete binlogs to sync gtid:s delete_slave_binlogs(test); char result_tmp[bufsize]; // Advance gtid:s a bit to so gtid variables are updated. - generate_traffic_and_check(test, 10); + generate_traffic_and_check(test, maxconn, 10); sleep(1); test.tprintf(LINE); print_gtids(test); @@ -129,7 +51,7 @@ int main(int argc, char** argv) if (failover_ok) { test.tprintf("Sending more inserts."); - generate_traffic_and_check(test, 5); + generate_traffic_and_check(test, maxconn, 5); sleep(1); if (find_field(maxconn, GTID_QUERY, GTID_FIELD, result_tmp) == 0) { diff --git a/maxscale-system-test/testconnections.cpp b/maxscale-system-test/testconnections.cpp index aba933c32..c8827cfde 100644 --- a/maxscale-system-test/testconnections.cpp +++ b/maxscale-system-test/testconnections.cpp @@ -1259,6 +1259,13 @@ int TestConnections::stop_maxscale(int m) return res; } +int TestConnections::start_maxscale(int m) +{ + int res = maxscales->ssh_node(m, "service maxscale start", true); + check_maxscale_processes(m, 1); + fflush(stdout); + return res; +} int TestConnections::check_maxscale_alive(int m) { diff --git a/maxscale-system-test/testconnections.h b/maxscale-system-test/testconnections.h index a886a3e85..c6d42c52b 100644 --- a/maxscale-system-test/testconnections.h +++ b/maxscale-system-test/testconnections.h @@ -490,7 +490,7 @@ public: void check_current_operations(int m, int value); void check_current_connections(int m, int value); int stop_maxscale(int m); - + int start_maxscale(int m); void process_template(const char *src, const char *dest = "/etc/maxscale.cnf"); private: