MXS-1565: Invalid rejoin test

- 1 master, 3 slaves
- stop maxscale so it does not autorejoin later on
- stop & reset slave on servers 3 & 4
- add data to server 4
- restart maxscale, check that server 3 is rejoined but not server 4
- manually set server 1 to replicate from server 4, creating a relay master
- check that servers 2 & 3 are redirected, making server 1 just a slave
- switchover master to server 1, check that it's the master

Also, moved some common functions into their own files. These functions
are used by multiple tests.
This commit is contained in:
Esa Korhonen
2017-12-13 18:33:42 +02:00
parent 2a9619dd07
commit c3fe8a6b55
10 changed files with 332 additions and 182 deletions

View File

@ -167,6 +167,7 @@ mxs957
mysqlmon_failover_auto mysqlmon_failover_auto
mysqlmon_failover_manual mysqlmon_failover_manual
mysqlmon_rejoin_good mysqlmon_rejoin_good
mysqlmon_rejoin_bad
namedserverfilter namedserverfilter
no_password no_password
non_native_setup non_native_setup

View File

@ -266,9 +266,12 @@ add_test_executable(mysqlmon_failover_manual2.cpp mysqlmon_failover_manual2_2 my
# MySQL Monitor manual failover with bad master # MySQL Monitor manual failover with bad master
add_test_executable(mysqlmon_failover_bad_master.cpp mysqlmon_failover_bad_master mysqlmon_failover_bad_master LABELS mysqlmon REPL_BACKEND) add_test_executable(mysqlmon_failover_bad_master.cpp mysqlmon_failover_bad_master mysqlmon_failover_bad_master LABELS mysqlmon REPL_BACKEND)
# MySQL Monitor Rejoin Test # MySQL Monitor Rejoin (good) Test
add_test_executable(mysqlmon_rejoin_good.cpp mysqlmon_rejoin_good mysqlmon_rejoin_good LABELS mysqlmon REPL_BACKEND) add_test_executable(mysqlmon_rejoin_good.cpp mysqlmon_rejoin_good mysqlmon_rejoin_good LABELS mysqlmon REPL_BACKEND)
# MySQL Monitor Rejoin (bad) Test, use template for Rejoin (good)
add_test_executable(mysqlmon_rejoin_bad.cpp mysqlmon_rejoin_bad mysqlmon_rejoin_good LABELS mysqlmon REPL_BACKEND)
# MySQL Monitor rolling master # MySQL Monitor rolling master
add_test_executable(mysqlmon_failover_rolling_master.cpp mysqlmon_failover_rolling_master mysqlmon_failover_rolling_master LABELS mysqlmon REPL_BACKEND) add_test_executable(mysqlmon_failover_rolling_master.cpp mysqlmon_failover_rolling_master mysqlmon_failover_rolling_master LABELS mysqlmon REPL_BACKEND)

View File

@ -3,6 +3,13 @@
int inserts = 0; int inserts = 0;
bool interactive = false; bool interactive = false;
const char LINE[] = "------------------------------------------";
const char PRINT_ID[] = "Master server id is %d.";
const char WRONG_SLAVE[] = "Wrong slave was promoted or promotion failed.";
const char GTID_QUERY[] = "SELECT @@gtid_current_pos;";
const char GTID_FIELD[] = "@@gtid_current_pos";
const int bufsize = 512;
void get_output(TestConnections& test) void get_output(TestConnections& test)
{ {
int ec; int ec;
@ -101,15 +108,12 @@ void fix_replication_create_table(TestConnections& test)
void delete_slave_binlogs(TestConnections& test) void delete_slave_binlogs(TestConnections& test)
{ {
const char RESET[] = "RESET MASTER;"; const char RESET[] = "RESET MASTER;";
execute_query(test.repl->nodes[0], "SET GLOBAL gtid_slave_pos='0-1-0';");
execute_query(test.repl->nodes[1], RESET); execute_query(test.repl->nodes[1], RESET);
execute_query(test.repl->nodes[2], RESET); execute_query(test.repl->nodes[2], RESET);
execute_query(test.repl->nodes[3], RESET); execute_query(test.repl->nodes[3], RESET);
} }
const char LINE[] = "------------------------------------------";
const char PRINT_ID[] = "Master server id is %d.";
const char WRONG_SLAVE[] = "Wrong slave was promoted or promotion failed.";
void basic_test(TestConnections& test) void basic_test(TestConnections& test)
{ {
test.tprintf("Creating table and inserting data."); test.tprintf("Creating table and inserting data.");
@ -121,102 +125,72 @@ void basic_test(TestConnections& test)
check(test); check(test);
get_output(test); get_output(test);
} }
int prepare_test_1(TestConnections& test)
/**
* Do inserts, check that results are as expected.
*
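* @param test Test connections
* @param conn Connection used for the inserts and for the verification query
* @param insert_count Number of rows to insert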
*/
void generate_traffic_and_check(TestConnections& test, MYSQL* conn, int insert_count)
{ {
delete_slave_binlogs(test); const char INSERT[] = "INSERT INTO test.t1 VALUES (%d);";
test.tprintf("Test 1: Stopping master and waiting for failover. Check that another server is promoted.\n" const char SELECT[] = "SELECT * FROM test.t1 ORDER BY id ASC;";
"%s", LINE); for (int i = 0; i < insert_count; i++)
get_input(); {
int node0_id = test.repl->get_server_id(0); // Read master id now before shutdown. test.try_query(conn, INSERT, inserts++);
test.repl->stop_node(0); timespec time;
return node0_id; time.tv_sec = 0;
time.tv_nsec = 100000000;
nanosleep(&time, NULL);
} }
void check_test_1(TestConnections& test, int node0_id) mysql_query(conn, SELECT);
MYSQL_RES *res = mysql_store_result(conn);
test.assert(res != NULL, "Query did not return a result set");
if (res)
{ {
check(test); MYSQL_ROW row;
get_output(test); // Check all values, they should go from 0 to 'inserts'
int master_id = get_master_server_id(test); int expected_val = 0;
test.tprintf(PRINT_ID, master_id); while ((row = mysql_fetch_row(res)))
test.add_result(master_id < 1 && master_id == node0_id, "Master did not change or no master detected."); {
fix_replication_create_table(test); int value_read = strtol(row[0], NULL, 0);
if (value_read != expected_val)
{
test.assert(false, "Query returned %d when %d was expected", value_read, expected_val);
break;
}
expected_val++;
}
int num_rows = expected_val;
test.assert(num_rows == inserts, "Query returned %d rows when %d rows were expected",
num_rows, inserts);
mysql_free_result(res);
}
// mysql_close(conn);
}
void print_gtids(TestConnections& test)
{
MYSQL* maxconn = test.maxscales->open_rwsplit_connection(0);
if (maxconn)
{
char result_tmp[bufsize];
if (find_field(maxconn, GTID_QUERY, GTID_FIELD, result_tmp) == 0)
{
test.tprintf("MaxScale gtid: %s", result_tmp);
}
}
mysql_close(maxconn);
test.repl->connect(); test.repl->connect();
} for (int i = 0; i < test.repl->N; i++)
void prepare_test_2(TestConnections& test)
{ {
delete_slave_binlogs(test); char result_tmp[bufsize];
test.tprintf("Test 2: Disable replication on server 2 and kill master, check that server 3 or 4 is " if (find_field(test.repl->nodes[i], GTID_QUERY, GTID_FIELD, result_tmp) == 0)
"promoted.\n%s", LINE);
get_input();
execute_query(test.repl->nodes[1], "STOP SLAVE; RESET SLAVE ALL;");
sleep(2);
test.repl->stop_node(0);
}
void check_test_2(TestConnections& test)
{ {
check(test); test.tprintf("Node %d gtid: %s", i, result_tmp);
get_output(test);
int master_id = get_master_server_id(test);
test.tprintf(PRINT_ID, master_id);
test.add_result(master_id < 1 ||
(master_id != test.repl->get_server_id(2) && master_id != test.repl->get_server_id(3)),
WRONG_SLAVE);
fix_replication_create_table(test);
test.repl->connect();
} }
void prepare_test_3(TestConnections& test)
{
delete_slave_binlogs(test);
test.tprintf("Test3: Shutdown two slaves (servers 2 and 4). Disable log_bin on server 2, making it "
"invalid for promotion. Enable log-slave-updates on servers 2 and 4. Check that server 4 is "
"promoted on master failure.\n%s", LINE);
get_input();
test.repl->stop_node(1);
test.repl->stop_node(3);
test.repl->stash_server_settings(1);
test.repl->stash_server_settings(3);
test.repl->disable_server_setting(1, "log-bin");
const char* log_slave = "log_slave_updates=1";
test.repl->add_server_setting(1, log_slave);
test.repl->add_server_setting(3, log_slave);
test.repl->start_node(1, (char *) "");
test.repl->start_node(3, (char *) "");
sleep(4);
test.tprintf("Settings changed.");
get_output(test);
test.tprintf("Stopping master.");
test.repl->stop_node(0);
} }
void check_test_3(TestConnections& test)
{
check(test);
get_output(test);
int master_id = get_master_server_id(test);
// Because servers have been restarted, redo connections.
test.repl->connect();
sleep(2);
test.tprintf(PRINT_ID, master_id);
test.add_result(master_id < 1 || master_id != test.repl->get_server_id(3), WRONG_SLAVE);
// Restore server 2 and 4 settings. Because server 4 is now the master, shutting it down causes
// another failover. Prevent this by stopping maxscale.
test.tprintf("Restoring server settings.");
test.maxscales->stop_maxscale(0);
test.repl->stop_node(1);
test.repl->stop_node(3);
sleep(4);
test.repl->restore_server_settings(1);
test.repl->restore_server_settings(3);
test.repl->start_node(0, (char *) "");
test.repl->start_node(1, (char *) "");
test.repl->start_node(3, (char *) "");
sleep(4);
test.maxscales->start_maxscale(0);
sleep(2);
get_output(test);
} }

View File

@ -0,0 +1,101 @@
#include "fail_switch_rejoin_common.cpp"
/**
 * Prepare failover test 1: reset the slave binlogs, print the test banner and stop node 0.
 *
 * @param test Test connections
 * @return Server id of node 0, read before the node was stopped
 */
int prepare_test_1(TestConnections& test)
{
    delete_slave_binlogs(test);
    test.tprintf("Test 1: Stopping master and waiting for failover. Check that another server is promoted.\n%s",
                 LINE);
    get_input();

    // The id is queried while node 0 is still running; it is stopped right afterwards.
    const int old_master_id = test.repl->get_server_id(0);
    test.repl->stop_node(0);
    return old_master_id;
}
/**
 * Check the outcome of failover test 1 and restore replication afterwards.
 *
 * The test is marked as failed if no master is found (id below 1) or if the master is still
 * the server that was stopped in prepare_test_1().
 *
 * @param test     Test connections
 * @param node0_id Server id of the original master, as returned by prepare_test_1()
 */
void check_test_1(TestConnections& test, int node0_id)
{
    check(test);
    get_output(test);
    int master_id = get_master_server_id(test);
    test.tprintf(PRINT_ID, master_id);
    // Either condition alone is a failure. The two must be OR'ed: with AND the check could
    // only trigger if the old master id itself were below 1.
    test.add_result(master_id < 1 || master_id == node0_id, "Master did not change or no master detected.");
    fix_replication_create_table(test);
    test.repl->connect();
}
/**
 * Prepare failover test 2: detach node 1 from replication, then stop node 0.
 *
 * @param test Test connections
 */
void prepare_test_2(TestConnections& test)
{
    const char DETACH_SLAVE[] = "STOP SLAVE; RESET SLAVE ALL;";

    delete_slave_binlogs(test);
    test.tprintf("Test 2: Disable replication on server 2 and kill master, check that server 3 or 4 is promoted.\n%s",
                 LINE);
    get_input();

    // Node index 1 is "server 2" in the banner text.
    execute_query(test.repl->nodes[1], DETACH_SLAVE);
    sleep(2);
    test.repl->stop_node(0);
}
/**
 * Check the outcome of failover test 2 and restore replication afterwards.
 *
 * The new master must exist (id of at least 1) and be either node 2 or node 3.
 *
 * @param test Test connections
 */
void check_test_2(TestConnections& test)
{
    check(test);
    get_output(test);

    const int master_id = get_master_server_id(test);
    test.tprintf(PRINT_ID, master_id);

    // Server ids are only queried when a master was found, and node 3 only if node 2 did not match.
    const bool valid_promotion = master_id >= 1
        && (master_id == test.repl->get_server_id(2) || master_id == test.repl->get_server_id(3));
    test.add_result(!valid_promotion, WRONG_SLAVE);

    fix_replication_create_table(test);
    test.repl->connect();
}
/**
 * Prepare failover test 3: restart nodes 1 and 3 with modified settings, then stop node 0.
 *
 * Node 1 gets "log-bin" disabled; both nodes get log_slave_updates enabled. The original
 * settings are stashed first so that they can be restored later.
 *
 * @param test Test connections
 */
void prepare_test_3(TestConnections& test)
{
    // Node indexes of the slaves that are restarted (servers 2 and 4 in the banner text).
    const int restarted[] = {1, 3};
    const int n_restarted = sizeof(restarted) / sizeof(restarted[0]);
    const char* log_slave = "log_slave_updates=1";

    delete_slave_binlogs(test);
    test.tprintf("Test3: Shutdown two slaves (servers 2 and 4). Disable log_bin on server 2, making it "
                 "invalid for promotion. Enable log-slave-updates on servers 2 and 4. Check that server 4 is "
                 "promoted on master failure.\n%s",
                 LINE);
    get_input();

    for (int i = 0; i < n_restarted; i++)
    {
        test.repl->stop_node(restarted[i]);
    }
    for (int i = 0; i < n_restarted; i++)
    {
        test.repl->stash_server_settings(restarted[i]);
    }

    test.repl->disable_server_setting(1, "log-bin");
    for (int i = 0; i < n_restarted; i++)
    {
        test.repl->add_server_setting(restarted[i], log_slave);
    }
    for (int i = 0; i < n_restarted; i++)
    {
        test.repl->start_node(restarted[i], (char *) "");
    }
    sleep(4);

    test.tprintf("Settings changed.");
    get_output(test);
    test.tprintf("Stopping master.");
    test.repl->stop_node(0);
}
/**
 * Check the outcome of failover test 3, then restore the stashed server settings.
 *
 * The new master must exist (id of at least 1) and be node 3.
 *
 * @param test Test connections
 */
void check_test_3(TestConnections& test)
{
    // Nodes whose settings were changed, and every node that has to be started again.
    const int modified[] = {1, 3};
    const int n_modified = sizeof(modified) / sizeof(modified[0]);
    const int restarted[] = {0, 1, 3};
    const int n_restarted = sizeof(restarted) / sizeof(restarted[0]);

    check(test);
    get_output(test);
    const int master_id = get_master_server_id(test);

    // The servers were restarted during the test, so the node connections are reopened.
    test.repl->connect();
    sleep(2);
    test.tprintf(PRINT_ID, master_id);
    test.add_result(master_id < 1 || master_id != test.repl->get_server_id(3), WRONG_SLAVE);

    // Server 4 is the master at this point, so stopping it would trigger yet another failover.
    // MaxScale is stopped first to prevent that while the settings of servers 2 and 4 are restored.
    test.tprintf("Restoring server settings.");
    test.maxscales->stop_maxscale(0);
    for (int i = 0; i < n_modified; i++)
    {
        test.repl->stop_node(modified[i]);
    }
    sleep(4);

    for (int i = 0; i < n_modified; i++)
    {
        test.repl->restore_server_settings(modified[i]);
    }
    for (int i = 0; i < n_restarted; i++)
    {
        test.repl->start_node(restarted[i], (char *) "");
    }
    sleep(4);

    test.maxscales->start_maxscale(0);
    sleep(2);
    get_output(test);
}

View File

@ -3,7 +3,7 @@
*/ */
#include "testconnections.h" #include "testconnections.h"
#include "mysqlmon_failover_common.cpp" #include "failover_common.cpp"
int main(int argc, char** argv) int main(int argc, char** argv)
{ {

View File

@ -3,7 +3,7 @@
*/ */
#include "testconnections.h" #include "testconnections.h"
#include "mysqlmon_failover_common.cpp" #include "failover_common.cpp"
int main(int argc, char** argv) int main(int argc, char** argv)
{ {

View File

@ -0,0 +1,142 @@
/*
* Copyright (c) 2016 MariaDB Corporation Ab
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file and at www.mariadb.com/bsl11.
*
* Change Date: 2020-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2 or later of the General
* Public License.
*/
#include <vector>
#include "testconnections.h"
#include "fail_switch_rejoin_common.cpp"
using std::string;
/**
 * MXS-1565: invalid rejoin test.
 *
 * Outline:
 * - create the test table and generate some traffic through MaxScale
 * - stop MaxScale, then stop and reset replication on nodes 2 and 3
 * - write extra rows directly to node 3
 * - start MaxScale; server3 is expected to have the Slave status, server4 is not
 * - point node 0 to replicate from node 3 and check the resulting master and server1 status
 * - switch the master over to server1 and check that it is the master
 *
 * @return The global test result
 */
int main(int argc, char** argv)
{
    char result_tmp[bufsize];
    interactive = strcmp(argv[argc - 1], "interactive") == 0;
    TestConnections test(argc, argv);
    MYSQL* maxconn = test.maxscales->open_rwsplit_connection(0);
    // The connection is used for traffic generation and gtid queries below, so the test
    // cannot continue without it.
    test.assert(maxconn != NULL, "Could not open a readwritesplit connection to MaxScale.");
    if (maxconn == NULL)
    {
        return test.global_result;
    }

    // Set up test table
    basic_test(test);
    // Delete binlogs to sync gtid:s
    delete_slave_binlogs(test);
    // Advance gtid:s a bit so that the gtid variables are updated.
    generate_traffic_and_check(test, maxconn, 10);
    test.repl->sync_slaves(0);
    test.tprintf(LINE);
    print_gtids(test);
    test.tprintf(LINE);
    string gtid_begin;
    if (find_field(maxconn, GTID_QUERY, GTID_FIELD, result_tmp) == 0)
    {
        gtid_begin = result_tmp;
    }
    mysql_close(maxconn);

    test.tprintf("Stopping MaxScale...");
    // MaxScale is stopped while the slaves are modified so that only one of them can be rejoined.
    if (test.stop_maxscale(0))
    {
        test.assert(false, "Could not stop MaxScale.");
        return test.global_result;
    }

    // Leave first of three slaves connected so it's clear which one is the master server.
    const char STOP_SLAVE[] = "STOP SLAVE;";
    const char RESET_SLAVE[] = "RESET SLAVE ALL;";
    const char READ_ONLY_OFF[] = "SET GLOBAL read_only=0;";
    test.repl->connect();
    const int FIRST_MOD_NODE = 2; // Modify nodes 2 & 3
    const int NODE_COUNT = test.repl->N;
    MYSQL** nodes = test.repl->nodes;
    for (int i = FIRST_MOD_NODE; i < NODE_COUNT; i++)
    {
        if (mysql_query(nodes[i], STOP_SLAVE) != 0 ||
            mysql_query(nodes[i], RESET_SLAVE) != 0 ||
            mysql_query(nodes[i], READ_ONLY_OFF) != 0)
        {
            test.assert(false, "Could not stop slave connections and/or disable read_only for node %d.", i);
            return test.global_result;
        }
    }

    // Add more events to node3.
    string gtid_node2, gtid_node3;
    test.tprintf("Sending more inserts to server 4.");
    generate_traffic_and_check(test, nodes[3], 10);
    // Save gtids
    if (find_field(nodes[2], GTID_QUERY, GTID_FIELD, result_tmp) == 0)
    {
        gtid_node2 = result_tmp;
    }
    if (find_field(nodes[3], GTID_QUERY, GTID_FIELD, result_tmp) == 0)
    {
        gtid_node3 = result_tmp;
    }
    print_gtids(test);
    // NOTE(review): operator< on the gtid strings is a lexicographic comparison, not a numeric
    // comparison of sequence numbers - confirm this is sufficient for the gtid values seen here.
    bool gtids_ok = (gtid_begin == gtid_node2 && gtid_node2 < gtid_node3);
    test.assert(gtids_ok, "Gtid:s have not advanced correctly.");
    if (!gtids_ok)
    {
        return test.global_result;
    }

    test.tprintf("Restarting MaxScale. Server 4 should not rejoin the cluster.");
    test.tprintf(LINE);
    if (test.start_maxscale(0))
    {
        test.assert(false, "Could not start MaxScale.");
        return test.global_result;
    }
    sleep(5);
    get_output(test);

    StringSet node2_states = test.get_server_status("server3");
    StringSet node3_states = test.get_server_status("server4");
    bool states_n2_ok = (node2_states.find("Slave") != node2_states.end());
    bool states_n3_ok = (node3_states.find("Slave") == node3_states.end());
    test.assert(states_n2_ok, "Node 2 has not rejoined when it should have.");
    test.assert(states_n3_ok, "Node 3 rejoined when it shouldn't have.");
    if (!states_n2_ok || !states_n3_ok)
    {
        return test.global_result;
    }

    // Finally, fix replication by telling the current master to replicate from server4
    test.tprintf("Setting server 1 to replicate from server 4. Auto-rejoin should redirect servers 2 and 3.");
    const char CHANGE_CMD_FMT[] = "CHANGE MASTER TO MASTER_HOST = '%s', MASTER_PORT = %d, "
        "MASTER_USE_GTID = current_pos, MASTER_USER='repl', MASTER_PASSWORD = 'repl';";
    char cmd[256];
    snprintf(cmd, sizeof(cmd), CHANGE_CMD_FMT, test.repl->IP[3], test.repl->port[3]);
    // Both statements are always sent, as before; a failure is now reported instead of being
    // silently ignored, which makes the later status checks easier to interpret.
    const bool change_ok = (mysql_query(nodes[0], cmd) == 0);
    test.assert(change_ok, "CHANGE MASTER TO failed on server 1.");
    const bool start_ok = (mysql_query(nodes[0], "START SLAVE;") == 0);
    test.assert(start_ok, "START SLAVE failed on server 1.");
    sleep(5);
    get_output(test);

    int master_id = get_master_server_id(test);
    test.assert(master_id == 4, "Server 4 should be the cluster master.");
    StringSet node0_states = test.get_server_status("server1");
    bool states_n0_ok = (node0_states.find("Slave") != node0_states.end() &&
                         node0_states.find("Relay Master") == node0_states.end());
    test.assert(states_n0_ok, "Server 1 is not a slave when it should be.");
    if (states_n0_ok)
    {
        int ec = 0;
        test.maxscales->ssh_node_output(0,
            "maxadmin call command mysqlmon switchover MySQL-Monitor server1 server4" , true, &ec);
        sleep(1);
        master_id = get_master_server_id(test);
        test.assert(master_id == 1, "Server 1 should be the cluster master.");
        get_output(test);
    }

    test.repl->fix_replication();
    return test.global_result;
}

View File

@ -11,100 +11,22 @@
* Public License. * Public License.
*/ */
#include <vector> #include "fail_switch_rejoin_common.cpp"
#include "testconnections.h"
#include "mysqlmon_failover_common.cpp"
using std::string; using std::string;
typedef std::vector<string> StringVector;
const char GTID_QUERY[] = "SELECT @@gtid_current_pos;";
const char GTID_FIELD[] = "@@gtid_current_pos";
const int bufsize = 512;
/**
* Do inserts, check that results are as expected.
*
* @param test Test connections
* @paran insert_count
*/
void generate_traffic_and_check(TestConnections& test, int insert_count)
{
MYSQL *conn = test.maxscales->open_rwsplit_connection(0);
const char INSERT[] = "INSERT INTO test.t1 VALUES (%d);";
const char SELECT[] = "SELECT * FROM test.t1 ORDER BY id ASC;";
for (int i = 0; i < insert_count; i++)
{
test.try_query(conn, INSERT, inserts++);
timespec time;
time.tv_sec = 0;
time.tv_nsec = 100000000;
nanosleep(&time, NULL);
}
mysql_query(conn, SELECT);
MYSQL_RES *res = mysql_store_result(conn);
test.assert(res != NULL, "Query did not return a result set");
if (res)
{
MYSQL_ROW row;
// Check all values, they should go from 0 to 'inserts'
int expected_val = 0;
while ((row = mysql_fetch_row(res)))
{
int value_read = strtol(row[0], NULL, 0);
if (value_read != expected_val)
{
test.assert(false, "Query returned %d when %d was expected", value_read, expected_val);
break;
}
expected_val++;
}
int num_rows = expected_val;
test.assert(num_rows == inserts, "Query returned %d rows when %d rows were expected",
num_rows, inserts);
mysql_free_result(res);
}
mysql_close(conn);
}
void print_gtids(TestConnections& test)
{
MYSQL* maxconn = test.maxscales->open_rwsplit_connection(0);
if (maxconn)
{
char result_tmp[bufsize];
if (find_field(maxconn, GTID_QUERY, GTID_FIELD, result_tmp) == 0)
{
test.tprintf("MaxScale gtid: %s", result_tmp);
}
}
mysql_close(maxconn);
test.repl->connect();
for (int i = 0; i < test.repl->N; i++)
{
char result_tmp[bufsize];
if (find_field(test.repl->nodes[i], GTID_QUERY, GTID_FIELD, result_tmp) == 0)
{
test.tprintf("Node %d gtid: %s", i, result_tmp);
}
}
}
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
interactive = strcmp(argv[argc - 1], "interactive") == 0; interactive = strcmp(argv[argc - 1], "interactive") == 0;
TestConnections test(argc, argv); TestConnections test(argc, argv);
MYSQL* maxconn = test.maxscales->open_rwsplit_connection(0); MYSQL* maxconn = test.maxscales->open_rwsplit_connection(0);
// Set up test table // Set up test table
basic_test(test); basic_test(test);
// Delete binlogs to sync gtid:s // Delete binlogs to sync gtid:s
delete_slave_binlogs(test); delete_slave_binlogs(test);
char result_tmp[bufsize]; char result_tmp[bufsize];
// Advance gtid:s a bit to so gtid variables are updated. // Advance gtid:s a bit to so gtid variables are updated.
generate_traffic_and_check(test, 10); generate_traffic_and_check(test, maxconn, 10);
sleep(1); sleep(1);
test.tprintf(LINE); test.tprintf(LINE);
print_gtids(test); print_gtids(test);
@ -129,7 +51,7 @@ int main(int argc, char** argv)
if (failover_ok) if (failover_ok)
{ {
test.tprintf("Sending more inserts."); test.tprintf("Sending more inserts.");
generate_traffic_and_check(test, 5); generate_traffic_and_check(test, maxconn, 5);
sleep(1); sleep(1);
if (find_field(maxconn, GTID_QUERY, GTID_FIELD, result_tmp) == 0) if (find_field(maxconn, GTID_QUERY, GTID_FIELD, result_tmp) == 0)
{ {

View File

@ -1259,6 +1259,13 @@ int TestConnections::stop_maxscale(int m)
return res; return res;
} }
/**
 * Start the MaxScale service on a MaxScale node with "service maxscale start".
 *
 * @param m Index of the MaxScale node
 * @return Result of the ssh command that starts the service
 */
int TestConnections::start_maxscale(int m)
{
    const int start_rc = maxscales->ssh_node(m, "service maxscale start", true);

    // Presumably verifies that exactly one MaxScale process is running - confirm against
    // check_maxscale_processes().
    check_maxscale_processes(m, 1);
    fflush(stdout);

    return start_rc;
}
int TestConnections::check_maxscale_alive(int m) int TestConnections::check_maxscale_alive(int m)
{ {

View File

@ -490,7 +490,7 @@ public:
void check_current_operations(int m, int value); void check_current_operations(int m, int value);
void check_current_connections(int m, int value); void check_current_connections(int m, int value);
int stop_maxscale(int m); int stop_maxscale(int m);
int start_maxscale(int m);
void process_template(const char *src, const char *dest = "/etc/maxscale.cnf"); void process_template(const char *src, const char *dest = "/etc/maxscale.cnf");
private: private: