Fix mysqlmon_failover_auto and mysqlmon_failover_manual

The tests now reset the replication state using queries and switchover instead of
calling fix_replication(). The results are checked so these tests now test
switchover as well.

Also, reduce printing for any test using the get_output() function in
fail_switch_rejoin_common.cpp: the MaxScale log is now printed only when verbose is on.
This commit is contained in:
Esa Korhonen 2017-12-20 11:19:17 +02:00
parent 8ef681d8cd
commit 6b2133d6a6
6 changed files with 219 additions and 71 deletions

View File

@ -14,7 +14,9 @@ allow_cluster_recovery=true
auto_failover=true
replication_user=repl
replication_password=repl
backend_connect_timeout=1
backend_connect_timeout=15
backend_read_timeout=15
backend_write_timeout=15
[RW Split Router]
type=service

View File

@ -13,7 +13,9 @@ failcount=1
allow_cluster_recovery=true
replication_user=repl
replication_password=repl
backend_connect_timeout=1
backend_connect_timeout=15
backend_read_timeout=15
backend_write_timeout=15
[RW Split Router]
type=service

View File

@ -18,11 +18,17 @@ void get_output(TestConnections& test)
test.tprintf("%s", output);
free(output);
test.tprintf("MaxScale output:");
if (test.verbose)
{
test.tprintf("MaxScale output:");
}
output = test.maxscales->ssh_node_output(0, "cat /var/log/maxscale/maxscale.log && "
"sudo truncate -s 0 /var/log/maxscale/maxscale.log",
true, &ec);
test.tprintf("%s", output);
if (test.verbose)
{
test.tprintf("%s", output);
}
free(output);
}

View File

@ -1,11 +1,69 @@
#include "fail_switch_rejoin_common.cpp"
#include <sstream>
#include <iostream>
using std::stringstream;
using std::cout;
using std::endl;
/**
 * Point one backend at another as its replication master (GTID based, current_pos).
 *
 * Issues STOP SLAVE, CHANGE MASTER TO ... and START SLAVE on the replicating server. The statements
 * are sent with test.try_query() instead of a bare execute_query() so that a failing statement is
 * not silently ignored (try_query is already used for the same kind of statements elsewhere in
 * this file).
 *
 * @param test       Test connections object
 * @param server_ind Index (0-based) of the server that should start replicating
 * @param target_ind Index (0-based) of the server to replicate from
 */
void replicate_from(TestConnections& test, int server_ind, int target_ind)
{
    std::stringstream change_master;
    change_master << "CHANGE MASTER TO MASTER_HOST = '" << test.repl->IP[target_ind]
                  << "', MASTER_PORT = " << test.repl->port[target_ind] << ", MASTER_USE_GTID = current_pos, "
                  "MASTER_USER='repl', MASTER_PASSWORD='repl';";
    // Keep the query text alive for the duration of the calls below.
    const std::string change_master_query = change_master.str();

    std::cout << "Server " << server_ind + 1 << " starting to replicate from server " << target_ind + 1
              << std::endl;
    if (test.verbose)
    {
        std::cout << "Query is '" << change_master_query << "'" << std::endl;
    }

    // The generated query contains only an address and a port number, so it is safe to hand over
    // as-is even if try_query() treats its argument as a format string.
    test.try_query(test.repl->nodes[server_ind], "STOP SLAVE;");
    test.try_query(test.repl->nodes[server_ind], change_master_query.c_str());
    test.try_query(test.repl->nodes[server_ind], "START SLAVE;");
}
/**
 * Bring the cluster back to its starting topology with server1 as the master.
 *
 * Restarts node 0 and, if no failure has been recorded so far, makes it replicate from the current
 * master and then asks the monitor (through maxadmin) to switch the master role back to server1.
 * Finally server2 and server3 are expected to report the "Slave" status. Problems are recorded
 * through test.assert().
 *
 * @param test Test connections object
 */
void reset_replication(TestConnections& test)
{
    int master_id = get_master_server_id(test);
    std::cout << "Reseting..." << std::endl;
    test.repl->start_node(0, (char*)"");
    sleep(5);
    test.repl->connect();
    get_output(test);

    if (test.global_result == 0)
    {
        // Only rewire when some other server holds the master role. An id below 1 means no valid
        // master was found (master_id - 1 would be a negative index) and id 1 would make server1
        // replicate from itself. In both cases the assertion below reports the outcome.
        // Assumes server ids are 1-based and map to node index id - 1 -- TODO confirm.
        if (master_id > 1)
        {
            // First set the old master to replicate from current master.
            replicate_from(test, 0, master_id - 1);
            sleep(3);
            get_output(test);

            int ec;
            std::stringstream switchover;
            switchover << "maxadmin call command mysqlmon switchover MySQL-Monitor server1 server" << master_id;
            // ssh_node_output() hands back a heap buffer (get_output() frees it the same way);
            // release it here instead of dropping the pointer.
            char* cmd_output = test.maxscales->ssh_node_output(0, switchover.str().c_str(), true, &ec);
            free(cmd_output);
            sleep(3);
            master_id = get_master_server_id(test);
        }
        std::cout << "Master server id is now back to " << master_id << std::endl;
        test.assert(master_id == 1, "Switchover back to server1 failed");
    }
    get_output(test);

    // Check that server2 and server3 are replicating again.
    StringSet node_states;
    for (int i = 2; i < 4; i++)
    {
        std::stringstream servername;
        servername << "server" << i;
        node_states = test.get_server_status(servername.str().c_str());
        bool states_ok = (node_states.find("Slave") != node_states.end());
        test.assert(states_ok, "Server %d is not replicating.", i);
    }
}
int prepare_test_1(TestConnections& test)
{
delete_slave_binlogs(test);
test.tprintf("Test 1: Stopping master and waiting for failover. Check that another server is promoted.\n"
"%s", LINE);
get_input();
cout << LINE << endl;
cout << "Part 1: Stopping master and waiting for failover. Check that another server is promoted." <<
endl;
cout << LINE << endl;
int node0_id = test.repl->get_server_id(0); // Read master id now before shutdown.
test.repl->stop_node(0);
return node0_id;
@ -13,89 +71,127 @@ int prepare_test_1(TestConnections& test)
// Part 1 verification: reads the current master id and expects it to be valid (> 0) and different
// from node0_id (the id read from node 0 before it was stopped), then calls reset_replication().
// NOTE(review): this listing is a rendered diff without +/- markers, so statements from before and
// after the change appear together in the body; confirm against the real file which ones remain.
void check_test_1(TestConnections& test, int node0_id)
{
check(test);
get_output(test);
int master_id = get_master_server_id(test);
test.tprintf(PRINT_ID, master_id);
// NOTE(review): with '&&' both conditions can only hold if node0_id < 1; presumably superseded by
// the test.assert() further down -- confirm.
test.add_result(master_id < 1 && master_id == node0_id, "Master did not change or no master detected.");
fix_replication_create_table(test);
test.repl->connect();
cout << "Master server id is " << master_id << endl;
test.assert(master_id > 0 && master_id != node0_id, "Master did not change or no master detected.");
// Run the extra check() only if nothing has failed so far.
if (test.global_result == 0)
{
check(test);
}
// Reset state
reset_replication(test);
}
// Part 2 setup: stops and resets replication on node index 1 (server 2 according to the printed
// description) and then stops node 0.
// NOTE(review): this listing is a rendered diff without +/- markers, so statements from before and
// after the change appear together in the body (e.g. node 0 is stopped in two places); confirm
// against the real file which ones remain.
void prepare_test_2(TestConnections& test)
{
delete_slave_binlogs(test);
test.tprintf("Test 2: Disable replication on server 2 and kill master, check that server 3 or 4 is "
"promoted.\n%s", LINE);
get_input();
execute_query(test.repl->nodes[1], "STOP SLAVE; RESET SLAVE ALL;");
sleep(2);
test.repl->stop_node(0);
cout << LINE << endl;
cout << "Part 2: Disable replication on server 2 and kill master, check that server 3 or 4 is promoted."
<< endl;
cout << LINE << endl;
test.repl->connect();
check(test);
sleep(1);
print_gtids(test);
// Same two statements as the single execute_query() call above, sent separately.
test.try_query(test.repl->nodes[1], "STOP SLAVE;");
test.try_query(test.repl->nodes[1], "RESET SLAVE ALL;");
sleep(1);
get_output(test);
// Stop node 0 only if nothing has failed so far.
if (test.global_result == 0)
{
cout << "Stopping master." << endl;
test.repl->stop_node(0);
}
}
// Part 2 verification: expects the current master id to match the server id of node index 2 or 3,
// then makes node index 1 replicate from the new master, checks that "server2" reports the "Slave"
// status and finally calls reset_replication() if nothing has failed.
// NOTE(review): this listing is a rendered diff without +/- markers, so statements from before and
// after the change appear together in the body; confirm against the real file which ones remain.
void check_test_2(TestConnections& test)
{
check(test);
get_output(test);
int master_id = get_master_server_id(test);
test.tprintf(PRINT_ID, master_id);
test.add_result(master_id < 1 ||
(master_id != test.repl->get_server_id(2) && master_id != test.repl->get_server_id(3)),
WRONG_SLAVE);
fix_replication_create_table(test);
test.repl->connect();
cout << "Master server id is " << master_id << endl;
bool success = (master_id > 0 &&
(master_id == test.repl->get_server_id(2) || master_id == test.repl->get_server_id(3)));
test.assert(success, WRONG_SLAVE);
if (test.global_result == 0)
{
check(test);
}
// Reset state
// NOTE(review): this call is not guarded by the check above, so an invalid master_id (< 1) would
// be turned into a negative target index for replicate_from() -- consider guarding.
replicate_from(test, 1, master_id - 1);
sleep(3);
get_output(test);
StringSet node_states = test.get_server_status("server2");
test.assert(node_states.find("Slave") != node_states.end(), "Server 2 is not replicating.");
if (test.global_result == 0)
{
reset_replication(test);
}
}
// Part 3 setup: with MaxScale stopped, restarts node indexes 1 and 3 with changed settings
// ("log-bin" disabled on node 1, "log_slave_updates=1" added on both), starts MaxScale again and
// then stops node 0.
// NOTE(review): this listing is a rendered diff without +/- markers, so statements from before and
// after the change appear together in the body (e.g. node 0 is stopped in two places); confirm
// against the real file which ones remain.
void prepare_test_3(TestConnections& test)
{
delete_slave_binlogs(test);
test.tprintf("Test3: Shutdown two slaves (servers 2 and 4). Disable log_bin on server 2, making it "
"invalid for promotion. Enable log-slave-updates on servers 2 and 4. Check that server 4 is "
"promoted on master failure.\n%s", LINE);
get_input();
cout << LINE << endl;
cout << "Part 3: Disable log_bin on server 2, making it invalid for promotion. Enable log-slave-updates "
" on servers 2 and 4. Check that server 4 is promoted on master failure." << endl << LINE << endl;
get_output(test);
test.maxscales->stop_maxscale(0);
test.repl->stop_node(1);
test.repl->stop_node(3);
// Stash the current settings; check_test_3() calls restore_server_settings() for the same nodes.
test.repl->stash_server_settings(1);
test.repl->stash_server_settings(3);
test.repl->disable_server_setting(1, "log-bin");
const char* log_slave = "log_slave_updates=1";
test.repl->add_server_setting(1, log_slave);
test.repl->add_server_setting(3, log_slave);
test.repl->start_node(1, (char *) "");
test.repl->start_node(3, (char *) "");
sleep(4);
test.maxscales->start_maxscale(0);
sleep(2);
// The nodes were restarted above, so reconnect before using the connections again.
test.repl->connect();
test.tprintf("Settings changed.");
get_output(test);
test.tprintf("Stopping master.");
test.repl->stop_node(0);
print_gtids(test);
check(test);
// Stop node 0 only if nothing has failed so far.
if (test.global_result == 0)
{
cout << "Stopping master." << endl;
test.repl->stop_node(0);
}
}
// Part 3 verification: expects the current master id to match the server id of node index 3, calls
// reset_replication() and then, with MaxScale stopped, restores the stashed settings of node
// indexes 1 and 3 and restarts the nodes and MaxScale.
// NOTE(review): this listing is a rendered diff without +/- markers, so statements and comments
// from before and after the change appear together in the body (e.g. MaxScale is started in two
// places); confirm against the real file which ones remain.
void check_test_3(TestConnections& test)
{
check(test);
get_output(test);
int master_id = get_master_server_id(test);
// Because servers have been restarted, redo connections.
test.repl->connect();
sleep(2);
test.tprintf(PRINT_ID, master_id);
test.add_result(master_id < 1 || master_id != test.repl->get_server_id(3), WRONG_SLAVE);
// Restore server 2 and 4 settings. Because server 4 is now the master, shutting it down causes
cout << "Master server id is " << master_id << endl;
test.assert(master_id > 0 && master_id == test.repl->get_server_id(3), WRONG_SLAVE);
print_gtids(test);
reset_replication(test);
get_output(test);
// Restore server 2 and 4 settings. Because server 1 is now the master, shutting it down causes
// another failover. Prevent this by stopping maxscale.
test.tprintf("Restoring server settings.");
test.maxscales->stop_maxscale(0);
test.repl->stop_node(1);
test.repl->stop_node(3);
sleep(4);
test.repl->restore_server_settings(1);
test.repl->restore_server_settings(3);
test.repl->start_node(0, (char *) "");
test.repl->start_node(1, (char *) "");
test.repl->start_node(3, (char *) "");
sleep(4);
test.maxscales->start_maxscale(0);
sleep(2);
get_output(test);
test.maxscales->start_maxscale(0);
}

View File

@ -1,5 +1,14 @@
/**
* Test auto_failover
/*
* Copyright (c) 2016 MariaDB Corporation Ab
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file and at www.mariadb.com/bsl11.
*
* Change Date: 2020-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2 or later of the General
* Public License.
*/
#include "testconnections.h"
@ -7,29 +16,39 @@
// Entry point of the automatic failover test: runs parts 1-3 in order (prepare, wait 10 seconds,
// check) and returns test.global_result. A failed part ends the test early.
// NOTE(review): this listing is a rendered diff without +/- markers, so statements and comments
// from before and after the change appear together in the body (e.g. "// Test 1" next to
// "// Part 1"); confirm against the real file which ones remain.
int main(int argc, char** argv)
{
interactive = strcmp(argv[argc - 1], "interactive") == 0;
Mariadb_nodes::require_gtid(true);
TestConnections test(argc, argv);
test.repl->connect();
delete_slave_binlogs(test);
// Wait a few seconds
sleep(5);
sleep(2);
basic_test(test);
print_gtids(test);
// Test 1
// Part 1
int node0_id = prepare_test_1(test);
// Wait before checking the result of the part.
sleep(10);
check_test_1(test, node0_id);
// Test 2
if (test.global_result != 0)
{
return test.global_result;
}
// Part 2
prepare_test_2(test);
sleep(10);
check_test_2(test);
// Test 3
if (test.global_result != 0)
{
return test.global_result;
}
// Part 3
prepare_test_3(test);
sleep(10);
check_test_3(test);
test.repl->fix_replication();
return test.global_result;
}

View File

@ -1,5 +1,14 @@
/**
* Test auto_failover
/*
* Copyright (c) 2016 MariaDB Corporation Ab
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file and at www.mariadb.com/bsl11.
*
* Change Date: 2020-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2 or later of the General
* Public License.
*/
#include "testconnections.h"
@ -8,39 +17,53 @@
// Entry point of the manual failover test: for each of the parts 1-3 it runs the prepare step,
// issues FAILOVER_CMD on MaxScale node 0 through ssh_node_output(), waits and runs the check step.
// Returns test.global_result; a failed part ends the test early.
// NOTE(review): this listing is a rendered diff without +/- markers, so statements and comments
// from before and after the change appear together in the body. As shown, 'ec' and 'node0_id' are
// each declared twice, which cannot be the real file content; confirm against the real file.
int main(int argc, char** argv)
{
const char FAILOVER_CMD[] = "maxadmin call command mysqlmon failover MySQL-Monitor";
interactive = strcmp(argv[argc - 1], "interactive") == 0;
//interactive = strcmp(argv[argc - 1], "interactive") == 0;
Mariadb_nodes::require_gtid(true);
TestConnections test(argc, argv);
int ec;
// Wait a few seconds
sleep(5);
basic_test(test);
test.repl->connect();
delete_slave_binlogs(test);
// Test 1
int node0_id = prepare_test_1(test);
sleep(2);
basic_test(test);
print_gtids(test);
int node0_id = -1;
int ec = -1;
// Part 1
node0_id = prepare_test_1(test);
sleep(3);
// NOTE(review): get_output() in fail_switch_rejoin_common.cpp frees the buffer returned by
// ssh_node_output(); here (and in parts 2 and 3) the return value is discarded -- possible leak.
test.maxscales->ssh_node_output(0, FAILOVER_CMD , true, &ec);
sleep(10);
sleep(5);
check_test_1(test, node0_id);
if (test.global_result != 0)
{
return test.global_result;
}
// Test 2
// Part 2
prepare_test_2(test);
sleep(3);
test.maxscales->ssh_node_output(0, FAILOVER_CMD, true, &ec);
sleep(10);
sleep(5);
check_test_2(test);
if (test.global_result != 0)
{
return test.global_result;
}
// Test 3
// Part 3
prepare_test_3(test);
sleep(3);
test.maxscales->ssh_node_output(0, FAILOVER_CMD, true, &ec);
sleep(10);
sleep(5);
check_test_3(test);
test.repl->fix_replication();
return test.global_result;
}