MXS-1514: Add failover test

The test is composed of a few parts.
1: Test that failover happens on master failure.
2: Test that a server with slave sql thread stopped is not promoted.
3: Test that a server with log_slave_updates=1 is promoted before others.
This commit is contained in:
Esa Korhonen
2017-11-01 14:33:02 +02:00
parent b2b72474d9
commit ced93acc20
5 changed files with 366 additions and 0 deletions

View File

@ -252,6 +252,9 @@ add_test_executable(encrypted_passwords.cpp encrypted_passwords replication LABE
# MySQL Monitor Failover Test
add_test_executable(failover_mysqlmon.cpp failover_mysqlmon failover_mysqlmon LABELS mysqlmon REPL_BACKEND)
# MySQL Monitor Real Failover Test
add_test_executable(failover_mysqlmon_mrm.cpp failover_mysqlmon_mrm failover_mysqlmon_mrm LABELS mysqlmon REPL_BACKEND)
# Test monitor state change events when manually clearing server bits
add_test_executable(false_monitor_state_change.cpp false_monitor_state_change replication LABELS mysqlmon REPL_BACKEND)

View File

@ -0,0 +1,92 @@
[maxscale]
threads=###threads###
[MySQL Monitor]
type=monitor
module=mysqlmon
servers= server1, server2, server3, server4
user=maxskysql
passwd= skysql
monitor_interval=1000
detect_standalone_master=true
failcount=1
allow_cluster_recovery=true
failover=true
replication_user=repl
replication_password=repl
backend_connect_timeout=1
[RW Split Router]
type=service
router= readwritesplit
servers=server1, server2, server3, server4
user=maxskysql
passwd=skysql
[Read Connection Router Slave]
type=service
router=readconnroute
router_options= slave
servers=server1, server2, server3, server4
user=maxskysql
passwd=skysql
[Read Connection Router Master]
type=service
router=readconnroute
router_options=master
servers=server1, server2, server3, server4
user=maxskysql
passwd=skysql
[RW Split Listener]
type=listener
service=RW Split Router
protocol=MySQLClient
port=4006
[Read Connection Listener Slave]
type=listener
service=Read Connection Router Slave
protocol=MySQLClient
port=4009
[Read Connection Listener Master]
type=listener
service=Read Connection Router Master
protocol=MySQLClient
port=4008
[CLI]
type=service
router=cli
[CLI Listener]
type=listener
service=CLI
protocol=maxscaled
socket=default
[server1]
type=server
address=###node_server_IP_1###
port=###node_server_port_1###
protocol=MySQLBackend
[server2]
type=server
address=###node_server_IP_2###
port=###node_server_port_2###
protocol=MySQLBackend
[server3]
type=server
address=###node_server_IP_3###
port=###node_server_port_3###
protocol=MySQLBackend
[server4]
type=server
address=###node_server_IP_4###
port=###node_server_port_4###
protocol=MySQLBackend

View File

@ -0,0 +1,216 @@
/**
* Test replication-manager style automatic failover performed by the MySQL monitor
*/
#include "testconnections.h"
/**
 * Print the MaxScale server list and the MaxScale log to the test output.
 *
 * The log file is truncated after it has been read, so each call only shows
 * what was logged since the previous call.
 *
 * @param test Tester object
 */
void get_output(TestConnections& test)
{
    // Server states as reported by maxadmin.
    test.tprintf("Maxadmin output:");
    char *server_list = test.ssh_maxscale_output(true, "maxadmin list servers");
    test.tprintf("%s", server_list);
    free(server_list);

    // Log contents; the file is emptied only if the cat succeeded.
    test.tprintf("MaxScale output:");
    char *log_contents = test.ssh_maxscale_output(
        true,
        "cat /var/log/maxscale/maxscale.log && sudo truncate -s 0 /var/log/maxscale/maxscale.log");
    test.tprintf("%s", log_contents);
    free(log_contents);
}
// Number of rows inserted into test.t1 so far; also used as the next value to insert.
static int inserts = 0;

/**
 * Insert one row through the readwritesplit service and verify the table contents.
 *
 * Inside a single transaction, inserts the current value of `inserts` into
 * test.t1, increments the counter, then selects all rows and checks that the
 * row count equals the counter. The selected values are printed to the test log.
 *
 * @param test Tester object; failures are recorded through test.add_result()
 */
void check(TestConnections& test)
{
    MYSQL *conn = test.open_rwsplit_connection();
    const char *query1 = "INSERT INTO test.t1 VALUES (%d)";
    const char *query2 = "SELECT * FROM test.t1";

    test.try_query(conn, "BEGIN");
    test.tprintf(query1, inserts);
    test.try_query(conn, query1, inserts++);

    // A failed query is detected below: mysql_store_result() then returns NULL.
    mysql_query(conn, query2);
    MYSQL_RES *res = mysql_store_result(conn);
    test.add_result(res == NULL, "Query should return a result set");

    if (res)
    {
        std::string values;
        MYSQL_ROW row;
        int num_rows = mysql_num_rows(res);
        // `inserts` was already incremented, so it equals the expected row count.
        test.add_result(num_rows != inserts, "Query returned %d rows when %d rows were expected",
                        num_rows, inserts);
        const char *separator = "";

        while ((row = mysql_fetch_row(res)))
        {
            values += separator;
            // A SQL NULL is returned as a null pointer, which must not be appended to a std::string.
            values += row[0] ? row[0] : "NULL";
            separator = ", ";
        }

        test.tprintf("%s: %s", query2, values.c_str());
        // Release the buffered result set so it is not leaked on every call.
        mysql_free_result(res);
    }

    test.try_query(conn, "COMMIT");
    mysql_close(conn);
}
/**
 * Ask MaxScale's readwritesplit service for the server id of the current master.
 *
 * NOTE(review): @@last_insert_id is presumably included in the query to force
 * routing to the master - confirm against readwritesplit routing rules.
 *
 * @param test Tester object
 * @return Server id of the master as chosen by MaxScale, or -1 if the field could not be read
 */
int get_server_id(TestConnections& test)
{
    char field_value[1024];
    MYSQL *rwsplit = test.open_rwsplit_connection();

    // find_field() returns 0 when it managed to fill in the value.
    const bool found = find_field(rwsplit, "SELECT @@server_id, @@last_insert_id;",
                                  "@@server_id", field_value) == 0;
    const int server_id = found ? atoi(field_value) : -1;

    mysql_close(rwsplit);
    return server_id;
}
// Set in main() when the last command line argument is "interactive".
static bool interactive = false;

/**
 * In interactive mode, print a prompt and block until a character is read from stdin.
 * Does nothing when interactive mode is off.
 */
void get_input()
{
    if (!interactive)
    {
        return;
    }

    printf("--- Press any key to confinue ---\n");
    getchar();
}
/**
 * Bring the cluster back to a known state between test cases.
 *
 * Closes the MaxScale connections, repairs replication, reconnects, recreates
 * test.t1, waits for the slaves to sync, resets the insert counter and finally
 * runs check() and get_output().
 *
 * @param test Tester object
 */
void fix_replication_create_table(TestConnections& test)
{
    const char* const recreate_table = "CREATE OR REPLACE TABLE test.t1(id INT)";

    test.tprintf("Fix replication and recreate table.");

    // Connections are dropped before replication is repaired and reopened afterwards.
    test.close_maxscale_connections();
    test.repl->fix_replication();
    test.connect_maxscale();

    test.try_query(test.conn_rwsplit, recreate_table);
    test.repl->sync_slaves();

    // The table is empty again, so the expected row count starts over.
    inserts = 0;
    check(test);
    get_output(test);
}
/**
 * Monitor failover test.
 *
 * Runs three scenarios, each ending with the master (node 0) being stopped:
 *  1. Plain master failure: some other server must become master.
 *  2. Replication removed from server 2: server 3 or 4 must be promoted.
 *  3. Binary log disabled on server 2 and log_slave_updates enabled on
 *     servers 2 and 4: server 4 must be promoted.
 *
 * @param argc Argument count
 * @param argv Arguments; if the last one is "interactive" the test waits for
 *             a key press at every get_input() checkpoint
 * @return test.global_result
 */
int main(int argc, char** argv)
{
    const char* LINE = "------------------------------------------";
    const char* PRINT_ID = "Master server id is %d.";
    const char* WRONG_SLAVE = "Wrong slave was promoted or promotion failed.";

    interactive = strcmp(argv[argc - 1], "interactive") == 0;
    int master_id = -1;
    TestConnections test(argc, argv);

    // Wait a few seconds
    sleep(5);

    test.tprintf("Creating table and inserting data.");
    get_input();
    test.connect_maxscale();
    test.try_query(test.conn_rwsplit, "CREATE OR REPLACE TABLE test.t1(id INT)");
    test.repl->sync_slaves();

    check(test);
    get_output(test);

    // Test 1
    test.tprintf("Test 1: Stopping master and waiting for failover. Check that another server is promoted.\n"
                 "%s", LINE);
    get_input();
    int node0_id = test.repl->get_server_id(0); // Read master id now before shutdown.
    test.repl->stop_node(0);
    sleep(10);

    check(test);
    get_output(test);

    master_id = get_server_id(test);
    test.tprintf(PRINT_ID, master_id);
    // Fail if there is no master at all OR if the stopped server is still reported as master.
    // With '&&' this condition could never be true for a valid node id.
    test.add_result(master_id < 1 || master_id == node0_id, "Master did not change or no master detected.");
    fix_replication_create_table(test);
    test.repl->connect();

    // Test 2
    test.tprintf("Test 2: Disable replication on server 2 and kill master, check that server 3 or 4 is "
                 "promoted.\n%s", LINE);
    get_input();
    execute_query(test.repl->nodes[1], "STOP SLAVE; RESET SLAVE ALL;");
    sleep(2);
    test.repl->stop_node(0);
    sleep(10);

    check(test);
    get_output(test);

    master_id = get_server_id(test);
    test.tprintf(PRINT_ID, master_id);
    test.add_result(master_id < 1 ||
                    (master_id != test.repl->get_server_id(2) && master_id != test.repl->get_server_id(3)),
                    WRONG_SLAVE);
    fix_replication_create_table(test);
    test.repl->connect();

    // Test 3
    test.tprintf("Test3: Shutdown two slaves (servers 2 and 4). Disable log_bin on server 2, making it "
                 "invalid for promotion. Enable log-slave-updates on servers 2 and 4. Check that server 4 is "
                 "promoted on master failure.\n%s", LINE);
    get_input();

    test.repl->stop_node(1);
    test.repl->stop_node(3);
    // Back up the settings files before they are edited so they can be restored afterwards.
    test.repl->stash_server_settings(1);
    test.repl->stash_server_settings(3);

    test.repl->disable_server_setting(1, "log-bin");
    const char* log_slave = "log_slave_updates=1";
    test.repl->add_server_setting(1, log_slave);
    test.repl->add_server_setting(3, log_slave);

    test.repl->start_node(1, "");
    test.repl->start_node(3, "");
    sleep(4);
    test.tprintf("Settings changed.");
    get_output(test);
    test.tprintf("Stopping master.");
    test.repl->stop_node(0);
    sleep(10);

    check(test);
    get_output(test);

    master_id = get_server_id(test);
    // Because servers have been restarted, redo connections.
    test.repl->connect();
    sleep(2);
    test.tprintf(PRINT_ID, master_id);
    test.add_result(master_id < 1 || master_id != test.repl->get_server_id(3), WRONG_SLAVE);

    // Restore server 2 and 4 settings. Because server 4 is now the master, shutting it down causes
    // another failover. Prevent this by stopping maxscale.
    test.tprintf("Restoring server settings.");
    test.stop_maxscale();
    test.repl->stop_node(1);
    test.repl->stop_node(3);
    sleep(4);
    test.repl->restore_server_settings(1);
    test.repl->restore_server_settings(3);
    test.repl->start_node(0, "");
    test.repl->start_node(1, "");
    test.repl->start_node(3, "");
    sleep(4);
    test.start_maxscale();
    sleep(2);

    get_output(test);
    get_input();
    test.repl->fix_replication();
    return test.global_result;
}

View File

@ -1380,3 +1380,26 @@ void Mariadb_nodes::close_active_connections()
}
}
}
/**
 * Copy the contents of /etc/my.cnf.d on the node to /etc/my.cnf.d.backup.
 *
 * @param node Node to modify
 */
void Mariadb_nodes::stash_server_settings(int node)
{
    // -p: restore_server_settings() leaves the (emptied) backup directory behind, so the
    // directory may already exist on a later call; plain mkdir would fail in that case.
    ssh_node(node, true, "sudo mkdir -p /etc/my.cnf.d.backup");
    ssh_node(node, true, "sudo cp -r /etc/my.cnf.d/* /etc/my.cnf.d.backup/");
}
/**
 * Move the stashed settings files from /etc/my.cnf.d.backup back into /etc/my.cnf.d
 * on the node, replacing files of the same name.
 *
 * @param node Node to modify
 */
void Mariadb_nodes::restore_server_settings(int node)
{
    const char* const restore_cmd = "sudo mv -f /etc/my.cnf.d.backup/* /etc/my.cnf.d/";
    ssh_node(node, true, restore_cmd);
}
/**
 * Comment out a setting in every file under /etc/my.cnf.d on the node.
 *
 * Runs sed in-place; on each line the first match of the setting text is
 * replaced by the same text prefixed with '#'.
 *
 * @param node    Node to modify
 * @param setting Setting text to comment out (used as the sed pattern)
 */
void Mariadb_nodes::disable_server_setting(int node, const char* setting)
{
    // The setting appears twice: once as the pattern and once in the replacement.
    const char* const comment_out_cmd = "sudo sed -i 's/%s/#%s/' /etc/my.cnf.d/*";
    ssh_node(node, true, comment_out_cmd, setting, setting);
}
/**
 * Append a "[server]" section header followed by the given line to
 * /etc/my.cnf.d/server.cnf on the node.
 *
 * @param node    Node to modify
 * @param setting Line to add after the section header
 */
void Mariadb_nodes::add_server_setting(int node, const char* setting)
{
    // This command has no conversion specifier, so no extra format argument is passed.
    ssh_node(node, true, "sudo sed -i '$a [server]' /etc/my.cnf.d/server.cnf");
    ssh_node(node, true, "sudo sed -i '$a %s' /etc/my.cnf.d/server.cnf", setting);
}

View File

@ -442,6 +442,38 @@ public:
*/
bool fix_replication();
/**
 * Copy current server settings to a backup directory. Backup files with the same name
 * are overwritten.
 *
 * The settings are read from /etc/my.cnf.d and copied to /etc/my.cnf.d.backup on the node.
 *
 * @param node Node to modify
 */
void stash_server_settings(int node);
/**
 * Restore server settings from a backup directory. Current settings files are overwritten and
 * backup settings files are removed.
 *
 * The files are moved from /etc/my.cnf.d.backup to /etc/my.cnf.d on the node.
 *
 * @param node Node to modify
 */
void restore_server_settings(int node);
/**
 * Comment out the given setting in the server settings files (/etc/my.cnf.d/ on the node).
 *
 * On each line, the first occurrence of the setting text is prefixed with '#'. The match is
 * not anchored to the start of the line, so the text is also matched in the middle of a line.
 *
 * @param node Node to modify
 * @param setting Setting to comment out
 */
void disable_server_setting(int node, const char* setting);
/**
 * Append the following lines to the file /etc/my.cnf.d/server.cnf on the node:
 * [server]
 * parameter
 *
 * @param node Node to modify
 * @param setting Line to add
 */
void add_server_setting(int node, const char* setting);
private:
int check_node_ssh(int node);