MXS-1514: Add failover test
The test is composed of a few parts. 1: Test that failover happens on master failure. 2: Test that a server with slave sql thread stopped is not promoted. 3: Test that a server with log_slave_updates=1 is promoted before others.
This commit is contained in:
@ -252,6 +252,9 @@ add_test_executable(encrypted_passwords.cpp encrypted_passwords replication LABE
|
||||
# MySQL Monitor Failover Test
|
||||
add_test_executable(failover_mysqlmon.cpp failover_mysqlmon failover_mysqlmon LABELS mysqlmon REPL_BACKEND)
|
||||
|
||||
# MySQL Monitor Real Failover Test
|
||||
add_test_executable(failover_mysqlmon_mrm.cpp failover_mysqlmon_mrm failover_mysqlmon_mrm LABELS mysqlmon REPL_BACKEND)
|
||||
|
||||
# Test monitor state change events when manually clearing server bits
|
||||
add_test_executable(false_monitor_state_change.cpp false_monitor_state_change replication LABELS mysqlmon REPL_BACKEND)
|
||||
|
||||
|
@ -0,0 +1,92 @@
|
||||
[maxscale]
|
||||
threads=###threads###
|
||||
|
||||
[MySQL Monitor]
|
||||
type=monitor
|
||||
module=mysqlmon
|
||||
servers= server1, server2, server3, server4
|
||||
user=maxskysql
|
||||
passwd= skysql
|
||||
monitor_interval=1000
|
||||
detect_standalone_master=true
|
||||
failcount=1
|
||||
allow_cluster_recovery=true
|
||||
failover=true
|
||||
replication_user=repl
|
||||
replication_password=repl
|
||||
backend_connect_timeout=1
|
||||
|
||||
[RW Split Router]
|
||||
type=service
|
||||
router= readwritesplit
|
||||
servers=server1, server2, server3, server4
|
||||
user=maxskysql
|
||||
passwd=skysql
|
||||
|
||||
[Read Connection Router Slave]
|
||||
type=service
|
||||
router=readconnroute
|
||||
router_options= slave
|
||||
servers=server1, server2, server3, server4
|
||||
user=maxskysql
|
||||
passwd=skysql
|
||||
|
||||
[Read Connection Router Master]
|
||||
type=service
|
||||
router=readconnroute
|
||||
router_options=master
|
||||
servers=server1, server2, server3, server4
|
||||
user=maxskysql
|
||||
passwd=skysql
|
||||
|
||||
[RW Split Listener]
|
||||
type=listener
|
||||
service=RW Split Router
|
||||
protocol=MySQLClient
|
||||
port=4006
|
||||
|
||||
[Read Connection Listener Slave]
|
||||
type=listener
|
||||
service=Read Connection Router Slave
|
||||
protocol=MySQLClient
|
||||
port=4009
|
||||
|
||||
[Read Connection Listener Master]
|
||||
type=listener
|
||||
service=Read Connection Router Master
|
||||
protocol=MySQLClient
|
||||
port=4008
|
||||
|
||||
[CLI]
|
||||
type=service
|
||||
router=cli
|
||||
|
||||
[CLI Listener]
|
||||
type=listener
|
||||
service=CLI
|
||||
protocol=maxscaled
|
||||
socket=default
|
||||
|
||||
[server1]
|
||||
type=server
|
||||
address=###node_server_IP_1###
|
||||
port=###node_server_port_1###
|
||||
protocol=MySQLBackend
|
||||
|
||||
[server2]
|
||||
type=server
|
||||
address=###node_server_IP_2###
|
||||
port=###node_server_port_2###
|
||||
protocol=MySQLBackend
|
||||
|
||||
[server3]
|
||||
type=server
|
||||
address=###node_server_IP_3###
|
||||
port=###node_server_port_3###
|
||||
protocol=MySQLBackend
|
||||
|
||||
[server4]
|
||||
type=server
|
||||
address=###node_server_IP_4###
|
||||
port=###node_server_port_4###
|
||||
protocol=MySQLBackend
|
216
maxscale-system-test/failover_mysqlmon_mrm.cpp
Normal file
216
maxscale-system-test/failover_mysqlmon_mrm.cpp
Normal file
@ -0,0 +1,216 @@
|
||||
/**
|
||||
* Test replication-manager
|
||||
*/
|
||||
|
||||
#include "testconnections.h"
|
||||
|
||||
/**
 * Print the MaxAdmin server listing and the MaxScale log to the test output.
 *
 * The log file is truncated after it has been read, so each call only shows
 * what was logged since the previous call.
 *
 * @param test Tester object
 */
void get_output(TestConnections& test)
{
    // Server listing as reported by maxadmin
    test.tprintf("Maxadmin output:");
    char *server_list = test.ssh_maxscale_output(true, "maxadmin list servers");
    test.tprintf("%s", server_list);
    free(server_list);

    // Log contents; the truncate is chained with && so it runs only if the cat succeeded
    test.tprintf("MaxScale output:");
    char *log_contents = test.ssh_maxscale_output(true,
                                                  "cat /var/log/maxscale/maxscale.log && "
                                                  "sudo truncate -s 0 /var/log/maxscale/maxscale.log");
    test.tprintf("%s", log_contents);
    free(log_contents);
}
|
||||
|
||||
// Number of rows inserted into test.t1 so far. check() increments it on every insert and
// compares it with the row count; fix_replication_create_table() resets it to 0.
static int inserts = 0;
|
||||
|
||||
/**
 * Insert one row through the readwritesplit connection and verify the table contents.
 *
 * Inside one BEGIN/COMMIT pair the current value of `inserts` is inserted into
 * test.t1, the counter is incremented and the whole table is read back. A test
 * failure is added if no result set is returned or if the number of rows differs
 * from `inserts`. The fetched values are printed to the test output.
 *
 * @param test Tester object
 */
void check(TestConnections& test)
{
    MYSQL *conn = test.open_rwsplit_connection();
    const char *query1 = "INSERT INTO test.t1 VALUES (%d)";
    const char *query2 = "SELECT * FROM test.t1";

    test.try_query(conn, "BEGIN");
    test.tprintf(query1, inserts);
    test.try_query(conn, query1, inserts++);
    // A failed SELECT is detected below: mysql_store_result() then yields NULL.
    mysql_query(conn, query2);

    MYSQL_RES *res = mysql_store_result(conn);
    test.add_result(res == NULL, "Query should return a result set");

    if (res)
    {
        std::string values;
        MYSQL_ROW row;
        int num_rows = mysql_num_rows(res);
        test.add_result(num_rows != inserts, "Query returned %d rows when %d rows were expected",
                        num_rows, inserts);
        const char *separator = "";

        while ((row = mysql_fetch_row(res)))
        {
            values += separator;
            // A SQL NULL is returned as a null pointer, which must not be appended to std::string.
            values += row[0] ? row[0] : "NULL";
            separator = ", ";
        }
        test.tprintf("%s: %s", query2, values.c_str());

        // The stored result set is owned by the caller and has to be released explicitly.
        mysql_free_result(res);
    }
    test.try_query(conn, "COMMIT");
    mysql_close(conn);
}
|
||||
|
||||
/**
|
||||
* Get master server id (master decided by MaxScale)
|
||||
*
|
||||
* @param test Tester object
|
||||
* @return Master server id
|
||||
*/
|
||||
int get_server_id(TestConnections& test)
|
||||
{
|
||||
MYSQL *conn = test.open_rwsplit_connection();
|
||||
int id = -1;
|
||||
char str[1024];
|
||||
|
||||
if (find_field(conn, "SELECT @@server_id, @@last_insert_id;", "@@server_id", str) == 0)
|
||||
{
|
||||
id = atoi(str);
|
||||
}
|
||||
|
||||
mysql_close(conn);
|
||||
return id;
|
||||
}
|
||||
|
||||
// Set in main() when the last command line argument is "interactive"; when true,
// get_input() pauses the test until a character is read from stdin.
static bool interactive = false;
|
||||
|
||||
void get_input()
|
||||
{
|
||||
if (interactive)
|
||||
{
|
||||
printf("--- Press any key to confinue ---\n");
|
||||
getchar();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Bring the cluster back to a known state between test cases.
 *
 * Closes the MaxScale connections, calls fix_replication() on the backend nodes,
 * reconnects, recreates test.t1 and syncs the slaves. The insert counter is reset
 * and one check()/get_output() round is run against the fresh table.
 *
 * @param test Tester object
 */
void fix_replication_create_table(TestConnections& test)
{
    // The table is recreated below, so the expected row count starts over.
    inserts = 0;

    test.tprintf("Fix replication and recreate table.");

    // Reconnect around the replication fix
    test.close_maxscale_connections();
    test.repl->fix_replication();
    test.connect_maxscale();

    test.try_query(test.conn_rwsplit, "CREATE OR REPLACE TABLE test.t1(id INT)");
    test.repl->sync_slaves();

    check(test);
    get_output(test);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
const char* LINE = "------------------------------------------";
|
||||
const char* PRINT_ID = "Master server id is %d.";
|
||||
const char* WRONG_SLAVE = "Wrong slave was promoted or promotion failed.";
|
||||
|
||||
interactive = strcmp(argv[argc - 1], "interactive") == 0;
|
||||
int master_id = -1;
|
||||
TestConnections test(argc, argv);
|
||||
|
||||
// Wait a few seconds
|
||||
sleep(5);
|
||||
|
||||
test.tprintf("Creating table and inserting data.");
|
||||
get_input();
|
||||
test.connect_maxscale();
|
||||
test.try_query(test.conn_rwsplit, "CREATE OR REPLACE TABLE test.t1(id INT)");
|
||||
test.repl->sync_slaves();
|
||||
|
||||
check(test);
|
||||
get_output(test);
|
||||
|
||||
// Test 1
|
||||
test.tprintf("Test 1: Stopping master and waiting for failover. Check that another server is promoted.\n"
|
||||
"%s", LINE);
|
||||
get_input();
|
||||
int node0_id = test.repl->get_server_id(0); // Read master id now before shutdown.
|
||||
test.repl->stop_node(0);
|
||||
sleep(10);
|
||||
|
||||
check(test);
|
||||
get_output(test);
|
||||
|
||||
master_id = get_server_id(test);
|
||||
test.tprintf(PRINT_ID, master_id);
|
||||
test.add_result(master_id < 1 && master_id == node0_id, "Master did not change or no master detected.");
|
||||
fix_replication_create_table(test);
|
||||
test.repl->connect();
|
||||
|
||||
// Test 2
|
||||
test.tprintf("Test 2: Disable replication on server 2 and kill master, check that server 3 or 4 is "
|
||||
"promoted.\n%s", LINE);
|
||||
get_input();
|
||||
execute_query(test.repl->nodes[1], "STOP SLAVE; RESET SLAVE ALL;");
|
||||
sleep(2);
|
||||
test.repl->stop_node(0);
|
||||
sleep(10);
|
||||
|
||||
check(test);
|
||||
get_output(test);
|
||||
|
||||
master_id = get_server_id(test);
|
||||
test.tprintf(PRINT_ID, master_id);
|
||||
test.add_result(master_id < 1 ||
|
||||
(master_id != test.repl->get_server_id(2) && master_id != test.repl->get_server_id(3)),
|
||||
WRONG_SLAVE);
|
||||
fix_replication_create_table(test);
|
||||
test.repl->connect();
|
||||
|
||||
|
||||
// Test 3
|
||||
test.tprintf("Test3: Shutdown two slaves (servers 2 and 4). Disable log_bin on server 2, making it "
|
||||
"invalid for promotion. Enable log-slave-updates on servers 2 and 4. Check that server 4 is "
|
||||
"promoted on master failure.\n%s", LINE);
|
||||
get_input();
|
||||
|
||||
test.repl->stop_node(1);
|
||||
test.repl->stop_node(3);
|
||||
test.repl->stash_server_settings(1);
|
||||
test.repl->stash_server_settings(3);
|
||||
test.repl->disable_server_setting(1, "log-bin");
|
||||
const char* log_slave = "log_slave_updates=1";
|
||||
test.repl->add_server_setting(1, log_slave);
|
||||
test.repl->add_server_setting(3, log_slave);
|
||||
test.repl->start_node(1, "");
|
||||
test.repl->start_node(3, "");
|
||||
sleep(4);
|
||||
test.tprintf("Settings changed.");
|
||||
get_output(test);
|
||||
test.tprintf("Stopping master.");
|
||||
test.repl->stop_node(0);
|
||||
sleep(10);
|
||||
|
||||
check(test);
|
||||
get_output(test);
|
||||
|
||||
master_id = get_server_id(test);
|
||||
// Because servers have been restarted, redo connections.
|
||||
test.repl->connect();
|
||||
sleep(2);
|
||||
test.tprintf(PRINT_ID, master_id);
|
||||
test.add_result(master_id < 1 || master_id != test.repl->get_server_id(3), WRONG_SLAVE);
|
||||
// Restore server 2 and 4 settings. Because server 4 is now the master, shutting it down causes
|
||||
// another failover. Prevent this by stopping maxscale.
|
||||
test.tprintf("Restoring server settings.");
|
||||
test.stop_maxscale();
|
||||
test.repl->stop_node(1);
|
||||
test.repl->stop_node(3);
|
||||
sleep(4);
|
||||
test.repl->restore_server_settings(1);
|
||||
test.repl->restore_server_settings(3);
|
||||
test.repl->start_node(0, "");
|
||||
test.repl->start_node(1, "");
|
||||
test.repl->start_node(3, "");
|
||||
sleep(4);
|
||||
test.start_maxscale();
|
||||
sleep(2);
|
||||
get_output(test);
|
||||
get_input();
|
||||
|
||||
test.repl->fix_replication();
|
||||
return test.global_result;
|
||||
}
|
@ -1380,3 +1380,26 @@ void Mariadb_nodes::close_active_connections()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Mariadb_nodes::stash_server_settings(int node)
|
||||
{
|
||||
ssh_node(node, true, "sudo mkdir /etc/my.cnf.d.backup");
|
||||
ssh_node(node, true, "sudo cp -r /etc/my.cnf.d/* /etc/my.cnf.d.backup/");
|
||||
}
|
||||
|
||||
void Mariadb_nodes::restore_server_settings(int node)
|
||||
{
|
||||
ssh_node(node, true, "sudo mv -f /etc/my.cnf.d.backup/* /etc/my.cnf.d/");
|
||||
}
|
||||
|
||||
void Mariadb_nodes::disable_server_setting(int node, const char* setting)
|
||||
{
|
||||
ssh_node(node, true, "sudo sed -i 's/%s/#%s/' /etc/my.cnf.d/*", setting, setting);
|
||||
}
|
||||
|
||||
void Mariadb_nodes::add_server_setting(int node, const char* setting)
|
||||
{
|
||||
ssh_node(node, true, "sudo sed -i '$a [server]' /etc/my.cnf.d/server.cnf", setting);
|
||||
ssh_node(node, true, "sudo sed -i '$a %s' /etc/my.cnf.d/server.cnf", setting);
|
||||
}
|
||||
|
||||
|
@ -442,6 +442,38 @@ public:
|
||||
*/
|
||||
bool fix_replication();
|
||||
|
||||
/**
|
||||
* Copy current server settings to a backup directory. Any old backups are overwritten.
|
||||
*
|
||||
* @param node Node to modify
|
||||
*/
|
||||
void stash_server_settings(int node);
|
||||
|
||||
/**
|
||||
* Restore server settings from a backup directory. Current settings files are overwritten and
|
||||
* backup settings files are removed.
|
||||
*
|
||||
* @param node Node to modify
|
||||
*/
|
||||
void restore_server_settings(int node);
|
||||
|
||||
/**
|
||||
* Comment any line starting with the given setting name in server settings files.
|
||||
*
|
||||
* @param node Node to modify
|
||||
* @param setting Name of the setting to comment out
|
||||
*/
|
||||
void disable_server_setting(int node, const char* setting);
|
||||
|
||||
/**
|
||||
* Add the following lines to the /etc/my.cnf.d/server.cnf file:
|
||||
* [server]
|
||||
* parameter
|
||||
*
|
||||
* @param node Node to modify
|
||||
* @param setting Line to add
|
||||
*/
|
||||
void add_server_setting(int node, const char* setting);
|
||||
private:
|
||||
|
||||
int check_node_ssh(int node);
|
||||
|
Reference in New Issue
Block a user