diff --git a/maxscale-system-test/CMakeLists.txt b/maxscale-system-test/CMakeLists.txt
index afdd6b549..81cc529a5 100644
--- a/maxscale-system-test/CMakeLists.txt
+++ b/maxscale-system-test/CMakeLists.txt
@@ -252,6 +252,9 @@ add_test_executable(encrypted_passwords.cpp encrypted_passwords replication LABE
 # MySQL Monitor Failover Test
 add_test_executable(failover_mysqlmon.cpp failover_mysqlmon failover_mysqlmon LABELS mysqlmon REPL_BACKEND)

+# MySQL Monitor Real Failover Test
+add_test_executable(failover_mysqlmon_mrm.cpp failover_mysqlmon_mrm failover_mysqlmon_mrm LABELS mysqlmon REPL_BACKEND)
+
 # Test monitor state change events when manually clearing server bits
 add_test_executable(false_monitor_state_change.cpp false_monitor_state_change replication LABELS mysqlmon REPL_BACKEND)

diff --git a/maxscale-system-test/cnf/maxscale.cnf.template.failover_mysqlmon_mrm b/maxscale-system-test/cnf/maxscale.cnf.template.failover_mysqlmon_mrm
new file mode 100644
index 000000000..1c38ff294
--- /dev/null
+++ b/maxscale-system-test/cnf/maxscale.cnf.template.failover_mysqlmon_mrm
@@ -0,0 +1,92 @@
+[maxscale]
+threads=###threads###
+
+[MySQL Monitor]
+type=monitor
+module=mysqlmon
+servers= server1, server2, server3, server4
+user=maxskysql
+passwd= skysql
+monitor_interval=1000
+detect_standalone_master=true
+failcount=1
+allow_cluster_recovery=true
+failover=true
+replication_user=repl
+replication_password=repl
+backend_connect_timeout=1
+
+[RW Split Router]
+type=service
+router= readwritesplit
+servers=server1, server2, server3, server4
+user=maxskysql
+passwd=skysql
+
+[Read Connection Router Slave]
+type=service
+router=readconnroute
+router_options= slave
+servers=server1, server2, server3, server4
+user=maxskysql
+passwd=skysql
+
+[Read Connection Router Master]
+type=service
+router=readconnroute
+router_options=master
+servers=server1, server2, server3, server4
+user=maxskysql
+passwd=skysql
+
+[RW Split Listener]
+type=listener
+service=RW Split Router
+protocol=MySQLClient
+port=4006
+
+[Read Connection Listener Slave]
+type=listener
+service=Read Connection Router Slave
+protocol=MySQLClient
+port=4009
+
+[Read Connection Listener Master]
+type=listener
+service=Read Connection Router Master
+protocol=MySQLClient
+port=4008
+
+[CLI]
+type=service
+router=cli
+
+[CLI Listener]
+type=listener
+service=CLI
+protocol=maxscaled
+socket=default
+
+[server1]
+type=server
+address=###node_server_IP_1###
+port=###node_server_port_1###
+protocol=MySQLBackend
+
+[server2]
+type=server
+address=###node_server_IP_2###
+port=###node_server_port_2###
+protocol=MySQLBackend
+
+[server3]
+type=server
+address=###node_server_IP_3###
+port=###node_server_port_3###
+protocol=MySQLBackend
+
+[server4]
+type=server
+address=###node_server_IP_4###
+port=###node_server_port_4###
+protocol=MySQLBackend
diff --git a/maxscale-system-test/failover_mysqlmon_mrm.cpp b/maxscale-system-test/failover_mysqlmon_mrm.cpp
new file mode 100644
index 000000000..6e26211a4
--- /dev/null
+++ b/maxscale-system-test/failover_mysqlmon_mrm.cpp
@@ -0,0 +1,238 @@
+/**
+ * Test replication-manager
+ */
+
+#include "testconnections.h"
+
+/**
+ * Print the output of "maxadmin list servers" and the MaxScale log, then truncate the log
+ *
+ * @param test Tester object
+ */
+void get_output(TestConnections& test)
+{
+    test.tprintf("Maxadmin output:");
+    char *output = test.ssh_maxscale_output(true, "maxadmin list servers");
+    test.tprintf("%s", output);
+    free(output);
+
+    test.tprintf("MaxScale output:");
+    output = test.ssh_maxscale_output(true, "cat /var/log/maxscale/maxscale.log && "
+                                      "sudo truncate -s 0 /var/log/maxscale/maxscale.log");
+    test.tprintf("%s", output);
+    free(output);
+}
+
+// Number of rows check() has inserted into test.t1 since the table was last (re)created
+static int inserts = 0;
+
+/**
+ * Insert one row through a readwritesplit connection inside a transaction and verify that a
+ * SELECT in the same transaction returns as many rows as have been inserted so far
+ *
+ * @param test Tester object
+ */
+void check(TestConnections& test)
+{
+    MYSQL *conn = test.open_rwsplit_connection();
+    const char *query1 = "INSERT INTO test.t1 VALUES (%d)";
+    const char *query2 = "SELECT * FROM test.t1";
+
+    test.try_query(conn, "BEGIN");
+    test.tprintf(query1, inserts);
+    test.try_query(conn, query1, inserts++);
+    mysql_query(conn, query2);
+
+    MYSQL_RES *res = mysql_store_result(conn);
+    test.add_result(res == NULL, "Query should return a result set");
+
+    if (res)
+    {
+        std::string values;
+        MYSQL_ROW row;
+        int num_rows = mysql_num_rows(res);
+        test.add_result(num_rows != inserts, "Query returned %d rows when %d rows were expected",
+                        num_rows, inserts);
+        const char *separator = "";
+
+        while ((row = mysql_fetch_row(res)))
+        {
+            values += separator;
+            values += row[0];
+            separator = ", ";
+        }
+        test.tprintf("%s: %s", query2, values.c_str());
+    }
+    test.try_query(conn, "COMMIT");
+    mysql_close(conn);
+}
+
+/**
+ * Get master server id (master decided by MaxScale)
+ *
+ * @param test Tester object
+ * @return Master server id
+ */
+int get_server_id(TestConnections& test)
+{
+    MYSQL *conn = test.open_rwsplit_connection();
+    int id = -1;
+    char str[1024];
+
+    if (find_field(conn, "SELECT @@server_id, @@last_insert_id;", "@@server_id", str) == 0)
+    {
+        id = atoi(str);
+    }
+
+    mysql_close(conn);
+    return id;
+}
+
+// Set in main() from the last command line argument; when true, get_input() waits for a key press
+static bool interactive = false;
+
+/**
+ * In interactive mode, pause until the user presses a key
+ */
+void get_input()
+{
+    if (interactive)
+    {
+        printf("--- Press any key to continue ---\n");
+        getchar();
+    }
+}
+
+/**
+ * Fix replication, recreate test.t1, reset the insert counter and run one check() round
+ *
+ * @param test Tester object
+ */
+void fix_replication_create_table(TestConnections& test)
+{
+    test.tprintf("Fix replication and recreate table.");
+    test.close_maxscale_connections();
+    test.repl->fix_replication();
+    test.connect_maxscale();
+    test.try_query(test.conn_rwsplit, "CREATE OR REPLACE TABLE test.t1(id INT)");
+    test.repl->sync_slaves();
+    inserts = 0;
+
+    check(test);
+    get_output(test);
+}
+
+int main(int argc, char** argv)
+{
+    const char* LINE = "------------------------------------------";
+    const char* PRINT_ID = "Master server id is %d.";
+    const char* WRONG_SLAVE = "Wrong slave was promoted or promotion failed.";
+
+    interactive = strcmp(argv[argc - 1], "interactive") == 0;
+    int master_id = -1;
+    TestConnections test(argc, argv);
+
+    // Wait a few seconds
+    sleep(5);
+
+    test.tprintf("Creating table and inserting data.");
+    get_input();
+    test.connect_maxscale();
+    test.try_query(test.conn_rwsplit, "CREATE OR REPLACE TABLE test.t1(id INT)");
+    test.repl->sync_slaves();
+
+    check(test);
+    get_output(test);
+
+    // Test 1
+    test.tprintf("Test 1: Stopping master and waiting for failover. Check that another server is promoted.\n"
+                 "%s", LINE);
+    get_input();
+    int node0_id = test.repl->get_server_id(0); // Read master id now before shutdown.
+    test.repl->stop_node(0);
+    sleep(10);
+
+    check(test);
+    get_output(test);
+
+    master_id = get_server_id(test);
+    test.tprintf(PRINT_ID, master_id);
+    // Fail if no master was found or if the stopped node is still reported as the master.
+    test.add_result(master_id < 1 || master_id == node0_id, "Master did not change or no master detected.");
+    fix_replication_create_table(test);
+    test.repl->connect();
+
+    // Test 2
+    test.tprintf("Test 2: Disable replication on server 2 and kill master, check that server 3 or 4 is "
+                 "promoted.\n%s", LINE);
+    get_input();
+    execute_query(test.repl->nodes[1], "STOP SLAVE; RESET SLAVE ALL;");
+    sleep(2);
+    test.repl->stop_node(0);
+    sleep(10);
+
+    check(test);
+    get_output(test);
+
+    master_id = get_server_id(test);
+    test.tprintf(PRINT_ID, master_id);
+    test.add_result(master_id < 1 ||
+                    (master_id != test.repl->get_server_id(2) && master_id != test.repl->get_server_id(3)),
+                    WRONG_SLAVE);
+    fix_replication_create_table(test);
+    test.repl->connect();
+
+
+    // Test 3
+    test.tprintf("Test3: Shutdown two slaves (servers 2 and 4). Disable log_bin on server 2, making it "
+                 "invalid for promotion. Enable log-slave-updates on servers 2 and 4. Check that server 4 is "
+                 "promoted on master failure.\n%s", LINE);
+    get_input();
+
+    test.repl->stop_node(1);
+    test.repl->stop_node(3);
+    test.repl->stash_server_settings(1);
+    test.repl->stash_server_settings(3);
+    test.repl->disable_server_setting(1, "log-bin");
+    const char* log_slave = "log_slave_updates=1";
+    test.repl->add_server_setting(1, log_slave);
+    test.repl->add_server_setting(3, log_slave);
+    test.repl->start_node(1, "");
+    test.repl->start_node(3, "");
+    sleep(4);
+    test.tprintf("Settings changed.");
+    get_output(test);
+    test.tprintf("Stopping master.");
+    test.repl->stop_node(0);
+    sleep(10);
+
+    check(test);
+    get_output(test);
+
+    master_id = get_server_id(test);
+    // Because servers have been restarted, redo connections.
+    test.repl->connect();
+    sleep(2);
+    test.tprintf(PRINT_ID, master_id);
+    test.add_result(master_id < 1 || master_id != test.repl->get_server_id(3), WRONG_SLAVE);
+    // Restore server 2 and 4 settings. Because server 4 is now the master, shutting it down causes
+    // another failover. Prevent this by stopping maxscale.
+    test.tprintf("Restoring server settings.");
+    test.stop_maxscale();
+    test.repl->stop_node(1);
+    test.repl->stop_node(3);
+    sleep(4);
+    test.repl->restore_server_settings(1);
+    test.repl->restore_server_settings(3);
+    test.repl->start_node(0, "");
+    test.repl->start_node(1, "");
+    test.repl->start_node(3, "");
+    sleep(4);
+    test.start_maxscale();
+    sleep(2);
+    get_output(test);
+    get_input();
+
+    test.repl->fix_replication();
+    return test.global_result;
+}
diff --git a/maxscale-system-test/mariadb_nodes.cpp b/maxscale-system-test/mariadb_nodes.cpp
index a8a530c4c..d55c5ad6d 100644
--- a/maxscale-system-test/mariadb_nodes.cpp
+++ b/maxscale-system-test/mariadb_nodes.cpp
@@ -1380,3 +1380,27 @@ void Mariadb_nodes::close_active_connections()
         }
     }
 }
+
+void Mariadb_nodes::stash_server_settings(int node)
+{
+    // -p: do not fail when the backup directory is left over from an earlier stash
+    ssh_node(node, true, "sudo mkdir -p /etc/my.cnf.d.backup");
+    ssh_node(node, true, "sudo cp -r /etc/my.cnf.d/* /etc/my.cnf.d.backup/");
+}
+
+void Mariadb_nodes::restore_server_settings(int node)
+{
+    ssh_node(node, true, "sudo mv -f /etc/my.cnf.d.backup/* /etc/my.cnf.d/");
+}
+
+void Mariadb_nodes::disable_server_setting(int node, const char* setting)
+{
+    ssh_node(node, true, "sudo sed -i 's/%s/#%s/' /etc/my.cnf.d/*", setting, setting);
+}
+
+void Mariadb_nodes::add_server_setting(int node, const char* setting)
+{
+    ssh_node(node, true, "sudo sed -i '$a [server]' /etc/my.cnf.d/server.cnf");
+    ssh_node(node, true, "sudo sed -i '$a %s' /etc/my.cnf.d/server.cnf", setting);
+}
+
diff --git a/maxscale-system-test/mariadb_nodes.h b/maxscale-system-test/mariadb_nodes.h
index c5d4d1ecf..4dbdd75f9 100644
--- a/maxscale-system-test/mariadb_nodes.h
+++ b/maxscale-system-test/mariadb_nodes.h
@@ -442,6 +442,38 @@ public:
      */
     bool fix_replication();

+    /**
+     * Copy current server settings to a backup directory. Any old backups are overwritten.
+     *
+     * @param node Node to modify
+     */
+    void stash_server_settings(int node);
+
+    /**
+     * Restore server settings from a backup directory. Current settings files are overwritten and
+     * backup settings files are removed.
+     *
+     * @param node Node to modify
+     */
+    void restore_server_settings(int node);
+
+    /**
+     * Prefix the given setting name with '#' on every line where it occurs in the server settings files.
+     *
+     * @param node Node to modify
+     * @param setting Setting to remove
+     */
+    void disable_server_setting(int node, const char* setting);
+
+    /**
+     * Add the following lines to the /etc/my.cnf.d/server.cnf-file:
+     * [server]
+     * parameter
+     *
+     * @param node Node to modify
+     * @param setting Line to add
+     */
+    void add_server_setting(int node, const char* setting);
 private:

     int check_node_ssh(int node);