MXS-1596 Stress test for failover
- Start 4 threads where each thread sits in a loop and performs 20% updates and 80% selects. Each thread has a table of its own. - The main thread executes the following in a loop: - Take down the current master and wait a while (failover is assumed to happen). - Bring up the old master node and wait a while. Keep on doing that for 1.5 minutes. At the end, check that: - There is exactly one 'Master'. - The other nodes are either - 'Slave' or - 'Running', in which case it is verified that the reason is that the node could not be rejoined.
This commit is contained in:
parent
1e5125015e
commit
ad634fe31e
1
maxscale-system-test/.gitignore
vendored
1
maxscale-system-test/.gitignore
vendored
@ -122,6 +122,7 @@ mysqlmon_failover_manual2_2
|
||||
mysqlmon_failover_rejoin_old_slave
|
||||
mysqlmon_failover_rolling_master
|
||||
mysqlmon_failover_rolling_restart_slaves
|
||||
mysqlmon_failover_stress
|
||||
mysqlmon_switchover_bad_master
|
||||
mysqlmon_switchover
|
||||
mxs1045
|
||||
|
@ -290,6 +290,8 @@ add_test_executable(mysqlmon_failover_rejoin_old_slave.cpp mysqlmon_failover_rej
|
||||
# MySQL Monitor rolling restart slaves
|
||||
add_test_executable(mysqlmon_failover_rolling_restart_slaves.cpp mysqlmon_failover_rolling_restart_slaves mysqlmon_failover_rolling_restart_slaves LABELS mysqlmon REPL_BACKEND)
|
||||
|
||||
add_test_executable(mysqlmon_failover_stress.cpp mysqlmon_failover_stress mysqlmon_failover_stress LABELS mysqlmon REPL_BACKEND)
|
||||
|
||||
# Test monitor state change events when manually clearing server bits
|
||||
add_test_executable(false_monitor_state_change.cpp false_monitor_state_change replication LABELS mysqlmon REPL_BACKEND)
|
||||
|
||||
|
@ -0,0 +1,94 @@
|
||||
[maxscale]
|
||||
threads=###threads###
|
||||
|
||||
[MySQL-Monitor]
|
||||
type=monitor
|
||||
module=mysqlmon
|
||||
servers= server1, server2, server3, server4
|
||||
user=maxskysql
|
||||
passwd= skysql
|
||||
monitor_interval=1000
|
||||
allow_cluster_recovery=true
|
||||
detect_standalone_master=true
|
||||
auto_failover=true
|
||||
auto_rejoin=true
|
||||
replication_user=repl
|
||||
replication_password=repl
|
||||
backend_connect_timeout=5
|
||||
backend_read_timeout=5
|
||||
backend_write_timeout=5
|
||||
|
||||
[RW-Split-Router]
|
||||
type=service
|
||||
router= readwritesplit
|
||||
servers=server1, server2, server3, server4
|
||||
user=maxskysql
|
||||
passwd=skysql
|
||||
|
||||
[Read-Connection-Router-Slave]
|
||||
type=service
|
||||
router=readconnroute
|
||||
router_options= slave
|
||||
servers=server1, server2, server3, server4
|
||||
user=maxskysql
|
||||
passwd=skysql
|
||||
|
||||
[Read-Connection-Router-Master]
|
||||
type=service
|
||||
router=readconnroute
|
||||
router_options=master
|
||||
servers=server1, server2, server3, server4
|
||||
user=maxskysql
|
||||
passwd=skysql
|
||||
|
||||
[RW-Split-Listener]
|
||||
type=listener
|
||||
service=RW-Split-Router
|
||||
protocol=MySQLClient
|
||||
port=4006
|
||||
|
||||
[Read-Connection-Listener-Slave]
|
||||
type=listener
|
||||
service=Read-Connection-Router-Slave
|
||||
protocol=MySQLClient
|
||||
port=4009
|
||||
|
||||
[Read-Connection-Listener-Master]
|
||||
type=listener
|
||||
service=Read-Connection-Router-Master
|
||||
protocol=MySQLClient
|
||||
port=4008
|
||||
|
||||
[CLI]
|
||||
type=service
|
||||
router=cli
|
||||
|
||||
[CLI Listener]
|
||||
type=listener
|
||||
service=CLI
|
||||
protocol=maxscaled
|
||||
socket=default
|
||||
|
||||
[server1]
|
||||
type=server
|
||||
address=###node_server_IP_1###
|
||||
port=###node_server_port_1###
|
||||
protocol=MySQLBackend
|
||||
|
||||
[server2]
|
||||
type=server
|
||||
address=###node_server_IP_2###
|
||||
port=###node_server_port_2###
|
||||
protocol=MySQLBackend
|
||||
|
||||
[server3]
|
||||
type=server
|
||||
address=###node_server_IP_3###
|
||||
port=###node_server_port_3###
|
||||
protocol=MySQLBackend
|
||||
|
||||
[server4]
|
||||
type=server
|
||||
address=###node_server_IP_4###
|
||||
port=###node_server_port_4###
|
||||
protocol=MySQLBackend
|
576
maxscale-system-test/mysqlmon_failover_stress.cpp
Executable file
576
maxscale-system-test/mysqlmon_failover_stress.cpp
Executable file
@ -0,0 +1,576 @@
|
||||
/*
|
||||
* Copyright (c) 2016 MariaDB Corporation Ab
|
||||
*
|
||||
* Use of this software is governed by the Business Source License included
|
||||
* in the LICENSE.TXT file and at www.mariadb.com/bsl11.
|
||||
*
|
||||
* Change Date: 2020-01-01
|
||||
*
|
||||
* On the date above, in accordance with the Business Source License, use
|
||||
* of this software will be governed by version 2 or later of the General
|
||||
* Public License.
|
||||
*/
|
||||
|
||||
#include <atomic>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
#include "testconnections.h"
#include "fail_switch_rejoin_common.cpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// How often the monitor checks the server state.
|
||||
// NOTE: Ensure this is identical with the value in the configuration file.
|
||||
const time_t MONITOR_INTERVAL = 1;
|
||||
|
||||
// After how many seconds should the failover/rejoin operation surely have
|
||||
// been performed. Not very critical.
|
||||
const time_t FAILOVER_DURATION = 5;
|
||||
|
||||
// How long the test should keep on running (in seconds).
|
||||
const time_t TEST_DURATION = 90;
|
||||
|
||||
// Writes one line of the form "client(<id>) : <msg>" to stdout.
//
// The line is first assembled in a private stream and then written with a
// single insertion, so that output from concurrently running clients is not
// interleaved in the middle of a line. A variable named m_id must be in
// scope where the macro is used; msg may be a chain of << insertions.
#define CMESSAGE(msg) \
    do \
    { \
        std::stringstream cmessage_line; \
        cmessage_line << "client(" << m_id << ") : " << msg << "\n"; \
        std::cout << cmessage_line.str() << std::flush; \
    } \
    while (false)
|
||||
|
||||
// Debug helpers.
//
// ss_dassert(x): in debug builds, prints the failed expression to stderr and
//                aborts if x is false; expands to nothing when NDEBUG is defined.
// ss_debug(x):   expands to x in debug builds and to nothing when NDEBUG is
//                defined; used for code that is only needed by the assertions.
#if defined(NDEBUG)

#define ss_dassert(x)
#define ss_debug(x)

#else

#define ss_dassert(x) \
    do \
    { \
        if (!(x)) \
        { \
            fprintf(stderr, "Assertion failed: %s", #x); \
            abort(); \
        } \
    } \
    while (false)
#define ss_debug(x) x

#endif
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
class Client
|
||||
{
|
||||
public:
|
||||
enum
|
||||
{
|
||||
DEFAULT_N_CLIENTS = 4,
|
||||
DEFAULT_N_ROWS = 100
|
||||
};
|
||||
|
||||
static void init(TestConnections& test, size_t nClients, size_t nRows)
|
||||
{
|
||||
s_nClients = nClients;
|
||||
s_nRows = nRows;
|
||||
|
||||
if (create_tables(test))
|
||||
{
|
||||
if (insert_data(test))
|
||||
{
|
||||
cout << "\nSyncing slaves." << endl;
|
||||
test.repl->sync_slaves();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void start(bool verbose,
|
||||
const char* zHost, int port, const char* zUser, const char* zPassword)
|
||||
{
|
||||
for (size_t i = 0; i < s_nClients; ++i)
|
||||
{
|
||||
s_threads.push_back(std::thread(&Client::thread_main,
|
||||
i, verbose, zHost, port, zUser, zPassword));
|
||||
}
|
||||
}
|
||||
|
||||
static void stop()
|
||||
{
|
||||
s_shutdown = true;
|
||||
|
||||
for (size_t i = 0; i < s_nClients; ++i)
|
||||
{
|
||||
s_threads[i].join();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Client(int id, bool verbose)
|
||||
: m_id(id)
|
||||
, m_verbose(verbose)
|
||||
, m_value(1)
|
||||
{
|
||||
ss_debug(int rv);
|
||||
|
||||
unsigned int seed = (time(NULL) << m_id);
|
||||
ss_debug(rv =) initstate_r(seed, m_initstate, sizeof(m_initstate), &m_random_data);
|
||||
ss_dassert(rv == 0);
|
||||
|
||||
ss_debug(rv=) srandom_r(seed, &m_random_data);
|
||||
ss_dassert(rv == 0);
|
||||
}
|
||||
|
||||
enum action_t
|
||||
{
|
||||
ACTION_SELECT,
|
||||
ACTION_UPDATE
|
||||
};
|
||||
|
||||
action_t action() const
|
||||
{
|
||||
double d = random_decimal_fraction();
|
||||
|
||||
// 20% updates
|
||||
// 80% selects
|
||||
if (d <= 0.2)
|
||||
{
|
||||
return ACTION_UPDATE;
|
||||
}
|
||||
else
|
||||
{
|
||||
return ACTION_SELECT;
|
||||
}
|
||||
}
|
||||
|
||||
bool run(MYSQL* pConn)
|
||||
{
|
||||
bool rv = false;
|
||||
|
||||
switch (action())
|
||||
{
|
||||
case ACTION_SELECT:
|
||||
rv = run_select(pConn);
|
||||
break;
|
||||
|
||||
case ACTION_UPDATE:
|
||||
rv = run_update(pConn);
|
||||
break;
|
||||
|
||||
default:
|
||||
ss_dassert(!true);
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
bool run_select(MYSQL* pConn)
|
||||
{
|
||||
bool rv = true;
|
||||
|
||||
string stmt("SELECT * FROM test.t");
|
||||
stmt += std::to_string(m_id);
|
||||
stmt += " WHERE id=";
|
||||
stmt += std::to_string(get_random_id());
|
||||
|
||||
if (mysql_query(pConn, stmt.c_str()) == 0)
|
||||
{
|
||||
flush_response(pConn);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_verbose)
|
||||
{
|
||||
CMESSAGE("\"" << stmt << "\" failed: " << mysql_error(pConn));
|
||||
}
|
||||
rv = false;
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
bool run_update(MYSQL* pConn)
|
||||
{
|
||||
bool rv = true;
|
||||
|
||||
string stmt("UPDATE test.t");
|
||||
stmt += std::to_string(m_id);
|
||||
stmt += " SET id=";
|
||||
stmt += std::to_string(m_value);
|
||||
stmt += " WHERE id=";
|
||||
stmt += std::to_string(get_random_id());
|
||||
m_value = (m_value + 1) % s_nRows;
|
||||
|
||||
if (mysql_query(pConn, stmt.c_str()) == 0)
|
||||
{
|
||||
flush_response(pConn);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_verbose)
|
||||
{
|
||||
CMESSAGE("\"" << stmt << "\" failed: " << mysql_error(pConn));
|
||||
}
|
||||
rv = false;
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static void flush_response(MYSQL* pConn)
|
||||
{
|
||||
do
|
||||
{
|
||||
MYSQL_RES* pRes = mysql_store_result(pConn);
|
||||
mysql_free_result(pRes);
|
||||
}
|
||||
while (mysql_next_result(pConn) == 0);
|
||||
}
|
||||
|
||||
int get_random_id() const
|
||||
{
|
||||
int id = s_nRows * random_decimal_fraction();
|
||||
|
||||
ss_dassert(id >= 0);
|
||||
ss_dassert(id <= s_nRows);
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
double random_decimal_fraction() const
|
||||
{
|
||||
int32_t r;
|
||||
ss_debug(int rv=) random_r(&m_random_data, &r);
|
||||
ss_dassert(rv == 0);
|
||||
|
||||
return double(r) / RAND_MAX;
|
||||
}
|
||||
|
||||
void run(const char* zHost, int port, const char* zUser, const char* zPassword)
|
||||
{
|
||||
do
|
||||
{
|
||||
MYSQL* pMysql = mysql_init(NULL);
|
||||
|
||||
if (pMysql)
|
||||
{
|
||||
unsigned int timeout = 5;
|
||||
mysql_options(pMysql, MYSQL_OPT_CONNECT_TIMEOUT, &timeout);
|
||||
mysql_options(pMysql, MYSQL_OPT_READ_TIMEOUT, &timeout);
|
||||
mysql_options(pMysql, MYSQL_OPT_WRITE_TIMEOUT, &timeout);
|
||||
|
||||
if (m_verbose)
|
||||
{
|
||||
CMESSAGE("Connecting");
|
||||
}
|
||||
|
||||
if (mysql_real_connect(pMysql, zHost, zUser, zPassword, "test", port, NULL, 0))
|
||||
{
|
||||
if (m_verbose)
|
||||
{
|
||||
CMESSAGE("Connected.");
|
||||
}
|
||||
|
||||
while (!s_shutdown && run(pMysql))
|
||||
{
|
||||
;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_verbose)
|
||||
{
|
||||
CMESSAGE("mysql_real_connect() failed: " << mysql_error(pMysql));
|
||||
}
|
||||
}
|
||||
|
||||
if (m_verbose)
|
||||
{
|
||||
CMESSAGE("Closing");
|
||||
}
|
||||
mysql_close(pMysql);
|
||||
}
|
||||
else
|
||||
{
|
||||
CMESSAGE("mysql_init() failed.");
|
||||
}
|
||||
|
||||
// To prevent some backend from becoming overwhelmed.
|
||||
sleep(1);
|
||||
}
|
||||
while (!s_shutdown);
|
||||
}
|
||||
|
||||
static void thread_main(int i, bool verbose,
|
||||
const char* zHost, int port, const char* zUser, const char* zPassword)
|
||||
{
|
||||
if (mysql_thread_init() == 0)
|
||||
{
|
||||
Client client(i, verbose);
|
||||
|
||||
client.run(zHost, port, zUser, zPassword);
|
||||
|
||||
mysql_thread_end();
|
||||
}
|
||||
else
|
||||
{
|
||||
int m_id = i;
|
||||
CMESSAGE("mysql_thread_init() failed.");
|
||||
}
|
||||
}
|
||||
|
||||
static bool create_tables(TestConnections& test)
|
||||
{
|
||||
cout << "\nCreating tables." << endl;
|
||||
|
||||
MYSQL* pConn = test.maxscales->conn_rwsplit[0];
|
||||
|
||||
string drop_head("DROP TABLE IF EXISTS test.t");
|
||||
string create_head("CREATE TABLE test.t");
|
||||
string create_tail(" (id INT)");
|
||||
|
||||
for (size_t i = 0; i < s_nClients; ++i)
|
||||
{
|
||||
string drop = drop_head + std::to_string(i);
|
||||
test.try_query(pConn, drop.c_str());
|
||||
|
||||
string create = create_head + std::to_string(i) + create_tail;
|
||||
test.try_query(pConn, create.c_str());
|
||||
}
|
||||
|
||||
return test.ok();
|
||||
}
|
||||
|
||||
static bool insert_data(TestConnections& test)
|
||||
{
|
||||
cout << "\nInserting data." << endl;
|
||||
|
||||
MYSQL* pConn = test.maxscales->conn_rwsplit[0];
|
||||
|
||||
for (size_t i = 0; i < s_nClients; ++i)
|
||||
{
|
||||
string insert("insert into test.t");
|
||||
insert += std::to_string(i);
|
||||
insert += " values ";
|
||||
|
||||
for (size_t j = 0; j < s_nRows; ++j)
|
||||
{
|
||||
insert += "(";
|
||||
insert += std::to_string(j);
|
||||
insert += ")";
|
||||
|
||||
if (j < s_nRows - 1)
|
||||
{
|
||||
insert += ", ";
|
||||
}
|
||||
}
|
||||
|
||||
test.try_query(pConn, insert.c_str());
|
||||
}
|
||||
|
||||
return test.ok();
|
||||
}
|
||||
|
||||
private:
|
||||
enum
|
||||
{
|
||||
INITSTATE_SIZE = 32
|
||||
};
|
||||
|
||||
size_t m_id;
|
||||
bool m_verbose;
|
||||
size_t m_value;
|
||||
char m_initstate[INITSTATE_SIZE];
|
||||
mutable struct random_data m_random_data;
|
||||
|
||||
static size_t s_nClients;
|
||||
static size_t s_nRows;
|
||||
static bool s_shutdown;
|
||||
|
||||
static std::vector<std::thread> s_threads;
|
||||
};
|
||||
|
||||
size_t Client::s_nClients;
|
||||
size_t Client::s_nRows;
|
||||
bool Client::s_shutdown;
|
||||
std::vector<std::thread> Client::s_threads;
|
||||
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/**
 * Runs the maxadmin command "list servers" on MaxScale 0 and prints the output.
 */
void list_servers(TestConnections& test)
{
    // The function takes a non-const char*, hence the cast.
    char* zCommand = const_cast<char*>("list servers");

    test.maxscales->execute_maxadmin_command_print(0, zCommand);
}
|
||||
|
||||
void sleep(int s)
|
||||
{
|
||||
cout << "Sleeping " << s << " times 1 second" << flush;
|
||||
do
|
||||
{
|
||||
::sleep(1);
|
||||
cout << "." << flush;
|
||||
--s;
|
||||
}
|
||||
while (s > 0);
|
||||
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
/**
 * Prints the status of a server and checks that the status is acceptable.
 *
 * 'Master' and 'Slave' are always accepted. A server that is merely
 * 'Running' is accepted only if its Last_IO_Error reports that the server
 * has diverged from the master; anything else is flagged as a test failure.
 *
 * @param test  The test instance.
 * @param id    1-based id of the server, i.e. 1 refers to "server1".
 *
 * @return True, if the server is the master, false otherwise.
 */
bool check_server_status(TestConnections& test, int id)
{
    static const char DIVERGED[] =
        ", which is not in the master's binlog. "
        "Since the master's binlog contains GTIDs with higher sequence numbers, "
        "it probably means that the slave has diverged due to executing extra "
        "erroneous transactions";

    const string name = string("server") + std::to_string(id);
    StringSet states = test.get_server_status(name.c_str());

    cout << name << ": ";
    for (const auto& state : states)
    {
        cout << state << " ";
    }
    cout << " => ";

    bool master = false;

    if (states.count("Master"))
    {
        master = true;
        cout << "OK";
    }
    else if (states.count("Slave"))
    {
        cout << "OK";
    }
    else if (states.count("Running"))
    {
        MYSQL* pNode = test.repl->nodes[id - 1];
        char last_io_error[1024];

        if (find_field(pNode, "SHOW SLAVE STATUS", "Last_IO_Error", last_io_error) != 0)
        {
            test.assert(false, "Could not execute \"SHOW SLAVE STATUS\"");
        }
        else if (strstr(last_io_error, DIVERGED) != NULL)
        {
            // The rejoin was attempted, but the node (old master) had events
            // that the new master lacks. Hence it cannot be rejoined without
            // corrective action, which makes this an accepted outcome.
            cout << "OK (could not be joined due to GTID issue)";
        }
        else
        {
            cout << last_io_error;
            test.assert(false, "Merely 'Running' node did not error in expected way.");
        }
    }
    else
    {
        test.assert(false, "Unexpected server state for %s.", name.c_str());
    }

    cout << endl;

    return master;
}
|
||||
|
||||
/**
 * Checks the status of server1 ... server4 and that exactly one of
 * them is the master.
 */
void check_server_statuses(TestConnections& test)
{
    int n_masters = 0;

    for (int id = 1; id <= 4; ++id)
    {
        if (check_server_status(test, id))
        {
            ++n_masters;
        }
    }

    test.assert(n_masters == 1, "Unpexpected number of masters: %d", n_masters);
}
|
||||
|
||||
/**
 * Runs the stress test.
 *
 * The client threads are started, after which the current master is
 * repeatedly stopped and started again, with pauses in between, until
 * TEST_DURATION seconds have passed. Finally the clients are stopped
 * and the state of all servers is checked.
 *
 * @param test  The test instance.
 */
void run(TestConnections& test)
{
    cout << "\nConnecting to MaxScale." << endl;
    test.maxscales->connect_maxscale();

    Client::init(test, Client::DEFAULT_N_CLIENTS, Client::DEFAULT_N_ROWS);

    if (test.ok())
    {
        const char* zHost = test.maxscales->IP[0];
        int port = test.maxscales->rwsplit_port[0];
        const char* zUser = test.maxscales->user_name;
        const char* zPassword = test.maxscales->password;

        cout << "Connecting to " << zHost << ":" << port << " as " << zUser << ":" << zPassword << endl;
        cout << "Starting clients." << endl;
        Client::start(test.verbose, zHost, port, zUser, zPassword);

        time_t start = time(NULL);

        list_servers(test);

        while (time(NULL) - start < TEST_DURATION)
        {
            sleep(FAILOVER_DURATION);

            int master_id = get_master_server_id(test);

            if (master_id > 0 && master_id <= 4)
            {
                cout << "\nStopping node: " << master_id << endl;
                test.repl->stop_node(master_id - 1);

                // Give the monitor time to notice that the master is gone...
                sleep(2 * MONITOR_INTERVAL);
                list_servers(test);

                // ...and time to perform the failover.
                sleep(FAILOVER_DURATION);
                list_servers(test);

                sleep(FAILOVER_DURATION);
                cout << "\nStarting node: " << master_id << endl;
                test.repl->start_node(master_id - 1);

                // Give the monitor time to notice that the node is back...
                sleep(2 * MONITOR_INTERVAL);
                list_servers(test);

                // ...and time to perform the rejoin.
                sleep(FAILOVER_DURATION);
                list_servers(test);
            }
            else
            {
                test.assert(false, "Unexpected master id: %d", master_id);
            }
        }

        sleep(FAILOVER_DURATION);

        cout << "\nStopping clients.\n" << flush;
        Client::stop();

        // Reconnect to the nodes before they are queried by the final check.
        test.repl->close_connections();
        test.repl->connect();

        check_server_statuses(test);
    }
}
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
std::ios::sync_with_stdio(true);
|
||||
|
||||
Mariadb_nodes::require_gtid(true);
|
||||
TestConnections test(argc, argv);
|
||||
|
||||
run(test);
|
||||
|
||||
return test.global_result;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user