From 099c2f87bc3fccd2ea0665fa14f2c70497c86af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Wed, 27 Sep 2017 19:08:09 +0300 Subject: [PATCH 01/18] MXS-1457: Add test case Added a test that reproduces the problem. --- maxscale-system-test/CMakeLists.txt | 4 ++ ...xscale.cnf.template.mxs1457_ignore_deleted | 60 +++++++++++++++++++ .../mxs1457_ignore_deleted.cpp | 46 ++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 maxscale-system-test/cnf/maxscale.cnf.template.mxs1457_ignore_deleted create mode 100644 maxscale-system-test/mxs1457_ignore_deleted.cpp diff --git a/maxscale-system-test/CMakeLists.txt b/maxscale-system-test/CMakeLists.txt index 66b2315e2..90d31c0ff 100644 --- a/maxscale-system-test/CMakeLists.txt +++ b/maxscale-system-test/CMakeLists.txt @@ -498,6 +498,10 @@ add_test_executable(mxs1295_sp_call.cpp mxs1295_sp_call mxs1295 LABELS maxscale # https://jira.mariadb.org/browse/MXS-1451 add_test_executable(mxs1451_skip_auth.cpp mxs1451_skip_auth mxs1451_skip_auth LABELS maxscale REPL_BACKEND) +# MXS-1457: Deleted servers are not ignored when users are loaded +# https://jira.mariadb.org/browse/MXS-1457 +add_test_executable(mxs1457_ignore_deleted.cpp mxs1457_ignore_deleted mxs1457_ignore_deleted LABELS REPL_BACKEND) + # 'namedserverfilter' test add_test_executable(namedserverfilter.cpp namedserverfilter namedserverfilter LABELS namedserverfilter LIGHT REPL_BACKEND) diff --git a/maxscale-system-test/cnf/maxscale.cnf.template.mxs1457_ignore_deleted b/maxscale-system-test/cnf/maxscale.cnf.template.mxs1457_ignore_deleted new file mode 100644 index 000000000..14b88472a --- /dev/null +++ b/maxscale-system-test/cnf/maxscale.cnf.template.mxs1457_ignore_deleted @@ -0,0 +1,60 @@ +[maxscale] +threads=###threads### +log_info=1 + +[MySQL Monitor] +type=monitor +module=mysqlmon +###repl51### +servers=server1,server2,server3,server4 +user=maxskysql +passwd=skysql +monitor_interval=1000 + +[RW Split Router] +type=service +router=readwritesplit +servers=server1,server2,server3,server4 +user=maxskysql +passwd=skysql +router_options=master_failure_mode=error_on_write + +[RW Split Listener] +type=listener +service=RW Split Router +protocol=MySQLClient +port=4006 + +[CLI] +type=service +router=cli + +[CLI Listener] +type=listener +service=CLI +protocol=maxscaled +socket=default + +[server1] +type=server +address=###node_server_IP_1### +port=###node_server_port_1### +protocol=MySQLBackend + +[server2] +type=server +address=###node_server_IP_2### +port=###node_server_port_2### +protocol=MySQLBackend + +[server3] +type=server +address=###node_server_IP_3### +port=###node_server_port_3### +protocol=MySQLBackend + +[server4] +type=server +address=###node_server_IP_4### +port=###node_server_port_4### +protocol=MySQLBackend diff --git a/maxscale-system-test/mxs1457_ignore_deleted.cpp b/maxscale-system-test/mxs1457_ignore_deleted.cpp new file mode 100644 index 000000000..753ea5d07 --- /dev/null +++ b/maxscale-system-test/mxs1457_ignore_deleted.cpp @@ -0,0 +1,46 @@ +/** + * MXS-1457: Deleted servers are not ignored when users are loaded + * + * Check that a corrupt and deleted server is not used to load users + */ + +#include "testconnections.h" + +int main(int argc, char *argv[]) +{ + TestConnections test(argc, argv); + + test.set_timeout(60); + test.repl->connect(); + execute_query(test.repl->nodes[0], "CREATE USER 'auth_test'@'%%' IDENTIFIED BY 'test'"); + execute_query(test.repl->nodes[0], "GRANT ALL ON *.* to 'auth_test'@'%%'"); + test.repl->sync_slaves(); + test.repl->close_connections(); + + // Stop slaves and drop the user on the master + test.repl->stop_slaves(); + test.repl->connect(); + execute_query(test.repl->nodes[0], "DROP USER 'auth_test'@'%%'"); + test.repl->close_connections(); + + test.set_timeout(60); + MYSQL* conn = open_conn_db(test.rwsplit_port, test.maxscale_ip(), "test", "auth_test", "test", false); + test.add_result(mysql_errno(conn) == 0, "Connection with users from master should fail"); + mysql_close(conn); + + test.ssh_maxscale(true, "maxadmin remove server server1 \"RW Split Router\""); + conn = open_conn_db(test.rwsplit_port, test.maxscale_ip(), "test", "auth_test", "test", false); + test.add_result(mysql_errno(conn), "Connection should be OK: %s", mysql_error(conn)); + test.try_query(conn, "SELECT 1"); + mysql_close(conn); + + test.set_timeout(60); + test.repl->connect(); + execute_query(test.repl->nodes[1], "START SLAVE"); + execute_query(test.repl->nodes[2], "START SLAVE"); + execute_query(test.repl->nodes[3], "START SLAVE"); + test.repl->sync_slaves(); + test.repl->close_connections(); + + return test.global_result; +} From 395b44533634d9a91649508dc716595f0ba5db0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Wed, 27 Sep 2017 19:12:45 +0300 Subject: [PATCH 02/18] MXS-1457: Ignore removed servers when loading users When users are loaded, removed or deleted servers are ignored. --- .../authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c | 13 +++++++++++++ server/modules/authenticator/MySQLAuth/dbusers.c | 14 +++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/server/modules/authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c b/server/modules/authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c index d3407a2a4..180b30a89 100644 --- a/server/modules/authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c +++ b/server/modules/authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c @@ -601,8 +601,16 @@ int gssapi_auth_load_users(SERV_LISTENER *listener) if (serviceGetUser(listener->service, &user, &pw) && (pw = decrypt_password(pw))) { + bool no_active_servers = true; + for (SERVER_REF *servers = listener->service->dbref; servers; servers = servers->next) { + if (!SERVER_REF_IS_ACTIVE(servers) || !SERVER_IS_ACTIVE(servers->server)) + { + continue; + } + + no_active_servers = false; MYSQL *mysql = mysql_init(NULL); if (mxs_mysql_real_connect(mysql, servers->server, user, pw)) @@ -645,6 +653,11 @@ int gssapi_auth_load_users(SERV_LISTENER *listener) } MXS_FREE(pw); + + if (no_active_servers) + { + rval = MXS_AUTH_LOADUSERS_OK; + } } return rval; diff --git a/server/modules/authenticator/MySQLAuth/dbusers.c b/server/modules/authenticator/MySQLAuth/dbusers.c index a2acf87b2..63f0eeb42 100644 --- a/server/modules/authenticator/MySQLAuth/dbusers.c +++ b/server/modules/authenticator/MySQLAuth/dbusers.c @@ -855,15 +855,18 @@ static int get_users(SERV_LISTENER *listener, bool skip_local) SERVER_REF *server = service->dbref; int total_users = -1; + bool no_active_servers = true; for (server = service->dbref; !service->svc_do_shutdown && server; server = server->next) { - if (skip_local && server_is_mxs_service(server->server)) + if (!SERVER_REF_IS_ACTIVE(server) || !SERVER_IS_ACTIVE(server->server) || + (skip_local && server_is_mxs_service(server->server))) { - total_users = 0; continue; } + no_active_servers = false; + MYSQL *con = gw_mysql_init(); if (con) { @@ -897,7 +900,12 @@ static int get_users(SERV_LISTENER *listener, bool skip_local) MXS_FREE(dpwd); - if (server == NULL && total_users == -1) + if (no_active_servers) + { + // This service has no servers or all servers are local MaxScale services + total_users = 0; + } + else if (server == NULL && total_users == -1) { MXS_ERROR("Unable to get user data from backend database for service [%s]." " Failed to connect to any of the backend databases.", service->name); From 016ad77b62c75d19efe7149e0db93bea6fc60558 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Wed, 27 Sep 2017 20:00:39 +0300 Subject: [PATCH 03/18] MXS-1457: Inject service credentials if no users are loaded If the authenticator option is enabled, no users are loaded and no errors have occurred in the user loading process, the service credentials are injected. --- .../authenticator/MySQLAuth/mysql_auth.c | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/server/modules/authenticator/MySQLAuth/mysql_auth.c b/server/modules/authenticator/MySQLAuth/mysql_auth.c index 1da67f120..d8e4f6e17 100644 --- a/server/modules/authenticator/MySQLAuth/mysql_auth.c +++ b/server/modules/authenticator/MySQLAuth/mysql_auth.c @@ -621,11 +621,15 @@ static int mysql_auth_load_users(SERV_LISTENER *port) } int loaded = replace_mysql_users(port, skip_local); + bool injected = false; - if (loaded < 0) + if (loaded <= 0) { - MXS_ERROR("[%s] Unable to load users for listener %s listening at [%s]:%d.", service->name, - port->name, port->address ? port->address : "::", port->port); + if (loaded < 0) + { + MXS_ERROR("[%s] Unable to load users for listener %s listening at [%s]:%d.", service->name, + port->name, port->address ? port->address : "::", port->port); + } if (instance->inject_service_user) { @@ -635,10 +639,20 @@ static int mysql_auth_load_users(SERV_LISTENER *port) { MXS_ERROR("[%s] Failed to inject service user.", port->service->name); } + else + { + injected = true; + } } } - if (loaded == 0 && !skip_local) + if (injected) + { + MXS_NOTICE("[%s] No users were loaded but 'inject_service_user' is enabled. " + "Enabling service credentials for authentication until " + "database users have been successfully loaded.", service->name); + } + else if (loaded == 0 && !skip_local) { MXS_WARNING("[%s]: failed to load any user information. Authentication" " will probably fail as a result.", service->name); From 32709b3e469cd3c6b8b4015ab86315ae0ecf0485 Mon Sep 17 00:00:00 2001 From: MassimilianoPinto Date: Thu, 28 Sep 2017 10:01:29 +0200 Subject: [PATCH 04/18] MXS-1458: Mark backend server as inactive if router is not configured. MXS-1458: Mark backend server as inactive if router is not configured. --- server/modules/routing/binlogrouter/blr.c | 8 +++++++- server/modules/routing/binlogrouter/blr_slave.c | 9 +++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/server/modules/routing/binlogrouter/blr.c b/server/modules/routing/binlogrouter/blr.c index e9f1daf17..2e581d1c0 100644 --- a/server/modules/routing/binlogrouter/blr.c +++ b/server/modules/routing/binlogrouter/blr.c @@ -715,9 +715,12 @@ createInstance(SERVICE *service, char **options) /** Set SSL pointer in in server struct */ server->server_ssl = ssl_cfg; - /* Set server unique name */ /* Add server to service backend list */ serviceAddBackend(inst->service, server); + + /* Hide backend server struct */ + service->dbref->server->is_active = false; + service->dbref->active = false; } /* @@ -766,6 +769,9 @@ createInstance(SERVICE *service, char **options) else { inst->master_state = BLRM_UNCONNECTED; + /* Set backend server as active */ + service->dbref->server->is_active = true; + service->dbref->active = true; } /** diff --git a/server/modules/routing/binlogrouter/blr_slave.c b/server/modules/routing/binlogrouter/blr_slave.c index b3d94ae31..f1f6eefe5 100644 --- a/server/modules/routing/binlogrouter/blr_slave.c +++ b/server/modules/routing/binlogrouter/blr_slave.c @@ -1037,6 +1037,15 @@ blr_slave_query(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue) return 1; } + /* Mark as active the master server struct */ + spinlock_acquire(&router->lock); + if (!router->service->dbref->server->is_active) + { + router->service->dbref->server->is_active = true; + router->service->dbref->active = true; + } + spinlock_release(&router->lock); + /** * check if router is BLRM_UNCONFIGURED * and change state to BLRM_SLAVE_STOPPED From 1827f042e8c99109098f912ee47913aea4eab06a Mon Sep 17 00:00:00 2001 From: MassimilianoPinto Date: Fri, 29 Sep 2017 18:57:09 +0200 Subject: [PATCH 05/18] MXS-1459: Assign binlog checksum value at startup Binlog checksum default value is wrong if a slave connects with checksum = NONE before master registration or master is not accessible at startup --- server/modules/routing/binlogrouter/blr.c | 14 ++++++++ .../modules/routing/binlogrouter/blr_master.c | 33 ++++++++++++++----- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/server/modules/routing/binlogrouter/blr.c b/server/modules/routing/binlogrouter/blr.c index 2e581d1c0..fce007d14 100644 --- a/server/modules/routing/binlogrouter/blr.c +++ b/server/modules/routing/binlogrouter/blr.c @@ -124,6 +124,8 @@ static void stats_func(void *); static bool rses_begin_locked_router_action(ROUTER_SLAVE *); static void rses_end_locked_router_action(ROUTER_SLAVE *); GWBUF *blr_cache_read_response(ROUTER_INSTANCE *router, char *response); +/* Set checksum value in router instance from a master reply or saved buffer */ +extern void blr_set_checksum(ROUTER_INSTANCE *instance, GWBUF *buf); static SPINLOCK instlock; static ROUTER_INSTANCE *instances; @@ -828,6 +830,18 @@ createInstance(SERVICE *service, char **options) /* Read any cached response messages */ blr_cache_read_master_data(inst); + /** + * The value of master checksum is known only at registration time, so + * as soon as replication succeds the value is updated. + * Set now the binlog checksum from the saved value. + * This is very useful in case of possible failure in the + * registration phase for any reason: master is down, wrong password etc. + * In this case a connecting slave will get the checksum value + * from previous registration instead of default one (CRC32) + * which can be wrong if slave has binlog_checksum = NONE. + */ + blr_set_checksum(inst, inst->saved_master.chksum2); + /* Find latest binlog file or create a new one (000001) */ if (blr_file_init(inst) == 0) { diff --git a/server/modules/routing/binlogrouter/blr_master.c b/server/modules/routing/binlogrouter/blr_master.c index c2e8234f8..ae0d1c352 100644 --- a/server/modules/routing/binlogrouter/blr_master.c +++ b/server/modules/routing/binlogrouter/blr_master.c @@ -513,16 +513,9 @@ blr_master_response(ROUTER_INSTANCE *router, GWBUF *buf) break; case BLRM_CHKSUM2: { - char *val = blr_extract_column(buf, 1); + // Set checksum from master reply + blr_set_checksum(router, buf); - if (val && strncasecmp(val, "NONE", 4) == 0) - { - router->master_chksum = false; - } - if (val) - { - MXS_FREE(val); - } // Response to the master_binlog_checksum, should be stored if (router->saved_master.chksum2) { @@ -2619,3 +2612,25 @@ void blr_notify_all_slaves(ROUTER_INSTANCE *router) MXS_DEBUG("Notified %d slaves about new data.", notified); } } + +/** + * Set checksum value in router instance + * + * @param inst The router instance + * @param buf The buffer with checksum value + */ +void blr_set_checksum(ROUTER_INSTANCE *inst, GWBUF *buf) +{ + if (buf) + { + char *val = blr_extract_column(buf, 1); + if (val && strncasecmp(val, "NONE", 4) == 0) + { + inst->master_chksum = false; + } + if (val) + { + MXS_FREE(val); + } + } +} From 94a55f6602833dea6764a421cea1b9a7c3de909f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Sat, 30 Sep 2017 08:37:12 +0300 Subject: [PATCH 06/18] Add missing declaration of blr_set_checksum The function was used before it was declared. --- server/modules/routing/binlogrouter/blr_master.c | 1 + 1 file changed, 1 insertion(+) diff --git a/server/modules/routing/binlogrouter/blr_master.c b/server/modules/routing/binlogrouter/blr_master.c index ae0d1c352..fb8e367e0 100644 --- a/server/modules/routing/binlogrouter/blr_master.c +++ b/server/modules/routing/binlogrouter/blr_master.c @@ -108,6 +108,7 @@ static int blr_get_master_semisync(GWBUF *buf); static void blr_terminate_master_replication(ROUTER_INSTANCE *router, uint8_t* ptr, int len); void blr_notify_all_slaves(ROUTER_INSTANCE *router); extern bool blr_notify_waiting_slave(ROUTER_SLAVE *slave); +void blr_set_checksum(ROUTER_INSTANCE *inst, GWBUF *buf); static int keepalive = 1; From 69557c650e56bcbb5b760bb4f47c7be9953203ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Sat, 30 Sep 2017 12:37:40 +0300 Subject: [PATCH 07/18] Fix stack trace generation The stack traces weren't logged as the LOG_ALERT priority wasn't enabled by default. As an alert is intended to be something that must leave a trace somewhere, and as such, it must not be possible to disable it. For this reason, it is acceptable to always log the message if the priority is LOG_ALERT. Added the -rdynamic linker flag so that all symbols are exported when linking MaxScale. As the stack trace is printed in a signal handler, the first attempt should be to print the stack trace to the standard output. This way the output is printed before an attempt to use malloc is made when it is logged to the logfile. --- CMakeLists.txt | 2 +- include/maxscale/log_manager.h | 2 +- server/core/gateway.cc | 35 ++++++++++++++++------------------ 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c98aa7f62..e9f8b7813 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,7 +144,7 @@ configure_file(${CMAKE_SOURCE_DIR}/etc/postrm.in ${CMAKE_BINARY_DIR}/postrm @ONL configure_file(${CMAKE_SOURCE_DIR}/etc/upstart/maxscale.conf.in ${CMAKE_BINARY_DIR}/upstart/maxscale.conf @ONLY) configure_file(${CMAKE_SOURCE_DIR}/server/test/maxscale_test.cnf ${CMAKE_BINARY_DIR}/maxscale.cnf @ONLY) -set(FLAGS "-Wall -Wno-unused-variable -Wno-unused-function -Werror -fPIC" CACHE STRING "Compilation flags") +set(FLAGS "-rdynamic -Wall -Wno-unused-variable -Wno-unused-function -Werror -fPIC" CACHE STRING "Compilation flags") set(DEBUG_FLAGS "-ggdb -pthread -pipe -Wformat -fstack-protector --param=ssp-buffer-size=4" CACHE STRING "Debug compilation flags") if(CMAKE_VERSION VERSION_GREATER 2.6) diff --git a/include/maxscale/log_manager.h b/include/maxscale/log_manager.h index 329155dc7..4f2acf64d 100644 --- a/include/maxscale/log_manager.h +++ b/include/maxscale/log_manager.h @@ -109,7 +109,7 @@ void mxs_log_get_throttling(MXS_LOG_THROTTLING* throttling); static inline bool mxs_log_priority_is_enabled(int priority) { assert((priority & ~LOG_PRIMASK) == 0); - return MXS_LOG_PRIORITY_IS_ENABLED(priority); + return MXS_LOG_PRIORITY_IS_ENABLED(priority) || priority == LOG_ALERT; } int mxs_log_message(int priority, diff --git a/server/core/gateway.cc b/server/core/gateway.cc index 0c838245c..da4aefd08 100644 --- a/server/core/gateway.cc +++ b/server/core/gateway.cc @@ -398,32 +398,31 @@ sigfatal_handler(int i) } fatal_handling = 1; MXS_CONFIG* cnf = config_get_global_options(); - fprintf(stderr, "\n\nMaxScale " MAXSCALE_VERSION " received fatal signal %d\n", i); + fprintf(stderr, "Fatal: MaxScale " MAXSCALE_VERSION " received fatal signal %d. " + "Attempting backtrace.\n", i); + fprintf(stderr, "Commit ID: %s System name: %s Release string: %s\n\n", + maxscale_commit, cnf->sysname, cnf->release_string); + void *addrs[128]; + int count = backtrace(addrs, 128); - MXS_ALERT("Fatal: MaxScale " MAXSCALE_VERSION " received fatal signal %d. Attempting backtrace.", i); + // First print the stack trace to stderr as malloc is likely broken + backtrace_symbols_fd(addrs, count, STDERR_FILENO); + MXS_ALERT("Fatal: MaxScale " MAXSCALE_VERSION " received fatal signal %d. " + "Attempting backtrace.", i); MXS_ALERT("Commit ID: %s System name: %s " "Release string: %s", maxscale_commit, cnf->sysname, cnf->release_string); + // Then see if we can log them + char** symbols = backtrace_symbols(addrs, count); + if (symbols) { - void *addrs[128]; - int count = backtrace(addrs, 128); - char** symbols = backtrace_symbols(addrs, count); - - if (symbols) + for (int n = 0; n < count; n++) { - for (int n = 0; n < count; n++) - { - MXS_ALERT(" %s\n", symbols[n]); - } - MXS_FREE(symbols); - } - else - { - fprintf(stderr, "\nresolving symbols to error log failed, writing call trace to stderr:\n"); - backtrace_symbols_fd(addrs, count, fileno(stderr)); + MXS_ALERT(" %s\n", symbols[n]); } + MXS_FREE(symbols); } mxs_log_flush_sync(); @@ -434,8 +433,6 @@ sigfatal_handler(int i) raise(i); } - - /** * @node Wraps sigaction calls * From af08647fa2191f38f5b346805243a4bbdb909612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Sun, 1 Oct 2017 10:49:11 +0300 Subject: [PATCH 08/18] Add variadic ssh function to mariadb_nodes The ssh_node function now supports printf style arguments. This is used to simplify command execution on the nodes. Curreltny, in addition to its old usage, it is used to drop extra databases when replication is started. --- maxscale-system-test/bug519.cpp | 7 +- maxscale-system-test/kerberos_setup.cpp | 25 +- maxscale-system-test/mariadb_func.cpp | 10 +- maxscale-system-test/mariadb_func.h | 4 +- maxscale-system-test/mariadb_nodes.cpp | 276 +++++++++--------- maxscale-system-test/mariadb_nodes.h | 8 +- .../mxs280_select_outfile.cpp | 4 +- maxscale-system-test/mxs951_utfmb4.cpp | 6 +- maxscale-system-test/testconnections.cpp | 90 +++--- maxscale-system-test/testconnections.h | 6 +- 10 files changed, 208 insertions(+), 228 deletions(-) diff --git a/maxscale-system-test/bug519.cpp b/maxscale-system-test/bug519.cpp index 79b4c34da..18d91b874 100644 --- a/maxscale-system-test/bug519.cpp +++ b/maxscale-system-test/bug519.cpp @@ -88,14 +88,11 @@ int main(int argc, char *argv[]) Test->repl->sync_slaves(); Test->set_timeout(200); - sprintf(str, "%s rm -f /tmp/t*.csv; %s chmod 777 /tmp", Test->repl->access_sudo[0], - Test->repl->access_sudo[0]); - Test->tprintf("%s\n", str); for (int k = 0; k < Test->repl->N; k++) { - Test->repl->ssh_node(k, str, false); + Test->repl->ssh_node(k, false, "%s rm -f /tmp/t*.csv; %s chmod 777 /tmp", + Test->repl->access_sudo[0], Test->repl->access_sudo[0]); } - //system(str); Test->tprintf("Copying data from t1 to file...\n"); Test->tprintf("using RWSplit: SELECT * INTO OUTFILE '/tmp/t1.csv' FROM t1;\n"); diff --git a/maxscale-system-test/kerberos_setup.cpp b/maxscale-system-test/kerberos_setup.cpp index a6a2fe7c7..7ec96d82b 100644 --- a/maxscale-system-test/kerberos_setup.cpp +++ b/maxscale-system-test/kerberos_setup.cpp @@ -32,13 +32,12 @@ int main(int argc, char *argv[]) sprintf(str, "%s/krb5.conf", test_dir); for (i = 0; i < Test->repl->N; i++) { - Test->repl->ssh_node(i, (char *) - "yum install -y MariaDB-gssapi-server MariaDB-gssapi-client krb5-workstation pam_krb5", true); + Test->repl->ssh_node(i, true, "yum install -y MariaDB-gssapi-server MariaDB-gssapi-client krb5-workstation pam_krb5"); Test->repl->copy_to_node(str, (char *) "~/", i); - Test->repl->ssh_node(i, (char *) "cp ~/krb5.conf /etc/", true); + Test->repl->ssh_node(i, true, "cp ~/krb5.conf /etc/"); Test->repl->copy_to_node((char *) "hosts", (char *) "~/", i); - Test->repl->ssh_node(i, (char *) "cp ~/hosts /etc/", true); + Test->repl->ssh_node(i, true, "cp ~/hosts /etc/"); } Test->tprintf("Copying 'hosts' and krb5.conf files to Maxscale node\n"); @@ -96,12 +95,12 @@ int main(int argc, char *argv[]) { sprintf(str, "%s/kerb.cnf", test_dir); Test->repl->copy_to_node(str, (char *) "~/", i); - Test->repl->ssh_node(i, (char *) "cp ~/kerb.cnf /etc/my.cnf.d/", true); + Test->repl->ssh_node(i, true, "cp ~/kerb.cnf /etc/my.cnf.d/"); Test->repl->copy_to_node((char *) "krb5.keytab", (char *) "~/", i); - Test->repl->ssh_node(i, (char *) "cp ~/krb5.keytab /etc/", true); + Test->repl->ssh_node(i, true, "cp ~/krb5.keytab /etc/"); - Test->repl->ssh_node(i, (char *) "kinit mariadb/maxscale.test@MAXSCALE.TEST -k -t /etc/krb5.keytab", false); + Test->repl->ssh_node(i, false, "kinit mariadb/maxscale.test@MAXSCALE.TEST -k -t /etc/krb5.keytab"); } Test->tprintf("Installing gssapi plugin to all nodes\n"); @@ -118,18 +117,18 @@ int main(int argc, char *argv[]) Test->tprintf("Trying use usr1 to execute query: RW Split\n"); Test->add_result( - Test->repl->ssh_node(1, - "echo select User,Host from mysql.user | mysql -uusr1 -h maxscale.maxscale.test -P 4006", false), + Test->repl->ssh_node(1, false, + "echo select User,Host from mysql.user | mysql -uusr1 -h maxscale.maxscale.test -P 4006"), "Error executing query against RW Split\n"); Test->tprintf("Trying use usr1 to execute query: Read Connection Master\n"); Test->add_result( - Test->repl->ssh_node(1, - "echo select User,Host from mysql.user | mysql -uusr1 -h maxscale.maxscale.test -P 4008", false), + Test->repl->ssh_node(1, false, + "echo select User,Host from mysql.user | mysql -uusr1 -h maxscale.maxscale.test -P 4008"), "Error executing query against Read Connection Master\n"); Test->tprintf("Trying use usr1 to execute query: Read Connection Slave\n"); Test->add_result( - Test->repl->ssh_node(1, - "echo select User,Host from mysql.user | mysql -uusr1 -h maxscale.maxscale.test -P 4009", false), + Test->repl->ssh_node(1, false, + "echo select User,Host from mysql.user | mysql -uusr1 -h maxscale.maxscale.test -P 4009"), "Error executing query against Read Connection Slave\n"); int rval = Test->global_result; diff --git a/maxscale-system-test/mariadb_func.cpp b/maxscale-system-test/mariadb_func.cpp index 0a1ef7882..c098c6dd4 100644 --- a/maxscale-system-test/mariadb_func.cpp +++ b/maxscale-system-test/mariadb_func.cpp @@ -456,7 +456,7 @@ int execute_query_count_rows(MYSQL *conn, const char *sql) return rval; } -int get_conn_num(MYSQL *conn, char * ip, char *hostname, char * db) +int get_conn_num(MYSQL *conn, const char* ip, const char* hostname, const char* db) { MYSQL_RES *res; MYSQL_ROW row; @@ -584,16 +584,16 @@ unsigned int get_seconds_behind_master(MYSQL *conn) char SBM_str[16]; unsigned int SBM = 0; if (find_field( - conn, (char *) "show slave status;", - (char *) "Seconds_Behind_Master", &SBM_str[0] - ) != 1) + conn, (char *) "show slave status;", + (char *) "Seconds_Behind_Master", &SBM_str[0] + ) != 1) { sscanf(SBM_str, "%u", &SBM); } return SBM; } -int read_log(char * name, char ** err_log_content_p) +int read_log(const char* name, char ** err_log_content_p) { FILE *f; *err_log_content_p = NULL; diff --git a/maxscale-system-test/mariadb_func.h b/maxscale-system-test/mariadb_func.h index 3090dda9d..a59ff2732 100644 --- a/maxscale-system-test/mariadb_func.h +++ b/maxscale-system-test/mariadb_func.h @@ -200,7 +200,7 @@ int execute_query_check_one(MYSQL *conn, const char *sql, const char *expected); * @param db name of DB to which connections are counted * @return number of connections */ -int get_conn_num(MYSQL *conn, char * ip, char * hostname, char * db); +int get_conn_num(MYSQL *conn, const char* ip, const char* hostname, const char* db); /** * @brief Find given filed in the SQL query reply @@ -227,7 +227,7 @@ unsigned int get_seconds_behind_master(MYSQL *conn); * @param err_log_content pointer to the buffer to store log file content * @return 0 in case of success, 1 in case of error */ -int read_log(char * name, char **err_log_content_p); +int read_log(const char* name, char **err_log_content_p); int get_int_version(const std::string& version); int get_int_version(const char* version); diff --git a/maxscale-system-test/mariadb_nodes.cpp b/maxscale-system-test/mariadb_nodes.cpp index cd21436af..1d45b99ce 100644 --- a/maxscale-system-test/mariadb_nodes.cpp +++ b/maxscale-system-test/mariadb_nodes.cpp @@ -17,7 +17,7 @@ #include #include -Mariadb_nodes::Mariadb_nodes(const char *pref, const char *test_cwd, bool verbose): +Mariadb_nodes::Mariadb_nodes(const char *pref, const char *test_cwd, bool verbose) : v51(false), use_ipv6(false) { strcpy(prefix, pref); @@ -119,7 +119,7 @@ int Mariadb_nodes::read_env() ssl = false; sprintf(env_name, "%s_ssl", prefix); env = getenv(env_name); - if ((env != NULL) && ((strcasecmp(env, "yes") == 0) || (strcasecmp(env, "true") == 0) )) + if ((env != NULL) && ((strcasecmp(env, "yes") == 0) || (strcasecmp(env, "true") == 0))) { ssl = true; } @@ -290,9 +290,9 @@ int Mariadb_nodes::find_master() while ((found == 0) && (i < N)) { if (find_field( - nodes[i], (char *) "show slave status;", - (char *) "Master_Host", &str[0] - ) == 0 ) + nodes[i], "show slave status;", + "Master_Host", &str[0] + ) == 0) { found = 1; strcpy(master_IP, str); @@ -328,13 +328,13 @@ int Mariadb_nodes::change_master(int NewMaster, int OldMaster) { if (i != OldMaster) { - execute_query(nodes[i], (char *) "stop slave;"); + execute_query(nodes[i], "stop slave;"); } } execute_query(nodes[NewMaster], create_repl_user); - execute_query(nodes[OldMaster], (char *) "reset master;"); - find_field(nodes[NewMaster], (char *) "show master status", (char *) "File", &log_file[0]); - find_field(nodes[NewMaster], (char *) "show master status", (char *) "Position", &log_pos[0]); + execute_query(nodes[OldMaster], "reset master;"); + find_field(nodes[NewMaster], "show master status", "File", &log_file[0]); + find_field(nodes[NewMaster], "show master status", "Position", &log_pos[0]); for (i = 0; i < N; i++) { if (i != NewMaster) @@ -343,15 +343,15 @@ int Mariadb_nodes::change_master(int NewMaster, int OldMaster) execute_query(nodes[i], str); } } - //for (i = 0; i < N; i++) {if (i != NewMaster) {execute_query(nodes[i], (char *) "start slave;"); }} + //for (i = 0; i < N; i++) {if (i != NewMaster) {execute_query(nodes[i], "start slave;"); }} } int Mariadb_nodes::stop_node(int node) { - return ssh_node(node, stop_db_command[node], true); + return ssh_node(node, true, stop_db_command[node]); } -int Mariadb_nodes::start_node(int node, char * param) +int Mariadb_nodes::start_node(int node, const char* param) { char cmd[1024]; if (v51) @@ -362,7 +362,7 @@ int Mariadb_nodes::start_node(int node, char * param) { sprintf(cmd, "%s %s", start_db_command[node], param); } - return ssh_node(node, cmd, true); + return ssh_node(node, true, cmd); } int Mariadb_nodes::stop_nodes() @@ -374,7 +374,7 @@ int Mariadb_nodes::stop_nodes() { printf("Stopping node %d\n", i); fflush(stdout); - local_result += execute_query(nodes[i], (char *) "stop slave;"); + local_result += execute_query(nodes[i], "stop slave;"); fflush(stdout); local_result += stop_node(i); fflush(stdout); @@ -391,7 +391,7 @@ int Mariadb_nodes::stop_slaves() { printf("Stopping slave %d\n", i); fflush(stdout); - global_result += execute_query(nodes[i], (char *) "stop slave;"); + global_result += execute_query(nodes[i], "stop slave;"); } close_connections(); return global_result; @@ -399,7 +399,7 @@ int Mariadb_nodes::stop_slaves() int Mariadb_nodes::cleanup_db_node(int node) { - return ssh_node(node, cleanup_db_command[node], true); + return ssh_node(node, true, cleanup_db_command[node]); } int Mariadb_nodes::cleanup_db_nodes() @@ -417,8 +417,6 @@ int Mariadb_nodes::cleanup_db_nodes() return local_result; } - - int Mariadb_nodes::start_replication() { char str[1024]; @@ -428,27 +426,27 @@ int Mariadb_nodes::start_replication() // Start all nodes for (int i = 0; i < N; i++) { - local_result += start_node(i, (char *) ""); - sprintf(str, - "mysql -u root %s -e \"STOP SLAVE; RESET SLAVE; RESET SLAVE ALL; RESET MASTER; SET GLOBAL read_only=OFF;\"", - socket_cmd[i]); - ssh_node(i, str, true); - ssh_node(i, "sudo rm -f /etc/my.cnf.d/kerb.cnf", true); + local_result += start_node(i, ""); + ssh_node(i, true, + "mysql -u root %s -e \"STOP SLAVE; RESET SLAVE; RESET SLAVE ALL; RESET MASTER; SET GLOBAL read_only=OFF;\"", + socket_cmd[i]); + ssh_node(i, true, "sudo rm -f /etc/my.cnf.d/kerb.cnf"); + ssh_node(i, true, + "for i in `mysql -ss -u root %s -e \"SHOW DATABASES\"|grep -iv 'mysql\\|information_schema\\|performance_schema'`; " + "do mysql -u root %s -e \"DROP DATABASE $i\";" + "done", socket_cmd[i], socket_cmd[i]); } sprintf(str, "%s/create_user.sh", test_dir); sprintf(dtr, "%s", access_homedir[0]); - copy_to_node(str, dtr , 0); - sprintf(str, "export node_user=\"%s\"; export node_password=\"%s\"; %s/create_user.sh %s", - user_name, password, access_homedir[0], socket_cmd[0]); - printf("cmd: %s\n", str); - ssh_node(0, str, false); + copy_to_node(str, dtr, 0); + ssh_node(0, false, "export node_user=\"%s\"; export node_password=\"%s\"; %s/create_user.sh %s", + user_name, password, access_homedir[0], socket_cmd[0]); // Create a database dump from the master and distribute it to the slaves - sprintf(str, - "mysqldump --all-databases --add-drop-database --flush-privileges --master-data=1 --gtid %s > /tmp/master_backup.sql", - socket_cmd[0]); - ssh_node(0, str, true); + ssh_node(0, true, "mysql -u root %s -e \"CREATE DATABASE test\"; " + "mysqldump --all-databases --add-drop-database --flush-privileges --master-data=1 --gtid %s > /tmp/master_backup.sql", + socket_cmd[0], socket_cmd[0]); sprintf(str, "%s/master_backup.sql", test_dir); copy_from_node("/tmp/master_backup.sql", str, 0); @@ -458,16 +456,11 @@ int Mariadb_nodes::start_replication() printf("Starting node %d\n", i); fflush(stdout); copy_to_node(str, "/tmp/master_backup.sql", i); - sprintf(dtr, - "mysql -u root %s < /tmp/master_backup.sql", - socket_cmd[i]); - ssh_node(i, dtr, true); - char query[512]; - - sprintf(query, "mysql -u root %s -e \"CHANGE MASTER TO MASTER_HOST=\\\"%s\\\", MASTER_PORT=%d, " - "MASTER_USER=\\\"repl\\\", MASTER_PASSWORD=\\\"repl\\\";" - "START SLAVE;\"", socket_cmd[i], IP_private[0], port[0]); - ssh_node(i, query, true); + ssh_node(i, true, "mysql -u root %s < /tmp/master_backup.sql", + socket_cmd[i]); + ssh_node(i, true, "mysql -u root %s -e \"CHANGE MASTER TO MASTER_HOST=\\\"%s\\\", MASTER_PORT=%d, " + "MASTER_USER=\\\"repl\\\", MASTER_PASSWORD=\\\"repl\\\";" + "START SLAVE;\"", socket_cmd[i], IP_private[0], port[0]); } return local_result; @@ -482,39 +475,30 @@ int Galera_nodes::start_galera() local_result += stop_nodes(); // Remove the grastate.dat file - ssh_node(0, "rm -f /var/lib/mysql/grastate.dat", true); + ssh_node(0, true, "rm -f /var/lib/mysql/grastate.dat"); printf("Starting new Galera cluster\n"); fflush(stdout); - ssh_node(0, "echo [mysqld] > cluster_address.cnf", false); - ssh_node(0, "echo wsrep_cluster_address=gcomm:// >> cluster_address.cnf", false); - ssh_node(0, "cp cluster_address.cnf /etc/my.cnf.d/", true); - local_result += start_node(0, (char *) " --wsrep-cluster-address=gcomm://"); + ssh_node(0, false, "echo [mysqld] > cluster_address.cnf"); + ssh_node(0, false, "echo wsrep_cluster_address=gcomm:// >> cluster_address.cnf"); + ssh_node(0, true, "cp cluster_address.cnf /etc/my.cnf.d/"); + local_result += start_node(0, " --wsrep-cluster-address=gcomm://"); sprintf(str, "%s/create_user_galera.sh", test_dir); copy_to_node(str, "~/", 0); - sprintf(str, "export galera_user=\"%s\"; export galera_password=\"%s\"; ./create_user_galera.sh %s", user_name, - password, socket_cmd[0]); - ssh_node(0, str, false); + ssh_node(0, false, "export galera_user=\"%s\"; export galera_password=\"%s\"; ./create_user_galera.sh %s", + user_name, + password, socket_cmd[0]); for (i = 1; i < N; i++) { printf("Starting node %d\n", i); - fflush(stdout); - ssh_node(i, "echo [mysqld] > cluster_address.cnf", true); - sprintf(str, "echo wsrep_cluster_address=gcomm://%s >> cluster_address.cnf", IP_private[0]); - ssh_node(i, str, true); - ssh_node(i, "cp cluster_address.cnf /etc/my.cnf.d/", true); - - sprintf(&sys1[0], " --wsrep-cluster-address=gcomm://%s", IP_private[0]); - if (this->verbose) - { - printf("%s\n", sys1); - fflush(stdout); - } - local_result += start_node(i, sys1); - fflush(stdout); + ssh_node(i, true, "echo [mysqld] > cluster_address.cnf"); + ssh_node(i, true, "echo wsrep_cluster_address=gcomm://%s >> cluster_address.cnf", IP_private[0]); + ssh_node(i, true, "cp cluster_address.cnf /etc/my.cnf.d/"); + sprintf(str, " --wsrep-cluster-address=gcomm://%s", IP_private[0]); + local_result += start_node(i, str); } sleep(5); @@ -527,44 +511,30 @@ int Galera_nodes::start_galera() int Mariadb_nodes::clean_iptables(int node) { - char sys1[1024]; int local_result = 0; - local_result += ssh_node(node, (char *) "echo \"#!/bin/bash\" > clean_iptables.sh", false); - sprintf(sys1, - "echo \"while [ \\\"\\$(iptables -n -L INPUT 1|grep '%d')\\\" != \\\"\\\" ]; do iptables -D INPUT 1; done\" >> clean_iptables.sh", - port[node]); - local_result += ssh_node(node, (char *) sys1, false); - sprintf(sys1, - "echo \"while [ \\\"\\$(ip6tables -n -L INPUT 1|grep '%d')\\\" != \\\"\\\" ]; do ip6tables -D INPUT 1; done\" >> clean_iptables.sh", - port[node]); - local_result += ssh_node(node, (char *) sys1, false); + local_result += ssh_node(node, false, "echo \"#!/bin/bash\" > clean_iptables.sh"); + local_result += ssh_node(node, false, + "echo \"while [ \\\"\\$(iptables -n -L INPUT 1|grep '%d')\\\" != \\\"\\\" ]; " + "do iptables -D INPUT 1; done\" >> clean_iptables.sh", + port[node]); + local_result += ssh_node(node, false, + "echo \"while [ \\\"\\$(ip6tables -n -L INPUT 1|grep '%d')\\\" != \\\"\\\" ]; " + "do ip6tables -D INPUT 1; done\" >> clean_iptables.sh", + port[node]); - local_result += ssh_node(node, (char *) "chmod a+x clean_iptables.sh", false); - local_result += ssh_node(node, (char *) "./clean_iptables.sh", true); + local_result += ssh_node(node, false, "chmod a+x clean_iptables.sh"); + local_result += ssh_node(node, true, "./clean_iptables.sh"); return local_result; } int Mariadb_nodes::block_node(int node) { - char sys1[1024]; int local_result = 0; local_result += clean_iptables(node); - sprintf(&sys1[0], "iptables -I INPUT -p tcp --dport %d -j REJECT", port[node]); - if (this->verbose) - { - printf("%s\n", sys1); - fflush(stdout); - } - local_result += ssh_node(node, sys1, true); - sprintf(&sys1[0], "ip6tables -I INPUT -p tcp --dport %d -j REJECT", port[node]); - if (this->verbose) - { - printf("%s\n", sys1); - fflush(stdout); - } - local_result += ssh_node(node, sys1, true); + local_result += ssh_node(node, true, "iptables -I INPUT -p tcp --dport %d -j REJECT", port[node]); + local_result += ssh_node(node, true, "ip6tables -I INPUT -p tcp --dport %d -j REJECT", port[node]); blocked[node] = true; return local_result; @@ -572,29 +542,16 @@ int Mariadb_nodes::block_node(int node) int Mariadb_nodes::unblock_node(int node) { - char sys1[1024]; int local_result = 0; local_result += clean_iptables(node); - sprintf(&sys1[0], "iptables -I INPUT -p tcp --dport %d -j ACCEPT", port[node]); - if (this->verbose) - { - printf("%s\n", sys1); - fflush(stdout); - } - local_result += ssh_node(node, sys1, true); - sprintf(&sys1[0], "ip6tables -I INPUT -p tcp --dport %d -j ACCEPT", port[node]); - if (this->verbose) - { - printf("%s\n", sys1); - fflush(stdout); - } - local_result += ssh_node(node, sys1, true); + + local_result += ssh_node(node, true, "iptables -I INPUT -p tcp --dport %d -j ACCEPT", port[node]); + local_result += ssh_node(node, true, "ip6tables -I INPUT -p tcp --dport %d -j ACCEPT", port[node]); blocked[node] = false; return local_result; } - int Mariadb_nodes::unblock_all_nodes() { int rval = 0; @@ -611,7 +568,7 @@ int Mariadb_nodes::check_node_ssh(int node) printf("Checking node %d\n", node); fflush(stdout); - if (ssh_node(0, (char *) "ls > /dev/null", false) != 0) + if (ssh_node(0, false, "ls > /dev/null") != 0) { printf("Node %d is not available\n", node); fflush(stdout); @@ -847,7 +804,7 @@ int Galera_nodes::check_galera() { char str[1024] = ""; - if (find_field(conn, (char *) "SHOW STATUS WHERE Variable_name='wsrep_cluster_size';", (char *) "Value", + if (find_field(conn, "SHOW STATUS WHERE Variable_name='wsrep_cluster_size';", "Value", str) != 0) { printf("wsrep_cluster_size is not found in SHOW STATUS LIKE 'wsrep%%' results\n"); @@ -907,7 +864,7 @@ int Mariadb_nodes::get_server_id(int index) int id = -1; char str[1024]; - if (find_field(this->nodes[index], "SELECT @@server_id", "@@server_id", (char*) str) == 0) + if (find_field(this->nodes[index], "SELECT @@server_id", "@@server_id", (char*)str) == 0) { id = atoi(str); } @@ -992,19 +949,55 @@ char * Mariadb_nodes::ssh_node_output(int node, const char *ssh, bool sudo, int return result; } -int Mariadb_nodes::ssh_node(int node, const char *ssh, bool sudo) +int Mariadb_nodes::ssh_node(int node, bool sudo, const char *format, ...) { - char sys[strlen(ssh) + 1024]; - generate_ssh_cmd(sys, node, ssh, sudo); - int return_code = system(sys); - if (WIFEXITED(return_code)) + va_list valist; + + va_start(valist, format); + int message_len = vsnprintf(NULL, 0, format, valist); + va_end(valist); + + if (message_len < 0) { - return WEXITSTATUS(return_code); + return -1; + } + + char *sys = (char*)malloc(message_len + 1); + + va_start(valist, format); + vsnprintf(sys, message_len + 1, format, valist); + va_end(valist); + + char *cmd = (char*)malloc(message_len + 1024); + + if (strcmp(IP[node], "127.0.0.1") == 0) + { + sprintf(cmd, "bash"); } else { - return 256; + sprintf(cmd, + "ssh -i %s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o LogLevel=quiet %s@%s > /dev/null", + sshkey[node], access_user[node], IP[node]); } + int rc = 1; + FILE *in = popen(cmd, "w"); + + if (in) + { + if (sudo) + { + fprintf(in, "sudo su -\n"); + fprintf(in, "cd /home/%s\n", access_user[node]); + } + + fprintf(in, "%s\n", sys); + rc = pclose(in); + } + + free(sys); + free(cmd); + return rc; } int Mariadb_nodes::flush_hosts() @@ -1029,7 +1022,8 @@ int Mariadb_nodes::flush_hosts() local_result++; } - if (mysql_query(nodes[i], "SELECT CONCAT('\\'', user, '\\'@\\'', host, '\\'') FROM mysql.user WHERE user = ''") == 0) + if (mysql_query(nodes[i], + "SELECT CONCAT('\\'', user, '\\'@\\'', host, '\\'') FROM mysql.user WHERE user = ''") == 0) { MYSQL_RES *res = mysql_store_result(nodes[i]); @@ -1049,7 +1043,7 @@ int Mariadb_nodes::flush_hosts() { printf("Detected anonymous users, dropping them.\n"); - for (auto& s: users) + for (auto& s : users) { std::string query = "DROP USER "; query += s; @@ -1089,7 +1083,7 @@ int Mariadb_nodes::get_versions() for (int i = 0; i < N; i++) { - if ((local_result += find_field(nodes[i], (char *) "SELECT @@version", (char *) "@@version", version[i]))) + if ((local_result += find_field(nodes[i], "SELECT @@version", "@@version", version[i]))) { printf("Failed to get version: %s\n", mysql_error(nodes[i])); } @@ -1100,11 +1094,11 @@ int Mariadb_nodes::get_versions() str[0] = 0; } strcpy(version_major[i], version_number[i]); - if (strstr(version_major[i], "5.") == version_major[i]) + if (strstr(version_major[i], "5.") == version_major[i]) { version_major[i][3] = 0; } - if (strstr(version_major[i], "10.") == version_major[i]) + if (strstr(version_major[i], "10.") == version_major[i]) { version_major[i][4] = 0; } @@ -1153,7 +1147,7 @@ int Mariadb_nodes::truncate_mariadb_logs() for (int node = 0; node < N; node++) { char sys[1024]; - if (strcmp(IP[node], "127.0.0.1") !=0) + if (strcmp(IP[node], "127.0.0.1") != 0) { sprintf(sys, "ssh -i %s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o LogLevel=quiet %s@%s 'sudo truncate /var/lib/mysql/*.err --size 0;sudo rm -f /etc/my.cnf.d/binlog_enc*\' &", @@ -1176,13 +1170,13 @@ int Mariadb_nodes::configure_ssl(bool require) printf("Node %d\n", i); stop_node(i); sprintf(str, "%s/ssl-cert", test_dir); - local_result += copy_to_node(str, (char *) "~/", i); + local_result += copy_to_node(str, "~/", i); sprintf(str, "%s/ssl.cnf", test_dir); - local_result += copy_to_node(str, (char *) "~/", i); - local_result += ssh_node(i, (char *) "cp ~/ssl.cnf /etc/my.cnf.d/", true); - local_result += ssh_node(i, (char *) "cp -r ~/ssl-cert /etc/", true); - local_result += ssh_node(i, (char *) "chown mysql:mysql -R /etc/ssl-cert", true); - start_node(i, (char *) ""); + local_result += copy_to_node(str, "~/", i); + local_result += ssh_node(i, true, "cp ~/ssl.cnf /etc/my.cnf.d/"); + local_result += ssh_node(i, true, "cp -r ~/ssl-cert /etc/"); + local_result += ssh_node(i, true, "chown mysql:mysql -R /etc/ssl-cert"); + start_node(i, ""); } if (require) @@ -1190,14 +1184,9 @@ int Mariadb_nodes::configure_ssl(bool require) // Create DB user on first node printf("Set user to require ssl: %s\n", str); sprintf(str, "%s/create_user_ssl.sh", test_dir); - copy_to_node(str, (char *) "~/", 0); - - sprintf(str, "export node_user=\"%s\"; export node_password=\"%s\"; ./create_user_ssl.sh %s", - user_name, - password, - socket_cmd[0]); - printf("cmd: %s\n", str); - ssh_node(0, str, false); + copy_to_node(str, "~/", 0); + ssh_node(0, false, "export node_user=\"%s\"; export node_password=\"%s\"; " + "./create_user_ssl.sh %s", user_name, password, socket_cmd[0]); } return local_result; @@ -1211,14 +1200,14 @@ int Mariadb_nodes::disable_ssl() local_result += connect(); sprintf(str, "DROP USER %s; grant all privileges on *.* to '%s'@'%%' identified by '%s';", user_name, user_name, password); - local_result += execute_query(nodes[0], (char *) ""); + local_result += execute_query(nodes[0], ""); close_connections(); for (int i = 0; i < N; i++) { stop_node(i); - local_result += ssh_node(i, (char *) "rm -f /etc/my.cnf.d/ssl.cnf", true); - start_node(i, (char *) ""); + local_result += ssh_node(i, true, "rm -f /etc/my.cnf.d/ssl.cnf"); + start_node(i, ""); } return local_result; @@ -1240,7 +1229,7 @@ int Mariadb_nodes::copy_to_node(const char* src, const char* dest, int i) else { sprintf(sys, "scp -q -r -i %s -o UserKnownHostsFile=/dev/null " - "-o StrictHostKeyChecking=no -o LogLevel=quiet %s %s@%s:%s", + "-o StrictHostKeyChecking=no -o LogLevel=quiet %s %s@%s:%s", sshkey[i], src, access_user[i], IP[i], dest); } if (verbose) @@ -1251,7 +1240,6 @@ int Mariadb_nodes::copy_to_node(const char* src, const char* dest, int i) return system(sys); } - int Mariadb_nodes::copy_from_node(const char* src, const char* dest, int i) { if (i >= N) @@ -1267,7 +1255,7 @@ int Mariadb_nodes::copy_from_node(const char* src, const char* dest, int i) else { sprintf(sys, "scp -q -r -i %s -o UserKnownHostsFile=/dev/null " - "-o StrictHostKeyChecking=no -o LogLevel=quiet %s@%s:%s %s", + "-o StrictHostKeyChecking=no -o LogLevel=quiet %s@%s:%s %s", sshkey[i], access_user[i], IP[i], src, dest); } if (verbose) diff --git a/maxscale-system-test/mariadb_nodes.h b/maxscale-system-test/mariadb_nodes.h index f7052c0c2..c5d4d1ecf 100644 --- a/maxscale-system-test/mariadb_nodes.h +++ b/maxscale-system-test/mariadb_nodes.h @@ -291,7 +291,7 @@ public: * @param param command line parameters for DB server start command * @return 0 if success */ - int start_node(int node, char * param); + int start_node(int node, const char* param); /** * @brief Check node via ssh and restart it if it is not resposible @@ -354,11 +354,11 @@ public: /** * @brief executes shell command on the node using ssh * @param index number of the node (index) - * @param ssh command to execute * @param sudo if true the command is executed with root privelegues - * @return exit code of the coomand + * @param ssh command to execute + * @return exit code of the command */ - int ssh_node(int node, const char *ssh, bool sudo); + int ssh_node(int node, bool sudo, const char *ssh, ...); /** * @brief Execute 'mysqladmin flush-hosts' on all nodes diff --git a/maxscale-system-test/mxs280_select_outfile.cpp b/maxscale-system-test/mxs280_select_outfile.cpp index b00728e9b..4dc30c499 100644 --- a/maxscale-system-test/mxs280_select_outfile.cpp +++ b/maxscale-system-test/mxs280_select_outfile.cpp @@ -25,7 +25,7 @@ int main(int argc, char *argv[]) for (i = 0; i < Test->repl->N; i++) { Test->set_timeout(30); - Test->repl->ssh_node(i, (char *) "touch /tmp/t1.csv", true); + Test->repl->ssh_node(i, true, "touch /tmp/t1.csv"); } Test->add_result(create_t1(Test->conn_rwsplit), "Error creating t1\n"); @@ -42,7 +42,7 @@ int main(int argc, char *argv[]) for (i = 0; i < Test->repl->N; i++) { Test->set_timeout(30); - Test->repl->ssh_node(i, (char *) "rm -rf /tmp/t1.csv", true); + Test->repl->ssh_node(i, true, "rm -rf /tmp/t1.csv"); } Test->set_timeout(30); diff --git a/maxscale-system-test/mxs951_utfmb4.cpp b/maxscale-system-test/mxs951_utfmb4.cpp index c29fa0d8f..b07904589 100644 --- a/maxscale-system-test/mxs951_utfmb4.cpp +++ b/maxscale-system-test/mxs951_utfmb4.cpp @@ -28,8 +28,8 @@ int main(int argc, char *argv[]) sprintf(cmd, "%s/utf64.cnf", test_dir); for (int i = 0; i < Test->repl->N; i++) { - Test->repl->copy_to_node(cmd, (char *) "./", i); - Test->repl->ssh_node(i, (char *) "cp ./utf64.cnf /etc/my.cnf.d/", true); + Test->repl->copy_to_node(cmd, "./", i); + Test->repl->ssh_node(i, true, "cp ./utf64.cnf /etc/my.cnf.d/"); } Test->repl->start_replication(); @@ -55,7 +55,7 @@ int main(int argc, char *argv[]) Test->tprintf("Restore backend configuration\n"); for (int i = 0; i < Test->repl->N; i++) { - Test->repl->ssh_node(i, (char *) "rm /etc/my.cnf.d/utf64.cnf", true); + Test->repl->ssh_node(i, true, "rm /etc/my.cnf.d/utf64.cnf"); } Test->repl->start_replication(); diff --git a/maxscale-system-test/testconnections.cpp b/maxscale-system-test/testconnections.cpp index 9fc9042b3..b9adb73df 100644 --- a/maxscale-system-test/testconnections.cpp +++ b/maxscale-system-test/testconnections.cpp @@ -213,7 +213,7 @@ TestConnections::TestConnections(int argc, char *argv[]): if (use_snapshots) { - snapshot_reverted = revert_snapshot((char *) "clean"); + snapshot_reverted = revert_snapshot("clean"); } if (!snapshot_reverted && maxscale::check_nodes) @@ -602,7 +602,7 @@ void TestConnections::process_template(const char *template_name, const char *de } mdn[j]->connect(); - execute_query(mdn[j]->nodes[0], (char *) "CREATE DATABASE IF NOT EXISTS test"); + execute_query(mdn[j]->nodes[0], "CREATE DATABASE IF NOT EXISTS test"); mdn[j]->close_connections(); } @@ -616,7 +616,7 @@ void TestConnections::process_template(const char *template_name, const char *de { system("sed -i \"s/###repl51###/mysql51_replication=true/g\" maxscale.cnf"); } - copy_to_maxscale((char *) "maxscale.cnf", (char *) dest); + copy_to_maxscale("maxscale.cnf", (char *) dest); } int TestConnections::init_maxscale() @@ -711,7 +711,7 @@ int TestConnections::stop_maxscale() return res; } -int TestConnections::copy_mariadb_logs(Mariadb_nodes * repl, char * prefix) +int TestConnections::copy_mariadb_logs(Mariadb_nodes * repl, const char* prefix) { int local_result = 0; char * mariadb_log; @@ -726,7 +726,7 @@ int TestConnections::copy_mariadb_logs(Mariadb_nodes * repl, char * prefix) { if (strcmp(repl->IP[i], "127.0.0.1") != 0) // Do not copy MariaDB logs in case of local backend { - mariadb_log = repl->ssh_node_output(i, (char *) "cat /var/lib/mysql/*.err", true, &exit_code); + mariadb_log = repl->ssh_node_output(i, "cat /var/lib/mysql/*.err", true, &exit_code); sprintf(str, "LOGS/%s/%s%d_mariadb_log", test_name, prefix, i); f = fopen(str, "w"); if (f != NULL) @@ -752,8 +752,8 @@ int TestConnections::copy_all_logs() if (!no_backend_log_copy) { - copy_mariadb_logs(repl, (char *) "node"); - copy_mariadb_logs(galera, (char *) "galera"); + copy_mariadb_logs(repl, "node"); + copy_mariadb_logs(galera, "galera"); } sprintf(str, "%s/copy_logs.sh %s", test_dir, test_name); @@ -857,9 +857,9 @@ int TestConnections::start_binlog() repl->stop_nodes(); binlog = open_conn_no_db(binlog_port, maxscale_IP, repl->user_name, repl->password, ssl); - execute_query(binlog, (char *) "stop slave"); - execute_query(binlog, (char *) "reset slave all"); - execute_query(binlog, (char *) "reset master"); + execute_query(binlog, "stop slave"); + execute_query(binlog, "reset slave all"); + execute_query(binlog, "reset master"); mysql_close(binlog); tprintf("Stopping maxscale\n"); @@ -889,13 +889,13 @@ int TestConnections::start_binlog() add_result(ssh_maxscale(true, "ls -la %s/", maxscale_binlog_dir), "ls failed\n"); tprintf("show master status\n"); - find_field(repl->nodes[0], (char *) "show master status", (char *) "File", &log_file[0]); - find_field(repl->nodes[0], (char *) "show master status", (char *) "Position", &log_pos[0]); + find_field(repl->nodes[0], "show master status", "File", &log_file[0]); + find_field(repl->nodes[0], "show master status", "Position", &log_pos[0]); tprintf("Real master file: %s\n", log_file); tprintf("Real master pos : %s\n", log_pos); tprintf("Stopping first slave (node 1)\n"); - try_query(repl->nodes[1], (char *) "stop slave;"); + try_query(repl->nodes[1], "stop slave;"); //repl->no_set_pos = true; repl->no_set_pos = false; tprintf("Configure first backend slave node to be slave of real master\n"); @@ -931,8 +931,8 @@ int TestConnections::start_binlog() // get Master status from Maxscale binlog tprintf("show master status\n"); fflush(stdout); - find_field(binlog, (char *) "show master status", (char *) "File", &log_file[0]); - find_field(binlog, (char *) "show master status", (char *) "Position", &log_pos[0]); + find_field(binlog, "show master status", "File", &log_file[0]); + find_field(binlog, "show master status", "Position", &log_pos[0]); tprintf("Maxscale binlog master file: %s\n", log_file); fflush(stdout); @@ -943,7 +943,7 @@ int TestConnections::start_binlog() fflush(stdout); for (i = 2; i < repl->N; i++) { - try_query(repl->nodes[i], (char *) "stop slave;"); + try_query(repl->nodes[i], "stop slave;"); repl->set_slave(repl->nodes[i], maxscale_IP, binlog_port, log_file, log_pos); } repl->close_connections(); @@ -1012,23 +1012,23 @@ int TestConnections::start_mm() for (i = 0; i < 2; i++) { tprintf("Starting back node %d\n", i); - global_result += repl->start_node(i, (char *) ""); + global_result += repl->start_node(i, ""); } repl->connect(); for (i = 0; i < 2; i++) { - execute_query(repl->nodes[i], (char *) "stop slave"); - execute_query(repl->nodes[i], (char *) "reset master"); + execute_query(repl->nodes[i], "stop slave"); + execute_query(repl->nodes[i], "reset master"); } - execute_query(repl->nodes[0], (char *) "SET GLOBAL READ_ONLY=ON"); + execute_query(repl->nodes[0], "SET GLOBAL READ_ONLY=ON"); - find_field(repl->nodes[0], (char *) "show master status", (char *) "File", log_file1); - find_field(repl->nodes[0], (char *) "show master status", (char *) "Position", log_pos1); + find_field(repl->nodes[0], "show master status", "File", log_file1); + find_field(repl->nodes[0], "show master status", "Position", log_pos1); - find_field(repl->nodes[1], (char *) "show master status", (char *) "File", log_file2); - find_field(repl->nodes[1], (char *) "show master status", (char *) "Position", log_pos2); + find_field(repl->nodes[1], "show master status", "File", log_file2); + find_field(repl->nodes[1], "show master status", "Position", log_pos2); repl->set_slave(repl->nodes[0], repl->IP[1], repl->port[1], log_file2, log_pos2); repl->set_slave(repl->nodes[1], repl->IP[0], repl->port[0], log_file1, log_pos1); @@ -1056,11 +1056,11 @@ void TestConnections::check_log_err(const char * err_msg, bool expected) set_timeout(50); tprintf("Reading maxscale.log\n"); - if ( ( read_log((char *) "maxscale.log", &err_log_content) != 0) || (strlen(err_log_content) < 2) ) + if ( ( read_log("maxscale.log", &err_log_content) != 0) || (strlen(err_log_content) < 2) ) { tprintf("Reading maxscale1.log\n"); free(err_log_content); - if (read_log((char *) "maxscale1.log", &err_log_content) != 0) + if (read_log("maxscale1.log", &err_log_content) != 0) { add_result(1, "Error reading log\n"); } @@ -1103,7 +1103,7 @@ int TestConnections::find_connected_slave(int * global_result) repl->connect(); for (int i = 0; i < repl->N; i++) { - conn_num = get_conn_num(repl->nodes[i], maxscale_ip(), maxscale_hostname, (char *) "test"); + conn_num = get_conn_num(repl->nodes[i], maxscale_ip(), maxscale_hostname, "test"); tprintf("connections to %d: %u\n", i, conn_num); if ((i == 0) && (conn_num != 1)) { @@ -1134,7 +1134,7 @@ int TestConnections::find_connected_slave1() repl->connect(); for (int i = 0; i < repl->N; i++) { - conn_num = get_conn_num(repl->nodes[i], maxscale_ip(), maxscale_hostname, (char *) "test"); + conn_num = get_conn_num(repl->nodes[i], maxscale_ip(), maxscale_hostname, "test"); tprintf("connections to %d: %u\n", i, conn_num); all_conn += conn_num; if ((i != 0) && (conn_num != 0)) @@ -1183,13 +1183,13 @@ int TestConnections::check_maxscale_alive() tprintf("Trying simple query against all sevices\n"); tprintf("RWSplit \n"); set_timeout(10); - try_query(conn_rwsplit, (char *) "show databases;"); + try_query(conn_rwsplit, "show databases;"); tprintf("ReadConn Master \n"); set_timeout(10); - try_query(conn_master, (char *) "show databases;"); + try_query(conn_master, "show databases;"); tprintf("ReadConn Slave \n"); set_timeout(10); - try_query(conn_slave, (char *) "show databases;"); + try_query(conn_slave, "show databases;"); set_timeout(10); close_maxscale_connections() ; add_result(global_result - gr, "Maxscale is not alive\n"); @@ -1377,7 +1377,7 @@ int TestConnections::copy_to_maxscale(const char* src, const char* dest) { sprintf(sys, "scp -q -i %s -o UserKnownHostsFile=/dev/null " - "-o StrictHostKeyChecking=no -o LogLevel=quiet %s %s@%s:%s", + "-o StrictHostKeyChecking=no -o LogLevel=quiet %s %s@%s:%s", maxscale_keyfile, src, maxscale_access_user, maxscale_IP, dest); } return system(sys); @@ -1395,7 +1395,7 @@ int TestConnections::copy_from_maxscale(char* src, char* dest) else { sprintf(sys, "scp -i %s -o UserKnownHostsFile=/dev/null " - "-o StrictHostKeyChecking=no -o LogLevel=quiet %s@%s:%s %s", + "-o StrictHostKeyChecking=no -o LogLevel=quiet %s@%s:%s %s", maxscale_keyfile, maxscale_access_user, maxscale_IP, src, dest); } return system(sys); @@ -1562,12 +1562,12 @@ int TestConnections::get_client_ip(char * ip) unsigned int conn_num = 0; connect_rwsplit(); - if (execute_query(conn_rwsplit, (char *) "CREATE DATABASE IF NOT EXISTS db_to_check_clent_ip") != 0 ) + if (execute_query(conn_rwsplit, "CREATE DATABASE IF NOT EXISTS db_to_check_clent_ip") != 0 ) { return ret; } close_rwsplit(); - conn = open_conn_db(rwsplit_port, maxscale_IP, (char *) "db_to_check_clent_ip", maxscale_user, + conn = open_conn_db(rwsplit_port, maxscale_IP, "db_to_check_clent_ip", maxscale_user, maxscale_password, ssl); if (conn != NULL) @@ -1830,7 +1830,8 @@ int TestConnections::try_query(MYSQL *conn, const char *format, ...) va_end(valist); int res = execute_query1(conn, sql, false); - add_result(res, "Query '%.*s%s' failed!\n", message_len < 100 ? message_len : 100, sql, message_len < 100 ? "" : "..."); + add_result(res, "Query '%.*s%s' failed!\n", message_len < 100 ? message_len : 100, sql, + message_len < 100 ? "" : "..."); return res; } @@ -1851,7 +1852,7 @@ int TestConnections::find_master_maxadmin(Mariadb_nodes * nodes) char show_server[256]; char res[256]; sprintf(show_server, "show server server%d", i + 1); - get_maxadmin_param(show_server, (char *) "Status", res); + get_maxadmin_param(show_server, "Status", res); if (strstr(res, "Master")) { @@ -1879,7 +1880,7 @@ int TestConnections::find_slave_maxadmin(Mariadb_nodes * nodes) char show_server[256]; char res[256]; sprintf(show_server, "show server server%d", i + 1); - get_maxadmin_param(show_server, (char *) "Status", res); + get_maxadmin_param(show_server, "Status", res); if (strstr(res, "Slave")) { @@ -1934,14 +1935,9 @@ int TestConnections::check_maxadmin_param(const char *command, const char *param return rval; } -int TestConnections::get_maxadmin_param(char *command, char *param, char *result) +int TestConnections::get_maxadmin_param(const char* command, const char* param, char* result) { - char * buf; - - buf = ssh_maxscale_output(true, "maxadmin %s", command); - - //printf("%s\n", buf); - + char* buf = ssh_maxscale_output(true, "maxadmin %s", command); char *x = strstr(buf, param); if (x == NULL) @@ -1981,7 +1977,7 @@ int TestConnections::list_dirs() for (int i = 0; i < repl->N; i++) { tprintf("ls on node %d\n", i); - repl->ssh_node(i, (char *) "ls -la /var/lib/mysql", true); + repl->ssh_node(i, true, "ls -la /var/lib/mysql"); fflush(stdout); } tprintf("ls maxscale \n"); @@ -2034,7 +2030,7 @@ int TestConnections::take_snapshot(char * snapshot_name) return system(str); } -int TestConnections::revert_snapshot(char * snapshot_name) +int TestConnections::revert_snapshot(const char* snapshot_name) { char str[4096]; sprintf(str, "%s %s", revert_snapshot_command, snapshot_name); diff --git a/maxscale-system-test/testconnections.h b/maxscale-system-test/testconnections.h index a79c01a93..a975a93ae 100644 --- a/maxscale-system-test/testconnections.h +++ b/maxscale-system-test/testconnections.h @@ -220,7 +220,7 @@ public: * @param prefix file name prefix * @return 0 if success */ - int copy_mariadb_logs(Mariadb_nodes *repl, char * prefix); + int copy_mariadb_logs(Mariadb_nodes *repl, const char* prefix); /** * @brief no_backend_log_copy if true logs from backends are not copied (needed if case of Aurora RDS backend or similar) @@ -647,7 +647,7 @@ public: int execute_maxadmin_command(char * cmd); int execute_maxadmin_command_print(char * cmd); int check_maxadmin_param(const char *command, const char *param, const char *value); - int get_maxadmin_param(char *command, char *param, char *result); + int get_maxadmin_param(const char* command, const char* param, char* result); void check_current_operations(int value); void check_current_connections(int value); @@ -682,7 +682,7 @@ public: * @param snapshot_name name of snapshot to revert * @return 0 in case of success or mdbci error code in case of error */ - int revert_snapshot(char * snapshot_name); + int revert_snapshot(const char* snapshot_name); /** * @brief Test a bad configuration From d3acb9573f7cbb9a7e2045017d468b69c635fe1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Mon, 2 Oct 2017 00:49:17 +0300 Subject: [PATCH 09/18] Do minor improvements to monitor tests The mm test was on the heavy side and the workload can be reduced for the sake of testing only the functionality. The mm_mysqlmon test did not check the initial state of the server. Although not necessary, adding it will help detect test failures caused by broken replication. mxs1457_ignore_deleted did not stop the monitors before breaking the replication and attempting to use MaxScale. As the test expects authentication to work regardless of the actual state of the servers, the monitors need to be disabled before a query is attempted. --- maxscale-system-test/mm.cpp | 20 +++++++++---------- maxscale-system-test/mm_mysqlmon.cpp | 6 ++++++ .../mxs1457_ignore_deleted.cpp | 5 +++++ 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/maxscale-system-test/mm.cpp b/maxscale-system-test/mm.cpp index 0b61a64f6..225d77e52 100644 --- a/maxscale-system-test/mm.cpp +++ b/maxscale-system-test/mm.cpp @@ -34,12 +34,12 @@ int check_conf(TestConnections* Test, int blocked_node) Test->repl->connect(); Test->connect_rwsplit(); create_t1(Test->conn_rwsplit); - global_result += insert_into_t1(Test->conn_rwsplit, 4); + global_result += insert_into_t1(Test->conn_rwsplit, 3); printf("Sleeping to let replication happen\n"); fflush(stdout); Test->stop_timeout(); - sleep(30); + sleep(15); for (int i = 0; i < 2; i++) { @@ -47,13 +47,13 @@ int check_conf(TestConnections* Test, int blocked_node) { Test->tprintf("Checking data from node %d (%s)\n", i, Test->repl->IP[i]); Test->set_timeout(100); - global_result += select_from_t1(Test->repl->nodes[i], 4); + global_result += select_from_t1(Test->repl->nodes[i], 3); } } Test->set_timeout(100); printf("Checking data from rwsplit\n"); fflush(stdout); - global_result += select_from_t1(Test->conn_rwsplit, 4); + global_result += select_from_t1(Test->conn_rwsplit, 3); global_result += execute_query(Test->conn_rwsplit, "DROP TABLE t1"); Test->repl->close_connections(); @@ -95,7 +95,7 @@ int main(int argc, char *argv[]) Test->tprintf("Block slave\n"); Test->repl->block_node(0); Test->stop_timeout(); - sleep(15); + sleep(10); Test->set_timeout(120); Test->get_maxadmin_param((char *) "show server server1", (char *) "Status:", maxadmin_result); printf("node0 %s\n", maxadmin_result); @@ -110,12 +110,12 @@ int main(int argc, char *argv[]) Test->set_timeout(120); Test->tprintf("Unlock slave\n"); Test->repl->unblock_node(0); - sleep(15); + sleep(10); Test->set_timeout(120); Test->tprintf("Block master\n"); Test->repl->block_node(1); - sleep(15); + sleep(10); Test->get_maxadmin_param((char *) "show server server2", (char *) "Status:", maxadmin_result); printf("node1 %s\n", maxadmin_result); if (strstr(maxadmin_result, "Down") == NULL ) @@ -129,21 +129,21 @@ int main(int argc, char *argv[]) execute_query(Test->repl->nodes[0], (char *) "SET GLOBAL READ_ONLY=OFF"); Test->repl->close_connections(); - sleep(15); + sleep(10); Test->set_timeout(120); Test->tprintf("Put some data and check\n"); Test->add_result(check_conf(Test, 1), "configuration broken\n"); printf("Unlock slave\n"); Test->repl->unblock_node(1); - sleep(15); + sleep(10); Test->set_timeout(120); printf("Make node 2 slave\n"); Test->repl->connect(); execute_query(Test->repl->nodes[1], (char *) "SET GLOBAL READ_ONLY=ON"); Test->repl->close_connections(); - sleep(15); + sleep(10); Test->set_timeout(120); printf("Put some data and check\n"); diff --git a/maxscale-system-test/mm_mysqlmon.cpp b/maxscale-system-test/mm_mysqlmon.cpp index 0c2677c31..d2dffeebe 100644 --- a/maxscale-system-test/mm_mysqlmon.cpp +++ b/maxscale-system-test/mm_mysqlmon.cpp @@ -92,6 +92,12 @@ int main(int argc, char *argv[]) { TestConnections * Test = new TestConnections(argc, argv); + Test->tprintf("Checking initial state of the servers"); + check_status(Test, "server1", "Master, Running"); + check_status(Test, "server2", "Slave, Running"); + check_status(Test, "server3", "Slave, Running"); + check_status(Test, "server4", "Slave, Running"); + Test->tprintf("Test 1 - Configure all servers into a multi-master ring with one slave"); Test->set_timeout(120); diff --git a/maxscale-system-test/mxs1457_ignore_deleted.cpp b/maxscale-system-test/mxs1457_ignore_deleted.cpp index 753ea5d07..a0ee712eb 100644 --- a/maxscale-system-test/mxs1457_ignore_deleted.cpp +++ b/maxscale-system-test/mxs1457_ignore_deleted.cpp @@ -17,6 +17,11 @@ int main(int argc, char *argv[]) test.repl->sync_slaves(); test.repl->close_connections(); + /** + * The monitor needs to be stopped before the slaves are stopped to prevent + * it from detecting the broken replication. + */ + test.ssh_maxscale(true, "maxadmin shutdown monitor \"MySQL Monitor\""); // Stop slaves and drop the user on the master test.repl->stop_slaves(); test.repl->connect(); From 1fd50222f839b46b391cc50eb347362684a477f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Mon, 2 Oct 2017 10:01:54 +0300 Subject: [PATCH 10/18] Remove .secrets on test startup The `.secrets` file does not match shell globbing by default so it should be explicitly removed. --- maxscale-system-test/testconnections.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/maxscale-system-test/testconnections.cpp b/maxscale-system-test/testconnections.cpp index b9adb73df..9c75a67d6 100644 --- a/maxscale-system-test/testconnections.cpp +++ b/maxscale-system-test/testconnections.cpp @@ -643,6 +643,7 @@ int TestConnections::init_maxscale() "%s" "iptables -I INPUT -p tcp --dport 4001 -j ACCEPT;" "rm -f %s/maxscale.log %s/maxscale1.log;" + "rm -f /var/log/maxscale/.secrets;" // Needs to be explicitly deleted "rm -rf /tmp/core* /dev/shm/* /var/lib/maxscale/maxscale.cnf.d/ /var/lib/maxscale/*;" "%s", maxscale_access_homedir, maxscale_access_homedir, maxscale_access_homedir, From 6f84f960ff80e4122ba59bb3bed33a1b30edd32e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Mon, 2 Oct 2017 10:32:46 +0300 Subject: [PATCH 11/18] Add fatal signal hanlder to TestConnections The tests now print a stack trace to standard output if it receives a fatal signal. This will help debug test failures caused by broken tests and unexpected results. --- maxscale-system-test/testconnections.cpp | 32 ++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/maxscale-system-test/testconnections.cpp b/maxscale-system-test/testconnections.cpp index 9c75a67d6..73a997e77 100644 --- a/maxscale-system-test/testconnections.cpp +++ b/maxscale-system-test/testconnections.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include "mariadb_func.h" #include "maxadmin_operations.h" @@ -18,6 +20,28 @@ static std::string required_repl_version; static std::string required_galera_version; } +static int signal_set(int sig, void (*handler)(int)) +{ + struct sigaction sigact = {}; + sigact.sa_handler = handler; + + do + { + errno = 0; + sigaction(sig, &sigact, NULL); + } + while (errno == EINTR); +} + +void sigfatal_handler(int i) +{ + void *addrs[128]; + int count = backtrace(addrs, 128); + backtrace_symbols_fd(addrs, count, STDERR_FILENO); + signal_set(i, SIG_DFL); + raise(i); +} + void TestConnections::check_nodes(bool value) { maxscale::check_nodes = value; @@ -44,6 +68,14 @@ TestConnections::TestConnections(int argc, char *argv[]): global_result(0), binlog_cmd_option(0), enable_timeouts(true), use_ipv6(false), no_galera(false) { + signal_set(SIGSEGV, sigfatal_handler); + signal_set(SIGABRT, sigfatal_handler); + signal_set(SIGFPE, sigfatal_handler); + signal_set(SIGILL, sigfatal_handler); +#ifdef SIGBUS + signal_set(SIGBUS, sigfatal_handler); +#endif + chdir(test_dir); gettimeofday(&start_time, NULL); ports[0] = rwsplit_port; From 1772cc9021ec5759458d9a5320492a0f9ae3b778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Mon, 2 Oct 2017 10:47:27 +0300 Subject: [PATCH 12/18] Move blr_set_checksum into blr.h The function is used in two different files so it needs to be in the header. --- server/modules/routing/binlogrouter/blr.c | 1 - server/modules/routing/binlogrouter/blr.h | 1 + server/modules/routing/binlogrouter/blr_master.c | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/server/modules/routing/binlogrouter/blr.c b/server/modules/routing/binlogrouter/blr.c index fce007d14..fb17426c7 100644 --- a/server/modules/routing/binlogrouter/blr.c +++ b/server/modules/routing/binlogrouter/blr.c @@ -125,7 +125,6 @@ static bool rses_begin_locked_router_action(ROUTER_SLAVE *); static void rses_end_locked_router_action(ROUTER_SLAVE *); GWBUF *blr_cache_read_response(ROUTER_INSTANCE *router, char *response); /* Set checksum value in router instance from a master reply or saved buffer */ -extern void blr_set_checksum(ROUTER_INSTANCE *instance, GWBUF *buf); static SPINLOCK instlock; static ROUTER_INSTANCE *instances; diff --git a/server/modules/routing/binlogrouter/blr.h b/server/modules/routing/binlogrouter/blr.h index 67c986430..029fcb788 100644 --- a/server/modules/routing/binlogrouter/blr.h +++ b/server/modules/routing/binlogrouter/blr.h @@ -788,6 +788,7 @@ extern const char *blr_get_encryption_algorithm(int); extern int blr_check_encryption_algorithm(char *); extern const char *blr_encryption_algorithm_list(void); extern bool blr_get_encryption_key(ROUTER_INSTANCE *); +extern void blr_set_checksum(ROUTER_INSTANCE *instance, GWBUF *buf); MXS_END_DECLS diff --git a/server/modules/routing/binlogrouter/blr_master.c b/server/modules/routing/binlogrouter/blr_master.c index fb8e367e0..ae0d1c352 100644 --- a/server/modules/routing/binlogrouter/blr_master.c +++ b/server/modules/routing/binlogrouter/blr_master.c @@ -108,7 +108,6 @@ static int blr_get_master_semisync(GWBUF *buf); static void blr_terminate_master_replication(ROUTER_INSTANCE *router, uint8_t* ptr, int len); void blr_notify_all_slaves(ROUTER_INSTANCE *router); extern bool blr_notify_waiting_slave(ROUTER_SLAVE *slave); -void blr_set_checksum(ROUTER_INSTANCE *inst, GWBUF *buf); static int keepalive = 1; From 6a33e55b6f6eadb667ab818fe73cf1db4ea8c26e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Mon, 2 Oct 2017 10:55:00 +0300 Subject: [PATCH 13/18] Backport no_vm_revert option The VM revert on test failure was added to 2.1 but it was only disabled in 2.2. --- maxscale-system-test/README.md | 1 + maxscale-system-test/testconnections.cpp | 20 +++++++++++++++++--- maxscale-system-test/testconnections.h | 6 ++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/maxscale-system-test/README.md b/maxscale-system-test/README.md index 1e332b408..6f3314a4b 100644 --- a/maxscale-system-test/README.md +++ b/maxscale-system-test/README.md @@ -65,3 +65,4 @@ System level tests for MaxScale |no_nodes_check|if yes backend checks are not executed (needed in case of RDS or similar backend)| |no_backend_log_copy|if yes logs from backend nodes are not copied (needed in case of RDS or similar backend)| |no_maxscale_start|Do not start Maxscale automatically| +|no_vm_revert|If true tests do not revert VMs after the test even if test failed| diff --git a/maxscale-system-test/testconnections.cpp b/maxscale-system-test/testconnections.cpp index 73a997e77..8aee42d81 100644 --- a/maxscale-system-test/testconnections.cpp +++ b/maxscale-system-test/testconnections.cpp @@ -66,7 +66,8 @@ TestConnections::TestConnections(int argc, char *argv[]): no_backend_log_copy(false), use_snapshots(false), verbose(false), rwsplit_port(4006), readconn_master_port(4008), readconn_slave_port(4009), binlog_port(5306), global_result(0), binlog_cmd_option(0), enable_timeouts(true), use_ipv6(false), - no_galera(false) + no_galera(false), + no_vm_revert(true) { signal_set(SIGSEGV, sigfatal_handler); signal_set(SIGABRT, sigfatal_handler); @@ -303,8 +304,15 @@ TestConnections::~TestConnections() if (global_result != 0 ) { - tprintf("Reverting snapshot\n"); - revert_snapshot((char*) "clean"); + if (no_vm_revert) + { + tprintf("no_vm_revert flag is set, not reverting VMs\n"); + } + else + { + tprintf("Reverting snapshot\n"); + revert_snapshot((char*) "clean"); + } } delete repl; @@ -551,6 +559,12 @@ int TestConnections::read_env() { maxscale::start = false; } + + env = getenv("no_vm_revert"); + if ((env != NULL) && ((strcasecmp(env, "no") == 0) || (strcasecmp(env, "false") == 0) )) + { + no_vm_revert = false; + } } int TestConnections::print_env() diff --git a/maxscale-system-test/testconnections.h b/maxscale-system-test/testconnections.h index a975a93ae..6d41ec5d0 100644 --- a/maxscale-system-test/testconnections.h +++ b/maxscale-system-test/testconnections.h @@ -257,6 +257,12 @@ public: */ bool no_galera; + /** + * @brief no_vm_revert If true tests do not revert VMs after the test even if test failed + * (use it for debugging) + */ + bool no_vm_revert; + /** * @brief ssl_options string with ssl configuration for command line client */ From 4f675997fb4fe7b1575e07f47339f673dac06923 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Mon, 2 Oct 2017 11:22:13 +0300 Subject: [PATCH 14/18] Use correct binlog position in mm_mysqlmon Position 310 is not guaranteed to always exist on the master if the binlog is just created. Position 4 will always be a valid position. --- maxscale-system-test/mm_mysqlmon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxscale-system-test/mm_mysqlmon.cpp b/maxscale-system-test/mm_mysqlmon.cpp index d2dffeebe..f987b2692 100644 --- a/maxscale-system-test/mm_mysqlmon.cpp +++ b/maxscale-system-test/mm_mysqlmon.cpp @@ -84,7 +84,7 @@ void check_group(TestConnections *Test, const char *server, const char *group) void change_master(TestConnections *Test, int slave, int master) { execute_query(Test->repl->nodes[slave], "CHANGE MASTER TO master_host='%s', master_port=3306, " - "master_log_file='mar-bin.000001', master_log_pos=310, master_user='repl', master_password='repl';START SLAVE", + "master_log_file='mar-bin.000001', master_log_pos=4, master_user='repl', master_password='repl';START SLAVE", Test->repl->IP[master], Test->repl->user_name, Test->repl->password); } From 0cd62c6f2948ca84572ebb24e0dce582384ce1dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Mon, 2 Oct 2017 11:46:38 +0300 Subject: [PATCH 15/18] Make bug509 more Galera friendly The test now skips execution of inserts to separate nodes. This should guarantee that the nodes are synced at all times. The test now does the inserts and selects inside a transaction to force the reads to the same node where the inserts are done. --- maxscale-system-test/bug509.cpp | 102 +++++++------------------------- 1 file changed, 22 insertions(+), 80 deletions(-) diff --git a/maxscale-system-test/bug509.cpp b/maxscale-system-test/bug509.cpp index 56bd204a7..f7d501748 100644 --- a/maxscale-system-test/bug509.cpp +++ b/maxscale-system-test/bug509.cpp @@ -1,39 +1,14 @@ /** - * @file bug509.cpp regression case for bug 509 and 507 ( "Referring to a nonexisting server in servers=... doesn't even raise a warning" - * and "rw-split router does not send last_insert_id() to master" ) + * @file bug509.cpp regression case for bug 509 "rw-split router does not send last_insert_id() to master" * * - "CREATE TABLE t2 (id INT(10) NOT NULL AUTO_INCREMENT, x int, PRIMARY KEY (id));", * - do a number of INSERTs first using RWsplit, then directly Galera nodes. * - do "select @@wsrep_node_address, last_insert_id();" and "select last_insert_id(), @@wsrep_node_address;" and compares results. - * - do "insert into t2 (x) values (i);" 1000 times and compares results of + * - do "insert into t2 (x) values (i);" 50 times and compares results of * "select @@wsrep_node_address, last_insert_id();" and "select last_insert_id(), @@wsrep_node_address;" * - * Test fails if results are different (after 5 seconds of waiting after last INSERT) */ -/* -Kolbe Kegel 2014-09-01 14:48:12 UTC -For some reason, the order of terms in the field list of a SELECT statement influences how the rw-split router decides where to send a statement. - -mariadb> select @@wsrep_node_address, last_insert_id(); -+----------------------+------------------+ -| @@wsrep_node_address | last_insert_id() | -+----------------------+------------------+ -| 192.168.30.31 | 7 | -+----------------------+------------------+ -1 row in set (0.00 sec) - -mariadb> select last_insert_id(), @@wsrep_node_address; -+------------------+----------------------+ -| last_insert_id() | @@wsrep_node_address | -+------------------+----------------------+ -| 0 | 192.168.30.33 | -+------------------+----------------------+ -1 row in set (0.00 sec) -Comment 1 Vilho Raatikka 2014-09-03 20:44:17 UTC -Added code to detect last_insert_id() function and now both types of elements are routed to master and their order of appearance doesn't matter. -*/ - #include #include "testconnections.h" @@ -49,59 +24,22 @@ int main(int argc, char *argv[]) Test->galera->connect(); Test->connect_maxscale(); - if (Test->galera->N < 3) - { - Test->tprintf("There is not enoght nodes for test\n"); - delete Test; - exit(1); - } + Test->tprintf("Creating table"); + Test->try_query(Test->conn_rwsplit, "DROP TABLE IF EXISTS t2;"); + Test->try_query(Test->conn_rwsplit, "CREATE TABLE t2 (id INT(10) NOT NULL AUTO_INCREMENT, x int, PRIMARY KEY (id));"); + Test->tprintf("Doing INSERT through readwritesplit"); + Test->try_query(Test->conn_rwsplit, "START TRANSACTION"); + Test->try_query(Test->conn_rwsplit, "insert into t2 (x) values (1);"); - Test->tprintf("Creating table\n"); - Test->try_query(Test->conn_rwsplit, (char *) "DROP TABLE IF EXISTS t2;"); - Test->try_query(Test->conn_rwsplit, - (char *) "CREATE TABLE t2 (id INT(10) NOT NULL AUTO_INCREMENT, x int, PRIMARY KEY (id));"); - Test->tprintf("Doing INSERTs\n"); - Test->try_query(Test->conn_rwsplit, (char *) "insert into t2 (x) values (1);"); + char last_insert_id1[1024] = ""; + char last_insert_id2[1024] = ""; + find_field(Test->conn_rwsplit, sel1, "last_insert_id()", last_insert_id1); + find_field(Test->conn_rwsplit, sel2, "last_insert_id()", last_insert_id2); + Test->try_query(Test->conn_rwsplit, "COMMIT"); - Test->try_query(Test->galera->nodes[0], (char *) "insert into t2 (x) values (2);"); - Test->try_query(Test->galera->nodes[0], (char *) "insert into t2 (x) values (3);"); - - Test->try_query(Test->galera->nodes[1], (char *) "insert into t2 (x) values (4);"); - Test->try_query(Test->galera->nodes[1], (char *) "insert into t2 (x) values (5);"); - Test->try_query(Test->galera->nodes[1], (char *) "insert into t2 (x) values (6);"); - - Test->try_query(Test->galera->nodes[2], (char *) "insert into t2 (x) values (7);"); - Test->try_query(Test->galera->nodes[2], (char *) "insert into t2 (x) values (8);"); - Test->try_query(Test->galera->nodes[2], (char *) "insert into t2 (x) values (9);"); - Test->try_query(Test->galera->nodes[2], (char *) "insert into t2 (x) values (10);"); - - Test->stop_timeout(); - Test->repl->sync_slaves(); - - Test->tprintf("Trying \n"); - char last_insert_id1[1024]; - char last_insert_id2[1024]; - if ( ( - find_field( - Test->conn_rwsplit, sel1, - "last_insert_id()", &last_insert_id1[0]) - != 0 ) || ( - find_field( - Test->conn_rwsplit, sel2, - "last_insert_id()", &last_insert_id2[0]) - != 0 )) - { - Test->tprintf("last_insert_id() fied not found!!\n"); - delete Test; - exit(1); - } - else - { - Test->tprintf("'%s' gave last_insert_id() %s\n", sel1, last_insert_id1); - Test->tprintf("'%s' gave last_insert_id() %s\n", sel2, last_insert_id2); - Test->add_result(strcmp(last_insert_id1, last_insert_id2), - "last_insert_id() are different depending in which order terms are in SELECT\n"); - } + Test->tprintf("'%s' gave last_insert_id() %s", sel1, last_insert_id1); + Test->tprintf("'%s' gave last_insert_id() %s", sel2, last_insert_id2); + Test->add_result(strcmp(last_insert_id1, last_insert_id2), "last_insert_id() are different depending in which order terms are in SELECT"); char id_str[1024]; char str1[1024]; @@ -110,12 +48,13 @@ int main(int argc, char *argv[]) for (int i = 100; i < iterations; i++) { Test->set_timeout(50); + Test->try_query(Test->conn_rwsplit, "START TRANSACTION"); Test->add_result(execute_query(Test->conn_rwsplit, "insert into t2 (x) values (%d);", i), "Query failed"); sprintf(str1, "select * from t2 where x=%d;", i); - find_field(Test->conn_rwsplit, sel1, "last_insert_id()", &last_insert_id1[0]); - find_field(Test->conn_rwsplit, str1, "id", &id_str[0]); + find_field(Test->conn_rwsplit, sel1, "last_insert_id()", last_insert_id1); + find_field(Test->conn_rwsplit, str1, "id", id_str); int n = 0; @@ -127,6 +66,8 @@ int main(int argc, char *argv[]) n++; } + Test->try_query(Test->conn_rwsplit, "COMMIT"); + Test->add_result(strcmp(last_insert_id1, id_str), "last_insert_id is not equal to id even after waiting 5 seconds"); @@ -136,6 +77,7 @@ int main(int argc, char *argv[]) } } + Test->try_query(Test->conn_rwsplit, "DROP TABLE t2;"); Test->check_maxscale_alive(); int rval = Test->global_result; From f1f8a4b5b261697d046f195105b46e39e4720e23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Tue, 3 Oct 2017 01:37:12 +0300 Subject: [PATCH 16/18] MXS-1367: Retry interrupted queries The new `query_retries` parameter controls how many times an interrupted query is retried. This retrying of interrupted queries will reduce the rate of false positives that MaxScale monitors detect. --- .../Getting-Started/Configuration-Guide.md | 9 ++++ .../MaxScale-2.1.10-Release-Notes.md | 12 +++++ include/maxscale/config.h | 1 + include/maxscale/mysql_utils.h | 24 ++++++++- server/core/config.c | 16 ++++++ server/core/maxscale/config.h | 1 + server/core/mysql_utils.c | 49 ++++++++++++++----- 7 files changed, 99 insertions(+), 13 deletions(-) diff --git a/Documentation/Getting-Started/Configuration-Guide.md b/Documentation/Getting-Started/Configuration-Guide.md index c87bf1a56..6a66d1319 100644 --- a/Documentation/Getting-Started/Configuration-Guide.md +++ b/Documentation/Getting-Started/Configuration-Guide.md @@ -172,6 +172,15 @@ write or modify the data in the backend server. The default is 2 seconds. auth_write_timeout=10 ``` +#### `query_retries` + +The number of times an interrupted query will be retried. This feature was added +in MaxScale 2.1.10 and is disabled by default. + +An interrupted query is any query that is interrupted by a network +error. Connection timeouts will not trigger a reconnection as it is advisable to +increase the timeouts rather than to try timed out queries again. + #### `ms_timestamp` Enable or disable the high precision timestamps in logfiles. Enabling this adds diff --git a/Documentation/Release-Notes/MaxScale-2.1.10-Release-Notes.md b/Documentation/Release-Notes/MaxScale-2.1.10-Release-Notes.md index 49f3c4660..b5394f4eb 100644 --- a/Documentation/Release-Notes/MaxScale-2.1.10-Release-Notes.md +++ b/Documentation/Release-Notes/MaxScale-2.1.10-Release-Notes.md @@ -22,6 +22,18 @@ release notes: For any problems you encounter, please consider submitting a bug report at [Jira](https://jira.mariadb.org). +## Changed Features + +### Internal Query Retries + +The internal SQL queries that MaxScale executes to load database users as well +as monitor the database itself can now be automatically retried if they are +interrupted. The new global parameter, `query_retries` controls the number of +retry attempts each query will receive if it fails due to a network problem. + +To enable this functionality, add `query_retries=` under the +`[maxscale]` section where __ is a positive integer. + ## Bug fixes [Here is a list of bugs fixed in MaxScale 2.1.10.](https://jira.mariadb.org/issues/?jql=project%20%3D%20MXS%20AND%20issuetype%20%3D%20Bug%20AND%20status%20%3D%20Closed%20AND%20fixVersion%20%3D%202.1.10) diff --git a/include/maxscale/config.h b/include/maxscale/config.h index 813eaaf43..4ed2f3a93 100644 --- a/include/maxscale/config.h +++ b/include/maxscale/config.h @@ -74,6 +74,7 @@ typedef struct bool skip_permission_checks; /**< Skip service and monitor permission checks */ char qc_name[PATH_MAX]; /**< The name of the query classifier to load */ char* qc_args; /**< Arguments for the query classifier */ + int query_retries; /**< Number of times a interrupted query is retried */ } MXS_CONFIG; /** diff --git a/include/maxscale/mysql_utils.h b/include/maxscale/mysql_utils.h index bf89bd6b6..a3a86b8ca 100644 --- a/include/maxscale/mysql_utils.h +++ b/include/maxscale/mysql_utils.h @@ -29,7 +29,29 @@ uint64_t mxs_leint_consume(uint8_t ** c); char* mxs_lestr_consume_dup(uint8_t** c); char* mxs_lestr_consume(uint8_t** c, size_t *size); -MYSQL *mxs_mysql_real_connect(MYSQL *mysql, SERVER *server, const char *user, const char *passwd); +/** + * Creates a connection to a MySQL database engine. If necessary, initializes SSL. + * + * @param con A valid MYSQL structure. + * @param server The server on which the MySQL engine is running. + * @param user The MySQL login ID. + * @param passwd The password for the user. + * + * @return New connection or NULL on error + */ +MYSQL* mxs_mysql_real_connect(MYSQL *mysql, SERVER *server, const char *user, const char *passwd); + +/** + * Execute a query + * + * This function wraps mysql_query in a way that automatic query retry is possible. + * + * @param conn MySQL connection + * @param query Query to execute + * + * @return return value of mysql_query + */ +int mxs_mysql_query(MYSQL* conn, const char* query); /** * Trim MySQL quote characters surrounding a string. diff --git a/server/core/config.c b/server/core/config.c index 29e0761db..bbe2a8d8c 100644 --- a/server/core/config.c +++ b/server/core/config.c @@ -1305,6 +1305,20 @@ handle_global_item(const char *name, const char *value) { gateway.qc_args = MXS_STRDUP_A(value); } + else if (strcmp(name, "query_retries") == 0) + { + char* endptr; + int intval = strtol(value, &endptr, 0); + if (*endptr == '\0' && intval >= 0) + { + gateway.query_retries = intval; + } + else + { + MXS_ERROR("Invalid timeout value for 'query_retries': %s", value); + return 0; + } + } else if (strcmp(name, "log_throttling") == 0) { if (*value == 0) @@ -1591,6 +1605,8 @@ global_defaults() gateway.auth_read_timeout = DEFAULT_AUTH_READ_TIMEOUT; gateway.auth_write_timeout = DEFAULT_AUTH_WRITE_TIMEOUT; gateway.skip_permission_checks = false; + gateway.query_retries = DEFAULT_QUERY_RETRIES; + if (version_string != NULL) { gateway.version_string = MXS_STRDUP_A(version_string); diff --git a/server/core/maxscale/config.h b/server/core/maxscale/config.h index d66bb1068..cfff4bece 100644 --- a/server/core/maxscale/config.h +++ b/server/core/maxscale/config.h @@ -25,6 +25,7 @@ MXS_BEGIN_DECLS #define DEFAULT_NBPOLLS 3 /**< Default number of non block polls before we block */ #define DEFAULT_POLLSLEEP 1000 /**< Default poll wait time (milliseconds) */ #define DEFAULT_NTHREADS 1 /**< Default number of polling threads */ +#define DEFAULT_QUERY_RETRIES 0 /**< Number of retries for interrupted queries */ /** * @brief Generate default module parameters diff --git a/server/core/mysql_utils.c b/server/core/mysql_utils.c index 42e57d5bc..f38f0e2be 100644 --- a/server/core/mysql_utils.c +++ b/server/core/mysql_utils.c @@ -21,12 +21,15 @@ */ #include + #include #include +#include + #include -#include -#include #include +#include +#include /** * @brief Calculate the length of a length-encoded integer in bytes @@ -147,16 +150,6 @@ char* mxs_lestr_consume(uint8_t** c, size_t *size) return start; } - - -/** - * Creates a connection to a MySQL database engine. If necessary, initializes SSL. - * - * @param con A valid MYSQL structure. - * @param server The server on which the MySQL engine is running. - * @param user The MySQL login ID. - * @param passwd The password for the user. - */ MYSQL *mxs_mysql_real_connect(MYSQL *con, SERVER *server, const char *user, const char *passwd) { SSL_LISTENER *listener = server->server_ssl; @@ -183,6 +176,38 @@ MYSQL *mxs_mysql_real_connect(MYSQL *con, SERVER *server, const char *user, cons return mysql; } +static bool is_connection_error(int errcode) +{ + switch (errcode) + { + case CR_SOCKET_CREATE_ERROR: + case CR_CONNECTION_ERROR: + case CR_CONN_HOST_ERROR: + case CR_IPSOCK_ERROR: + case CR_SERVER_GONE_ERROR: + case CR_TCP_CONNECTION: + case CR_SERVER_LOST: + return true; + + default: + return false; + } +} + +int mxs_mysql_query(MYSQL* conn, const char* query) +{ + MXS_CONFIG* cnf = config_get_global_options(); + int rc = mysql_query(conn, query); + + for (int n = 0; rc != 0 && n < cnf->query_retries && + is_connection_error(mysql_errno(conn)); n++) + { + rc = mysql_query(conn, query); + } + + return rc; +} + bool mxs_mysql_trim_quotes(char *s) { bool dequoted = true; From 67ef7bd0582799e6f85cea77a66b675223b12b9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Tue, 3 Oct 2017 01:52:54 +0300 Subject: [PATCH 17/18] MXS-1367: Take mxs_mysql_query into use The use of a wrapper function allows automated retrying of the queries without requiring any changes to the code that uses it. --- server/core/monitor.c | 2 +- .../GSSAPI/GSSAPIAuth/gssapi_auth.c | 2 +- .../modules/authenticator/MySQLAuth/dbusers.c | 10 ++-- server/modules/monitor/auroramon/auroramon.c | 7 +-- server/modules/monitor/galeramon/galeramon.c | 11 +++-- server/modules/monitor/mmmon/mmmon.c | 9 ++-- server/modules/monitor/mysqlmon/mysql_mon.c | 49 ++++++++++--------- .../monitor/ndbclustermon/ndbclustermon.c | 5 +- 8 files changed, 50 insertions(+), 45 deletions(-) diff --git a/server/core/monitor.c b/server/core/monitor.c index 100cf74bb..2c71d8f58 100644 --- a/server/core/monitor.c +++ b/server/core/monitor.c @@ -700,7 +700,7 @@ bool check_monitor_permissions(MXS_MONITOR* monitor, const char* query) break; } } - else if (mysql_query(mondb->con, query) != 0) + else if (mxs_mysql_query(mondb->con, query) != 0) { switch (mysql_errno(mondb->con)) { diff --git a/server/modules/authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c b/server/modules/authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c index 180b30a89..730e40372 100644 --- a/server/modules/authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c +++ b/server/modules/authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c @@ -615,7 +615,7 @@ int gssapi_auth_load_users(SERV_LISTENER *listener) if (mxs_mysql_real_connect(mysql, servers->server, user, pw)) { - if (mysql_query(mysql, gssapi_users_query)) + if (mxs_mysql_query(mysql, gssapi_users_query)) { MXS_ERROR("Failed to query server '%s' for GSSAPI users: %s", servers->server->unique_name, mysql_error(mysql)); diff --git a/server/modules/authenticator/MySQLAuth/dbusers.c b/server/modules/authenticator/MySQLAuth/dbusers.c index 63f0eeb42..e275d7ba7 100644 --- a/server/modules/authenticator/MySQLAuth/dbusers.c +++ b/server/modules/authenticator/MySQLAuth/dbusers.c @@ -538,7 +538,7 @@ static bool check_server_permissions(SERVICE *service, SERVER* server, bool rval = true; sprintf(query, template, query_pw); - if (mysql_query(mysql, query) != 0) + if (mxs_mysql_query(mysql, query) != 0) { if (mysql_errno(mysql) == ER_TABLEACCESS_DENIED_ERROR) { @@ -568,7 +568,7 @@ static bool check_server_permissions(SERVICE *service, SERVER* server, } } - if (mysql_query(mysql, "SELECT user, host, db FROM mysql.db limit 1") != 0) + if (mxs_mysql_query(mysql, "SELECT user, host, db FROM mysql.db limit 1") != 0) { if (mysql_errno(mysql) == ER_TABLEACCESS_DENIED_ERROR) { @@ -596,7 +596,7 @@ static bool check_server_permissions(SERVICE *service, SERVER* server, } } - if (mysql_query(mysql, "SELECT user, host, db FROM mysql.tables_priv limit 1") != 0) + if (mxs_mysql_query(mysql, "SELECT user, host, db FROM mysql.tables_priv limit 1") != 0) { if (mysql_errno(mysql) == ER_TABLEACCESS_DENIED_ERROR) { @@ -747,7 +747,7 @@ int get_users_from_server(MYSQL *con, SERVER_REF *server, SERVICE *service, SERV if (query) { - if (mysql_query(con, query) == 0) + if (mxs_mysql_query(con, query) == 0) { MYSQL_RES *result = mysql_store_result(con); @@ -801,7 +801,7 @@ int get_users_from_server(MYSQL *con, SERVER_REF *server, SERVICE *service, SERV } /** Load the list of databases */ - if (mysql_query(con, "SHOW DATABASES") == 0) + if (mxs_mysql_query(con, "SHOW DATABASES") == 0) { MYSQL_RES *result = mysql_store_result(con); if (result) diff --git a/server/modules/monitor/auroramon/auroramon.c b/server/modules/monitor/auroramon/auroramon.c index c982482ba..a3d49f8f6 100644 --- a/server/modules/monitor/auroramon/auroramon.c +++ b/server/modules/monitor/auroramon/auroramon.c @@ -23,6 +23,7 @@ #include #include #include +#include typedef struct aurora_monitor { @@ -59,9 +60,9 @@ void update_server_status(MXS_MONITOR *monitor, MXS_MONITOR_SERVERS *database) MYSQL_RES *result; /** Connection is OK, query for replica status */ - if (mysql_query(database->con, "SELECT @@aurora_server_id, server_id FROM " - "information_schema.replica_host_status " - "WHERE session_id = 'MASTER_SESSION_ID'") == 0 && + if (mxs_mysql_query(database->con, "SELECT @@aurora_server_id, server_id FROM " + "information_schema.replica_host_status " + "WHERE session_id = 'MASTER_SESSION_ID'") == 0 && (result = mysql_store_result(database->con))) { ss_dassert(mysql_field_count(database->con) == 2); diff --git a/server/modules/monitor/galeramon/galeramon.c b/server/modules/monitor/galeramon/galeramon.c index dfd04685d..a986900bd 100644 --- a/server/modules/monitor/galeramon/galeramon.c +++ b/server/modules/monitor/galeramon/galeramon.c @@ -38,6 +38,7 @@ #include "galeramon.h" #include #include +#include #define DONOR_NODE_NAME_MAX_LEN 60 #define DONOR_LIST_SET_VAR "SET GLOBAL wsrep_sst_donor = \"" @@ -269,7 +270,7 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database) } /* Check if the the Galera FSM shows this node is joined to the cluster */ - if (mysql_query(database->con, "SHOW STATUS LIKE 'wsrep_local_state'") == 0 + if (mxs_mysql_query(database->con, "SHOW STATUS LIKE 'wsrep_local_state'") == 0 && (result = mysql_store_result(database->con)) != NULL) { if (mysql_field_count(database->con) < 2) @@ -290,7 +291,7 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database) /* Check if the node is a donor and is using xtrabackup, in this case it can stay alive */ else if (strcmp(row[1], "2") == 0 && handle->availableWhenDonor == 1) { - if (mysql_query(database->con, "SHOW VARIABLES LIKE 'wsrep_sst_method'") == 0 + if (mxs_mysql_query(database->con, "SHOW VARIABLES LIKE 'wsrep_sst_method'") == 0 && (result2 = mysql_store_result(database->con)) != NULL) { if (mysql_field_count(database->con) < 2) @@ -327,7 +328,7 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database) if (isjoined) { /* Check the the Galera node index in the cluster */ - if (mysql_query(database->con, "SHOW STATUS LIKE 'wsrep_local_index'") == 0 + if (mxs_mysql_query(database->con, "SHOW STATUS LIKE 'wsrep_local_index'") == 0 && (result = mysql_store_result(database->con)) != NULL) { if (mysql_field_count(database->con) < 2) @@ -772,7 +773,7 @@ static void update_sst_donor_nodes(MXS_MONITOR *mon, int is_cluster) MXS_MONITOR_SERVERS *ptr = node_list[k]; /* Get the Galera node name */ - if (mysql_query(ptr->con, "SHOW VARIABLES LIKE 'wsrep_node_name'") == 0 + if (mxs_mysql_query(ptr->con, "SHOW VARIABLES LIKE 'wsrep_node_name'") == 0 && (result = mysql_store_result(ptr->con)) != NULL) { if (mysql_field_count(ptr->con) < 2) @@ -817,7 +818,7 @@ static void update_sst_donor_nodes(MXS_MONITOR *mon, int is_cluster) { MXS_MONITOR_SERVERS *ptr = node_list[k]; /* Set the Galera SST donor node list */ - if (mysql_query(ptr->con, donor_list) == 0) + if (mxs_mysql_query(ptr->con, donor_list) == 0) { MXS_DEBUG("SET GLOBAL rep_sst_donor OK in node %s", ptr->server->unique_name); diff --git a/server/modules/monitor/mmmon/mmmon.c b/server/modules/monitor/mmmon/mmmon.c index eb7cff77c..2fb183f98 100644 --- a/server/modules/monitor/mmmon/mmmon.c +++ b/server/modules/monitor/mmmon/mmmon.c @@ -30,6 +30,7 @@ #include "mmmon.h" #include #include +#include static void monitorMain(void *); @@ -258,7 +259,7 @@ monitorDatabase(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database) } /* get server_id form current node */ - if (mysql_query(database->con, "SELECT @@server_id") == 0 + if (mxs_mysql_query(database->con, "SELECT @@server_id") == 0 && (result = mysql_store_result(database->con)) != NULL) { long server_id = -1; @@ -295,7 +296,7 @@ monitorDatabase(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database) if (server_version >= 100000) { - if (mysql_query(database->con, "SHOW ALL SLAVES STATUS") == 0 + if (mxs_mysql_query(database->con, "SHOW ALL SLAVES STATUS") == 0 && (result = mysql_store_result(database->con)) != NULL) { int i = 0; @@ -358,7 +359,7 @@ monitorDatabase(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database) } else { - if (mysql_query(database->con, "SHOW SLAVE STATUS") == 0 + if (mxs_mysql_query(database->con, "SHOW SLAVE STATUS") == 0 && (result = mysql_store_result(database->con)) != NULL) { long master_id = -1; @@ -423,7 +424,7 @@ monitorDatabase(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database) } /* get variable 'read_only' set by an external component */ - if (mysql_query(database->con, "SHOW GLOBAL VARIABLES LIKE 'read_only'") == 0 + if (mxs_mysql_query(database->con, "SHOW GLOBAL VARIABLES LIKE 'read_only'") == 0 && (result = mysql_store_result(database->con)) != NULL) { if (mysql_field_count(database->con) < 2) diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index c4e128b9b..5abab0601 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -52,6 +52,7 @@ #include #include #include +#include /** Column positions for SHOW SLAVE STATUS */ #define MYSQL55_STATUS_BINLOG_POS 5 @@ -405,7 +406,7 @@ static inline void monitor_mysql_db(MXS_MONITOR_SERVERS* database, MYSQL_SERVER_ MYSQL_RES* result; - if (mysql_query(database->con, query) == 0 + if (mxs_mysql_query(database->con, query) == 0 && (result = mysql_store_result(database->con)) != NULL) { if (mysql_field_count(database->con) < columns) @@ -521,7 +522,7 @@ static MXS_MONITOR_SERVERS *build_mysql51_replication_tree(MXS_MONITOR *mon) int nslaves = 0; if (database->con) { - if (mysql_query(database->con, "SHOW SLAVE HOSTS") == 0 + if (mxs_mysql_query(database->con, "SHOW SLAVE HOSTS") == 0 && (result = mysql_store_result(database->con)) != NULL) { if (mysql_field_count(database->con) < 4) @@ -697,7 +698,7 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database) ss_dassert(serv_info); /* Get server_id and read_only from current node */ - if (mysql_query(database->con, "SELECT @@server_id, @@read_only") == 0 + if (mxs_mysql_query(database->con, "SELECT @@server_id, @@read_only") == 0 && (result = mysql_store_result(database->con)) != NULL) { long server_id = -1; @@ -1476,8 +1477,8 @@ static void set_master_heartbeat(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *dat } /* check if the maxscale_schema database and replication_heartbeat table exist */ - if (mysql_query(database->con, "SELECT table_name FROM information_schema.tables " - "WHERE table_schema = 'maxscale_schema' AND table_name = 'replication_heartbeat'")) + if (mxs_mysql_query(database->con, "SELECT table_name FROM information_schema.tables " + "WHERE table_schema = 'maxscale_schema' AND table_name = 'replication_heartbeat'")) { MXS_ERROR( "Error checking for replication_heartbeat in Master server" ": %s", mysql_error(database->con)); @@ -1499,13 +1500,13 @@ static void set_master_heartbeat(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *dat if (0 == returned_rows) { /* create repl_heartbeat table in maxscale_schema database */ - if (mysql_query(database->con, "CREATE TABLE IF NOT EXISTS " - "maxscale_schema.replication_heartbeat " - "(maxscale_id INT NOT NULL, " - "master_server_id INT NOT NULL, " - "master_timestamp INT UNSIGNED NOT NULL, " - "PRIMARY KEY ( master_server_id, maxscale_id ) ) " - "ENGINE=MYISAM DEFAULT CHARSET=latin1")) + if (mxs_mysql_query(database->con, "CREATE TABLE IF NOT EXISTS " + "maxscale_schema.replication_heartbeat " + "(maxscale_id INT NOT NULL, " + "master_server_id INT NOT NULL, " + "master_timestamp INT UNSIGNED NOT NULL, " + "PRIMARY KEY ( master_server_id, maxscale_id ) ) " + "ENGINE=MYISAM DEFAULT CHARSET=latin1")) { MXS_ERROR("Error creating maxscale_schema.replication_heartbeat " "table in Master server: %s", mysql_error(database->con)); @@ -1520,7 +1521,7 @@ static void set_master_heartbeat(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *dat sprintf(heartbeat_purge_query, "DELETE FROM maxscale_schema.replication_heartbeat WHERE master_timestamp < %lu", purge_time); - if (mysql_query(database->con, heartbeat_purge_query)) + if (mxs_mysql_query(database->con, heartbeat_purge_query)) { MXS_ERROR("Error deleting from maxscale_schema.replication_heartbeat " "table: [%s], %s", @@ -1538,7 +1539,7 @@ static void set_master_heartbeat(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *dat heartbeat, handle->master->server->node_id, id); /* Try to insert MaxScale timestamp into master */ - if (mysql_query(database->con, heartbeat_insert_query)) + if (mxs_mysql_query(database->con, heartbeat_insert_query)) { database->server->rlag = MAX_RLAG_NOT_AVAILABLE; @@ -1556,7 +1557,7 @@ static void set_master_heartbeat(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *dat "REPLACE INTO maxscale_schema.replication_heartbeat (master_server_id, maxscale_id, master_timestamp ) VALUES ( %li, %lu, %lu)", handle->master->server->node_id, id, heartbeat); - if (mysql_query(database->con, heartbeat_insert_query)) + if (mxs_mysql_query(database->con, heartbeat_insert_query)) { database->server->rlag = MAX_RLAG_NOT_AVAILABLE; @@ -1617,7 +1618,7 @@ static void set_slave_heartbeat(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database) id, handle->master->server->node_id); /* if there is a master then send the query to the slave with master_id */ - if (handle->master != NULL && (mysql_query(database->con, select_heartbeat_query) == 0 + if (handle->master != NULL && (mxs_mysql_query(database->con, select_heartbeat_query) == 0 && (result = mysql_store_result(database->con)) != NULL)) { int rows_found = 0; @@ -1886,8 +1887,8 @@ bool check_replicate_ignore_table(MXS_MONITOR_SERVERS* database) MYSQL_RES *result; bool rval = true; - if (mysql_query(database->con, - "show variables like 'replicate_ignore_table'") == 0 && + if (mxs_mysql_query(database->con, + "show variables like 'replicate_ignore_table'") == 0 && (result = mysql_store_result(database->con)) && mysql_num_fields(result) > 1) { @@ -1930,8 +1931,8 @@ bool check_replicate_do_table(MXS_MONITOR_SERVERS* database) MYSQL_RES *result; bool rval = true; - if (mysql_query(database->con, - "show variables like 'replicate_do_table'") == 0 && + if (mxs_mysql_query(database->con, + "show variables like 'replicate_do_table'") == 0 && (result = mysql_store_result(database->con)) && mysql_num_fields(result) > 1) { @@ -1973,8 +1974,8 @@ bool check_replicate_wild_do_table(MXS_MONITOR_SERVERS* database) MYSQL_RES *result; bool rval = true; - if (mysql_query(database->con, - "show variables like 'replicate_wild_do_table'") == 0 && + if (mxs_mysql_query(database->con, + "show variables like 'replicate_wild_do_table'") == 0 && (result = mysql_store_result(database->con)) && mysql_num_fields(result) > 1) { @@ -2020,8 +2021,8 @@ bool check_replicate_wild_ignore_table(MXS_MONITOR_SERVERS* database) MYSQL_RES *result; bool rval = true; - if (mysql_query(database->con, - "show variables like 'replicate_wild_ignore_table'") == 0 && + if (mxs_mysql_query(database->con, + "show variables like 'replicate_wild_ignore_table'") == 0 && (result = mysql_store_result(database->con)) && mysql_num_fields(result) > 1) { diff --git a/server/modules/monitor/ndbclustermon/ndbclustermon.c b/server/modules/monitor/ndbclustermon/ndbclustermon.c index 9f162afc0..f0d68d262 100644 --- a/server/modules/monitor/ndbclustermon/ndbclustermon.c +++ b/server/modules/monitor/ndbclustermon/ndbclustermon.c @@ -29,6 +29,7 @@ #include "../mysqlmon.h" #include +#include static void monitorMain(void *); @@ -224,7 +225,7 @@ monitorDatabase(MXS_MONITOR_SERVERS *database, char *defaultUser, char *defaultP } /* Check if the the SQL node is able to contact one or more data nodes */ - if (mysql_query(database->con, "SHOW STATUS LIKE 'Ndb_number_of_ready_data_nodes'") == 0 + if (mxs_mysql_query(database->con, "SHOW STATUS LIKE 'Ndb_number_of_ready_data_nodes'") == 0 && (result = mysql_store_result(database->con)) != NULL) { if (mysql_field_count(database->con) < 2) @@ -251,7 +252,7 @@ monitorDatabase(MXS_MONITOR_SERVERS *database, char *defaultUser, char *defaultP } /* Check the the SQL node id in the MySQL cluster */ - if (mysql_query(database->con, "SHOW STATUS LIKE 'Ndb_cluster_node_id'") == 0 + if (mxs_mysql_query(database->con, "SHOW STATUS LIKE 'Ndb_cluster_node_id'") == 0 && (result = mysql_store_result(database->con)) != NULL) { if (mysql_field_count(database->con) < 2) From 9280f1a5d73a8fde3f60aa8c34b8410aaba4ea41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Tue, 3 Oct 2017 11:12:45 +0300 Subject: [PATCH 18/18] MXS-1367: Add timeouts for retried queries The total timeout for the retrying of interrupted queries can now be configured with the `query_retry_timeout` parameter. It controls the total timeout in seconds that the query can take. The actual connection, read and write timeouts of the connector aren't a good configuration value to use for abstracted queries as the time that it takes to execute a query can be composed of both connections, reads and writes. This is caused by the usage of MYSQL_OPT_RECONNECT that hides the fact that the connector reconnects to the server when a query is attempted. --- .../Getting-Started/Configuration-Guide.md | 13 +++++++++++-- .../MaxScale-2.1.10-Release-Notes.md | 2 ++ include/maxscale/config.h | 2 ++ server/core/config.c | 15 +++++++++++++++ server/core/maxscale/config.h | 9 +++++---- server/core/mysql_utils.c | 4 +++- 6 files changed, 38 insertions(+), 7 deletions(-) diff --git a/Documentation/Getting-Started/Configuration-Guide.md b/Documentation/Getting-Started/Configuration-Guide.md index 6a66d1319..face2aa65 100644 --- a/Documentation/Getting-Started/Configuration-Guide.md +++ b/Documentation/Getting-Started/Configuration-Guide.md @@ -178,8 +178,17 @@ The number of times an interrupted query will be retried. This feature was added in MaxScale 2.1.10 and is disabled by default. An interrupted query is any query that is interrupted by a network -error. Connection timeouts will not trigger a reconnection as it is advisable to -increase the timeouts rather than to try timed out queries again. +error. Connection timeouts are included in network errors and thus is it +advisable to make sure that the value of `query_retry_timeout` is set to an +adequate value. + +#### `query_retry_timeout` + +The total timeout in seconds for any retried queries. The default value is 5 +seconds. + +An interrupted query is retried for either the configured amount of attempts or +until the configured timeout is reached. #### `ms_timestamp` diff --git a/Documentation/Release-Notes/MaxScale-2.1.10-Release-Notes.md b/Documentation/Release-Notes/MaxScale-2.1.10-Release-Notes.md index b5394f4eb..294c92397 100644 --- a/Documentation/Release-Notes/MaxScale-2.1.10-Release-Notes.md +++ b/Documentation/Release-Notes/MaxScale-2.1.10-Release-Notes.md @@ -30,6 +30,8 @@ The internal SQL queries that MaxScale executes to load database users as well as monitor the database itself can now be automatically retried if they are interrupted. The new global parameter, `query_retries` controls the number of retry attempts each query will receive if it fails due to a network problem. +The `query_retry_timeout` global parameter controls the total timeout for the +retries. To enable this functionality, add `query_retries=` under the `[maxscale]` section where __ is a positive integer. diff --git a/include/maxscale/config.h b/include/maxscale/config.h index 4ed2f3a93..98930914b 100644 --- a/include/maxscale/config.h +++ b/include/maxscale/config.h @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -75,6 +76,7 @@ typedef struct char qc_name[PATH_MAX]; /**< The name of the query classifier to load */ char* qc_args; /**< Arguments for the query classifier */ int query_retries; /**< Number of times a interrupted query is retried */ + time_t query_retry_timeout; /**< Timeout for query retries */ } MXS_CONFIG; /** diff --git a/server/core/config.c b/server/core/config.c index bbe2a8d8c..34f4bb97d 100644 --- a/server/core/config.c +++ b/server/core/config.c @@ -1319,6 +1319,20 @@ handle_global_item(const char *name, const char *value) return 0; } } + else if (strcmp(name, "query_retry_timeout") == 0) + { + char* endptr; + int intval = strtol(value, &endptr, 0); + if (*endptr == '\0' && intval > 0) + { + gateway.query_retries = intval; + } + else + { + MXS_ERROR("Invalid timeout value for 'query_retry_timeout': %s", value); + return 0; + } + } else if (strcmp(name, "log_throttling") == 0) { if (*value == 0) @@ -1606,6 +1620,7 @@ global_defaults() gateway.auth_write_timeout = DEFAULT_AUTH_WRITE_TIMEOUT; gateway.skip_permission_checks = false; gateway.query_retries = DEFAULT_QUERY_RETRIES; + gateway.query_retry_timeout = DEFAULT_QUERY_RETRY_TIMEOUT; if (version_string != NULL) { diff --git a/server/core/maxscale/config.h b/server/core/maxscale/config.h index cfff4bece..3a05e7209 100644 --- a/server/core/maxscale/config.h +++ b/server/core/maxscale/config.h @@ -22,10 +22,11 @@ MXS_BEGIN_DECLS -#define DEFAULT_NBPOLLS 3 /**< Default number of non block polls before we block */ -#define DEFAULT_POLLSLEEP 1000 /**< Default poll wait time (milliseconds) */ -#define DEFAULT_NTHREADS 1 /**< Default number of polling threads */ -#define DEFAULT_QUERY_RETRIES 0 /**< Number of retries for interrupted queries */ +#define DEFAULT_NBPOLLS 3 /**< Default number of non block polls before we block */ +#define DEFAULT_POLLSLEEP 1000 /**< Default poll wait time (milliseconds) */ +#define DEFAULT_NTHREADS 1 /**< Default number of polling threads */ +#define DEFAULT_QUERY_RETRIES 0 /**< Number of retries for interrupted queries */ +#define DEFAULT_QUERY_RETRY_TIMEOUT 5 /**< Timeout for query retries */ /** * @brief Generate default module parameters diff --git a/server/core/mysql_utils.c b/server/core/mysql_utils.c index f38f0e2be..c7a4b66b3 100644 --- a/server/core/mysql_utils.c +++ b/server/core/mysql_utils.c @@ -197,10 +197,12 @@ static bool is_connection_error(int errcode) int mxs_mysql_query(MYSQL* conn, const char* query) { MXS_CONFIG* cnf = config_get_global_options(); + time_t start = time(NULL); int rc = mysql_query(conn, query); for (int n = 0; rc != 0 && n < cnf->query_retries && - is_connection_error(mysql_errno(conn)); n++) + is_connection_error(mysql_errno(conn)) && + time(NULL) - start < cnf->query_retry_timeout; n++) { rc = mysql_query(conn, query); }