From d77a9a3040114b9dd0d425d72cebac5a342bb55f Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Thu, 31 May 2018 15:21:17 +0300 Subject: [PATCH 1/4] MXS-1889 Handle master used as slave Up until 2.1.12, if it in the configuration file said 'router_options=slave', the master was used if there were no slaves at session creation time. That broke in 2.1.13 as a side-effect of MXS-1516 that checks at routing time whether the server initially selected as master still is the master. Now the required server status is stored separately for each session, so that if the master was chosen, even though we have 'router_options=slave', we can turn on the SERVER_MASTER bit. That allows us to handle the case correctly in connection_is_valid(). --- Documentation/Routers/ReadConnRoute.md | 2 +- .../routing/readconnroute/readconnection.h | 1 + .../routing/readconnroute/readconnroute.c | 20 +++++++++++++++++-- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/Documentation/Routers/ReadConnRoute.md b/Documentation/Routers/ReadConnRoute.md index f6c20b249..7ac74c3b3 100644 --- a/Documentation/Routers/ReadConnRoute.md +++ b/Documentation/Routers/ReadConnRoute.md @@ -21,7 +21,7 @@ Here is a list of all possible values for the `router_options`. Role|Description ------|--------- master|A server assigned as a master by one of MariaDB MaxScale monitors. Depending on the monitor implementation, this could be a master server of a Master-Slave replication cluster or a Write-Master of a Galera cluster. -slave|A server assigned as a slave of a master. +slave|A server assigned as a slave of a master. If all slaves are down, but the master is still available, then the router will use the master. synced| A Galera cluster node which is in a synced state with the cluster. ndb|A MySQL Replication Cluster node running|A server that is up and running. All servers that MariaDB MaxScale can connect to are labeled as running. diff --git a/server/modules/routing/readconnroute/readconnection.h b/server/modules/routing/readconnroute/readconnection.h index 74498ae2b..b0c0a9f99 100644 --- a/server/modules/routing/readconnroute/readconnection.h +++ b/server/modules/routing/readconnroute/readconnection.h @@ -49,6 +49,7 @@ typedef struct router_client_session SERVER_REF *backend; /*< Backend used by the client session */ DCB *backend_dcb; /*< DCB Connection to the backend */ DCB *client_dcb; /**< Client DCB */ + unsigned int bitvalue; /*< Session specific required value of server->status */ struct router_client_session *next; #if defined(SS_DEBUG) skygw_chk_t rses_chk_tail; diff --git a/server/modules/routing/readconnroute/readconnroute.c b/server/modules/routing/readconnroute/readconnroute.c index 0d0112cd9..06846d123 100644 --- a/server/modules/routing/readconnroute/readconnroute.c +++ b/server/modules/routing/readconnroute/readconnroute.c @@ -286,6 +286,7 @@ newSession(MXS_ROUTER *instance, MXS_SESSION *session) client_rses->rses_chk_tail = CHK_NUM_ROUTER_SES; #endif client_rses->client_dcb = session->client_dcb; + client_rses->bitvalue = inst->bitvalue; /** * Find the Master host from available servers @@ -397,6 +398,14 @@ newSession(MXS_ROUTER *instance, MXS_SESSION *session) if (master_host) { candidate = master_host; + // Even if we had 'router_options=slave' in the configuration file, we + // will still end up here if there are no slaves, but a sole master. So + // that the server will be considered valid in connection_is_valid(), we + // turn on the SERVER_MASTER bit. + // + // We must do that so that readconnroute in MaxScale 2.2 will again behave + // the same way as it did up until 2.1.12. + client_rses->bitvalue |= SERVER_MASTER; } else { @@ -542,10 +551,17 @@ static inline bool connection_is_valid(ROUTER_INSTANCE* inst, ROUTER_CLIENT_SES* { bool rval = false; + // inst->bitvalue and router_cli_ses->bitvalue are different, if we had + // 'router_options=slave' in the configuration file and there was only + // the sole master available at session creation time. + if (SERVER_IS_RUNNING(router_cli_ses->backend->server) && - (router_cli_ses->backend->server->status & inst->bitmask & inst->bitvalue)) + (router_cli_ses->backend->server->status & inst->bitmask & router_cli_ses->bitvalue)) { - if ((inst->bitvalue == SERVER_MASTER) && router_cli_ses->backend->active) + // Note the use of '==' and not '|'. We must use the former to exclude a + // 'router_options=slave' that uses the master due to no slave having been + // available at session creation time. Its bitvalue is (SERVER_MASTER | SERVER_SLAVE). + if ((router_cli_ses->bitvalue == SERVER_MASTER) && router_cli_ses->backend->active) { // If we're using an active master server, verify that it is still a master rval = router_cli_ses->backend == get_root_master(inst->service->dbref); From a319c5ad192111fc855eabee0ebeab99a1e29396 Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Fri, 1 Jun 2018 09:22:29 +0300 Subject: [PATCH 2/4] MXS-1889 Add test If all slaves are down, a connection should be created to the master. --- maxscale-system-test/CMakeLists.txt | 4 + .../cnf/maxscale.cnf.template.mxs1889 | 89 +++++++++++++++++++ maxscale-system-test/mxs1889.cpp | 36 ++++++++ 3 files changed, 129 insertions(+) create mode 100644 maxscale-system-test/cnf/maxscale.cnf.template.mxs1889 create mode 100644 maxscale-system-test/mxs1889.cpp diff --git a/maxscale-system-test/CMakeLists.txt b/maxscale-system-test/CMakeLists.txt index 02d9bf536..827a588fc 100644 --- a/maxscale-system-test/CMakeLists.txt +++ b/maxscale-system-test/CMakeLists.txt @@ -922,6 +922,10 @@ add_test_executable(mxs1628_bad_handshake.cpp mxs1628_bad_handshake replication # MXS-1836: MaxInfo "show eventTimes" returns garbage. add_test_executable(mxs1836_show_eventTimes.cpp mxs1836_show_eventTimes mxs1836_show_eventTimes LABELS REPL_BACKEND) + # MXS-1889: A single remaining master is valid for readconnroute configured with 'router_options=slave' + # https://jira.mariadb.org/browse/MXS-1889 +add_test_executable(mxs1889.cpp mxs1889 mxs1889 LABELS REPL_BACKEND) + configure_file(templates.h.in templates.h @ONLY) include(CTest) diff --git a/maxscale-system-test/cnf/maxscale.cnf.template.mxs1889 b/maxscale-system-test/cnf/maxscale.cnf.template.mxs1889 new file mode 100644 index 000000000..e31fc2c9e --- /dev/null +++ b/maxscale-system-test/cnf/maxscale.cnf.template.mxs1889 @@ -0,0 +1,89 @@ +[maxscale] +threads=###threads### +log_info=on + +[MySQL Monitor] +type=monitor +module=mysqlmon +###repl51### +servers=server1,server2,server3,server4 +user=maxskysql +passwd=skysql +monitor_interval=1000 + +[RW Split Router] +type=service +router=readwritesplit +servers=server1,server2,server3,server4 +user=maxskysql +passwd=skysql +router_options=slave_selection_criteria=LEAST_GLOBAL_CONNECTIONS +max_slave_connections=1 + +[Read Connection Router Slave] +type=service +router=readconnroute +router_options=slave +servers=server1,server2,server3,server4 +user=maxskysql +passwd=skysql + +[Read Connection Router Master] +type=service +router=readconnroute +router_options=master +servers=server1,server2,server3,server4 +user=maxskysql +passwd=skysql + +[RW Split Listener] +type=listener +service=RW Split Router +protocol=MySQLClient +port=4006 + +[Read Connection Listener Slave] +type=listener +service=Read Connection Router Slave +protocol=MySQLClient +port=4009 + +[Read Connection Listener Master] +type=listener +service=Read Connection Router Master +protocol=MySQLClient +port=4008 + +[CLI] +type=service +router=cli + +[CLI Listener] +type=listener +service=CLI +protocol=maxscaled +socket=default + +[server1] +type=server +address=###node_server_IP_1### +port=###node_server_port_1### +protocol=MySQLBackend + +[server2] +type=server +address=###node_server_IP_2### +port=###node_server_port_2### +protocol=MySQLBackend + +[server3] +type=server +address=###node_server_IP_3### +port=###node_server_port_3### +protocol=MySQLBackend + +[server4] +type=server +address=###node_server_IP_4### +port=###node_server_port_4### +protocol=MySQLBackend diff --git a/maxscale-system-test/mxs1889.cpp b/maxscale-system-test/mxs1889.cpp new file mode 100644 index 000000000..2a99c38ad --- /dev/null +++ b/maxscale-system-test/mxs1889.cpp @@ -0,0 +1,36 @@ +/** + * MXS-1889: A single remaining master is valid for readconnroute configured with 'router_options=slave' + * + * https://jira.mariadb.org/browse/MXS-1889 + */ + +#include "testconnections.h" + +int main(int argc, char** argv) +{ + TestConnections test(argc, argv); + + // Give some time for things to stabilize. + sleep(2); + + // Take down all slaves. + test.repl->stop_node(1); + test.repl->stop_node(2); + test.repl->stop_node(3); + + // Give the monitor some time to detect it + sleep(5); + + test.maxscales->connect(); + + // Should succeed. + test.try_query(test.maxscales->conn_slave[0], "SELECT 1"); + + int rv = test.global_result; + + test.repl->start_node(3); + test.repl->start_node(2); + test.repl->start_node(1); + + return rv; +} From dc94ec9963f83d20702c5609da063f5c9ab26f27 Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Fri, 1 Jun 2018 12:45:19 +0300 Subject: [PATCH 3/4] MXS-1889 Improve test program Verify that once there are slaves, we are connected to a slave as expected. --- maxscale-system-test/mxs1889.cpp | 67 +++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 9 deletions(-) diff --git a/maxscale-system-test/mxs1889.cpp b/maxscale-system-test/mxs1889.cpp index 2a99c38ad..78524dafa 100644 --- a/maxscale-system-test/mxs1889.cpp +++ b/maxscale-system-test/mxs1889.cpp @@ -5,32 +5,81 @@ */ #include "testconnections.h" +#include +#include + +using namespace std; + +namespace +{ + +string get_server_id(TestConnections& test, MYSQL* pMysql) +{ + string id; + + int rv = mysql_query(pMysql, "SELECT @@server_id"); + test.add_result(rv, "Could not execute query."); + + if (rv == 0) + { + MYSQL_RES* pResult = mysql_store_result(pMysql); + test.assert(pResult, "Could not store result."); + + if (pResult) + { + unsigned int n = mysql_field_count(pMysql); + test.assert(n == 1, "Unexpected number of fields."); + + MYSQL_ROW pzRow = mysql_fetch_row(pResult); + test.assert(pzRow, "Returned row was NULL."); + + if (pzRow) + { + id = pzRow[0]; + } + + mysql_free_result(pResult); + } + } + + return id; +} + +} int main(int argc, char** argv) { TestConnections test(argc, argv); - // Give some time for things to stabilize. - sleep(2); - - // Take down all slaves. + test.tprintf("Taking down all slaves."); test.repl->stop_node(1); test.repl->stop_node(2); test.repl->stop_node(3); - // Give the monitor some time to detect it + test.tprintf("Giving monitor time to detect the situation..."); sleep(5); test.maxscales->connect(); - // Should succeed. - test.try_query(test.maxscales->conn_slave[0], "SELECT 1"); + // All slaves down, so we expect a connection to the master. + string master_id = get_server_id(test, test.maxscales->conn_slave[0]); + test.tprintf("Master id: %s", master_id.c_str()); - int rv = test.global_result; + test.maxscales->disconnect(); + test.tprintf("Starting all slaves."); test.repl->start_node(3); test.repl->start_node(2); test.repl->start_node(1); - return rv; + test.tprintf("Giving monitor time to detect the situation..."); + sleep(5); + + test.maxscales->connect(); + + string slave_id = get_server_id(test, test.maxscales->conn_slave[0]); + test.tprintf("Server id: %s", slave_id.c_str()); + test.assert(slave_id != master_id, "Expected something else but %s", master_id.c_str()); + + return test.global_result; } From 089be56103689e40a5f5b72a4bd75c676d1f002e Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Fri, 1 Jun 2018 13:43:02 +0300 Subject: [PATCH 4/4] MXS-1889 Only turn on master bit for slaves The bitmask ensures that the master bit would be ignored in cases where it is not relevant, but nicer if it is set only when it is relevant. --- server/modules/routing/readconnroute/readconnroute.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/modules/routing/readconnroute/readconnroute.c b/server/modules/routing/readconnroute/readconnroute.c index 06846d123..278c65d24 100644 --- a/server/modules/routing/readconnroute/readconnroute.c +++ b/server/modules/routing/readconnroute/readconnroute.c @@ -405,7 +405,10 @@ newSession(MXS_ROUTER *instance, MXS_SESSION *session) // // We must do that so that readconnroute in MaxScale 2.2 will again behave // the same way as it did up until 2.1.12. - client_rses->bitvalue |= SERVER_MASTER; + if (client_rses->bitvalue & SERVER_SLAVE) + { + client_rses->bitvalue |= SERVER_MASTER; + } } else {