From 7f9fdd0f3d844e56f5fc6b98a2da9a87342033bf Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Fri, 3 Mar 2017 15:28:18 +0200 Subject: [PATCH 01/12] Add router template Using the class RouterSession and the template Router, a router module can be defined. The way they are intended to be used are as follows: class MyRouterSession : public maxscale::RouterSession { ... }; class MyRouter : public maxscale::Router { ... } ... extern "C" MXS_MODULE* MXS_CREATE_MODULE() { static MXS_MODULE module = { ... &MyRouter::s_object, ... }; return &module; } --- include/maxscale/router.hh | 248 +++++++++++++++++++++++++++++++++++++ server/core/CMakeLists.txt | 2 +- server/core/router.cc | 51 ++++++++ 3 files changed, 300 insertions(+), 1 deletion(-) create mode 100644 include/maxscale/router.hh create mode 100644 server/core/router.cc diff --git a/include/maxscale/router.hh b/include/maxscale/router.hh new file mode 100644 index 000000000..f26533db5 --- /dev/null +++ b/include/maxscale/router.hh @@ -0,0 +1,248 @@ +#pragma once +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2019-07-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. + */ + +#include +#include + +namespace maxscale +{ + +/** + * @class RouterSession router.hh + * + * RouterSession is a base class for router sessions. A concrete router session + * class should be derived from this class and override all relevant functions. + * + * Note that even though this class is intended to be derived from, no functions + * are virtual. That is by design, as the class will be used in a context where + * the concrete class is known. That is, there is no need for the virtual mechanism. + */ +class RouterSession : public MXS_ROUTER_SESSION +{ +public: + /** + * The RouterSession instance will be deleted when a client session + * has terminated. Will be called only after @c close() has been called. + */ + ~RouterSession(); + + /** + * Called when a client session has been closed. + */ + void close(); + + /** + * Called when a packet being is routed to the backend. The router should + * forward the packet to the appropriate server(s). + * + * @param pPacket A client packet. + */ + int32_t routeQuery(GWBUF* pPacket); + + /** + * Called when a packet is routed to the client. The router should + * forward the packet to the client using `MXS_SESSION_ROUTE_REPLY`. + * + * @param pPacket A client packet. + * @param pBackend The backend the packet is coming from. + */ + void clientReply(GWBUF* pPacket, DCB* pBackend); + + /** + * + * @param pMessage The rror message. + * @param pProblem The DCB on which the error occurred. + * @param action The context. + * @param pSuccess On output, if false, the session will be terminated. + */ + void handleError(GWBUF* pMessage, + DCB* pProblem, + mxs_error_action_t action, + bool* pSuccess); + +protected: + RouterSession(MXS_SESSION* pSession); + +protected: + MXS_SESSION* m_pSession; /*< The MXS_SESSION this router session is associated with. */ +}; + + +/** + * @class Router router.hh + * + * An instantiation of the Router template is used for creating a router. + * Router is an example of the "Curiously recurring template pattern" + * https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern + * that is used for compile time polymorfism. + * + * The typical way for using the template is as follows: + * + * @code + * class MyRouterSession : public maxscale::RouterSession + * { + * // Override the relevant functions. + * }; + * + * class MyRouter : public maxscale::Router + * { + * public: + * static MyRouter* create(SERVICE* pService, char** pzOptions); + * + * MyRouterSession* newSession(MXS_SESSION* pSession); + * + * void diagnostics(DCB* pDcb); + * uint64_t getCapabilities(); + * }; + * @endcode + * + * The concrete router class must implement the methods @c create, @c newSession, + * @c diagnostics and @c getCapabilities, with the prototypes as shown above. + * + * The plugin function @c GetModuleObject is then implemented as follows: + * + * @code + * extern "C" MXS_MODULE* MXS_CREATE_MODULE() + * { + * static MXS_MODULE module_object = + * { + * ... + * &MyRouter::s_object, + * ... + * }; + * + * return &module_object; + * } + * @endcode + */ +template +class Router : public MXS_ROUTER +{ +public: + static MXS_ROUTER* createInstance(SERVICE* pService, char** pzOptions) + { + RouterType* pRouter = NULL; + + MXS_EXCEPTION_GUARD(pRouter = RouterType::create(pService, pzOptions)); + + return pRouter; + } + + static MXS_ROUTER_SESSION* newSession(MXS_ROUTER* pInstance, MXS_SESSION* pSession) + { + RouterType* pRouter = static_cast(pInstance); + RouterSessionType* pRouter_session; + + MXS_EXCEPTION_GUARD(pRouter_session = pRouter->newSession(pSession)); + + return pRouter_session; + } + + static void closeSession(MXS_ROUTER*, MXS_ROUTER_SESSION* pData) + { + RouterSessionType* pRouter_session = static_cast(pData); + + MXS_EXCEPTION_GUARD(pRouter_session->close()); + } + + static void freeSession(MXS_ROUTER*, MXS_ROUTER_SESSION* pData) + { + RouterSessionType* pRouter_session = static_cast(pData); + + MXS_EXCEPTION_GUARD(delete pRouter_session); + } + + static int32_t routeQuery(MXS_ROUTER*, MXS_ROUTER_SESSION* pData, GWBUF* pPacket) + { + RouterSessionType* pRouter_session = static_cast(pData); + + int32_t rv = 0; + MXS_EXCEPTION_GUARD(rv = pRouter_session->routeQuery(pPacket)); + + return rv; + } + + static void diagnostics(MXS_ROUTER* pInstance, DCB* pDcb) + { + RouterType* pRouter = static_cast(pInstance); + + MXS_EXCEPTION_GUARD(pRouter->diagnostics(pDcb)); + } + + static void clientReply(MXS_ROUTER*, MXS_ROUTER_SESSION* pData, GWBUF* pPacket, DCB* pBackend) + { + RouterSessionType* pRouter_session = static_cast(pData); + + MXS_EXCEPTION_GUARD(pRouter_session->clientReply(pPacket, pBackend)); + } + + static void handleError(MXS_ROUTER* pInstance, + MXS_ROUTER_SESSION* pData, + GWBUF* pMessage, + DCB* pProblem, + mxs_error_action_t action, + bool* pSuccess) + { + RouterSessionType* pRouter_session = static_cast(pData); + + MXS_EXCEPTION_GUARD(pRouter_session->handleError(pMessage, pProblem, action, pSuccess)); + } + + static uint64_t getCapabilities(MXS_ROUTER* pInstance) + { + uint64_t rv = 0; + + RouterType* pRouter = static_cast(pInstance); + + MXS_EXCEPTION_GUARD(rv = pRouter->getCapabilities()); + + return rv; + } + + static void destroyInstance(MXS_ROUTER* pInstance) + { + RouterType* pRouter = static_cast(pInstance); + + MXS_EXCEPTION_GUARD(delete pRouter); + } + + static MXS_ROUTER_OBJECT s_object; + +protected: + Router(SERVICE *pService) + : m_pService(pService) + { + } + + SERVICE* m_pService; +}; + + +template +MXS_ROUTER_OBJECT Router::s_object = +{ + &Router::createInstance, + &Router::newSession, + &Router::closeSession, + &Router::freeSession, + &Router::routeQuery, + &Router::diagnostics, + &Router::clientReply, + &Router::handleError, + &Router::getCapabilities, + &Router::destroyInstance, +}; + + +} diff --git a/server/core/CMakeLists.txt b/server/core/CMakeLists.txt index 11ebb97a8..b32196c08 100644 --- a/server/core/CMakeLists.txt +++ b/server/core/CMakeLists.txt @@ -1,4 +1,4 @@ -add_library(maxscale-common SHARED adminusers.c alloc.c authenticator.c atomic.c buffer.c config.c config_runtime.c dcb.c filter.c filter.cc externcmd.c paths.c hashtable.c hint.c housekeeper.c load_utils.c log_manager.cc maxscale_pcre2.c misc.c mlist.c modutil.c monitor.c queuemanager.c query_classifier.c poll.c random_jkiss.c resultset.c secrets.c server.c service.c session.c spinlock.c thread.c users.c utils.c skygw_utils.cc statistics.c listener.c ssl.c mysql_utils.c mysql_binlog.c modulecmd.c ) +add_library(maxscale-common SHARED adminusers.c alloc.c authenticator.c atomic.c buffer.c config.c config_runtime.c dcb.c filter.c filter.cc externcmd.c paths.c hashtable.c hint.c housekeeper.c load_utils.c log_manager.cc maxscale_pcre2.c misc.c mlist.c modutil.c monitor.c queuemanager.c query_classifier.c poll.c random_jkiss.c resultset.c router.cc secrets.c server.c service.c session.c spinlock.c thread.c users.c utils.c skygw_utils.cc statistics.c listener.c ssl.c mysql_utils.c mysql_binlog.c modulecmd.c ) if(WITH_JEMALLOC) target_link_libraries(maxscale-common ${JEMALLOC_LIBRARIES}) diff --git a/server/core/router.cc b/server/core/router.cc new file mode 100644 index 000000000..e189680c5 --- /dev/null +++ b/server/core/router.cc @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2019-07-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. + */ + +#include + +namespace maxscale +{ + +// +// RouterSession +// +RouterSession::RouterSession(MXS_SESSION* pSession) + : m_pSession(pSession) +{ +} + +RouterSession::~RouterSession() +{ +} + +void RouterSession::close() +{ +} + +int32_t RouterSession::routeQuery(GWBUF* pPacket) +{ + return 0; +} + +void RouterSession::clientReply(GWBUF* pPacket, DCB* pBackend) +{ +} + +void RouterSession::handleError(GWBUF* pMessage, + DCB* pProblem, + mxs_error_action_t action, + bool* pSuccess) +{ +} + +} From 4cd9309b30fa6affc75e7d08e71147b0cae8ad4d Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Fri, 3 Mar 2017 15:35:37 +0200 Subject: [PATCH 02/12] Add skeleton hint router The hint router will perform its scheduling solely based upon hints provided by a filter (e.g. namedserverfilter) placed before it. --- server/modules/routing/CMakeLists.txt | 1 + .../modules/routing/hintrouter/CMakeLists.txt | 8 +++ .../modules/routing/hintrouter/hintrouter.cc | 68 +++++++++++++++++++ .../modules/routing/hintrouter/hintrouter.hh | 36 ++++++++++ .../routing/hintrouter/hintroutersession.cc | 55 +++++++++++++++ .../routing/hintrouter/hintroutersession.hh | 45 ++++++++++++ 6 files changed, 213 insertions(+) create mode 100644 server/modules/routing/hintrouter/CMakeLists.txt create mode 100644 server/modules/routing/hintrouter/hintrouter.cc create mode 100644 server/modules/routing/hintrouter/hintrouter.hh create mode 100644 server/modules/routing/hintrouter/hintroutersession.cc create mode 100644 server/modules/routing/hintrouter/hintroutersession.hh diff --git a/server/modules/routing/CMakeLists.txt b/server/modules/routing/CMakeLists.txt index 0060edf8f..b3bfc3ddc 100644 --- a/server/modules/routing/CMakeLists.txt +++ b/server/modules/routing/CMakeLists.txt @@ -7,6 +7,7 @@ endif() add_subdirectory(cli) add_subdirectory(debugcli) +add_subdirectory(hintrouter) add_subdirectory(maxinfo) add_subdirectory(readconnroute) add_subdirectory(readwritesplit) diff --git a/server/modules/routing/hintrouter/CMakeLists.txt b/server/modules/routing/hintrouter/CMakeLists.txt new file mode 100644 index 000000000..3c32e39b9 --- /dev/null +++ b/server/modules/routing/hintrouter/CMakeLists.txt @@ -0,0 +1,8 @@ +add_library(hintrouter SHARED + hintrouter.cc + hintroutersession.cc + ) + +target_link_libraries(hintrouter maxscale-common) +set_target_properties(hintrouter PROPERTIES VERSION "1.0.0") +install_module(hintrouter core) diff --git a/server/modules/routing/hintrouter/hintrouter.cc b/server/modules/routing/hintrouter/hintrouter.cc new file mode 100644 index 000000000..e11e21cef --- /dev/null +++ b/server/modules/routing/hintrouter/hintrouter.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2019-07-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. + */ + +#define MXS_MODULE_NAME "hintrouter" +#include "hintrouter.hh" +#include + + +HintRouter::HintRouter(SERVICE* pService) + : maxscale::Router(pService) +{ + MXS_NOTICE("Hint router [%s] created.", pService->name); +} + + +//static +HintRouter* HintRouter::create(SERVICE* pService, char** pzOptions) +{ + return new HintRouter(pService); +} + + +HintRouterSession* HintRouter::newSession(MXS_SESSION *pSession) +{ + return new HintRouterSession(pSession, this); +} + +void HintRouter::diagnostics(DCB* pOut) +{ +} + +uint64_t HintRouter::getCapabilities() +{ + return 0; +} + + +extern "C" MXS_MODULE* MXS_CREATE_MODULE() +{ + static MXS_MODULE module = + { + MXS_MODULE_API_ROUTER, /* Module type */ + MXS_MODULE_BETA_RELEASE, /* Release status */ + MXS_ROUTER_VERSION, /* Implemented module API version */ + "A hint router", /* Description */ + "V1.0.0", /* Module version */ + &HintRouter::s_object, + NULL, /* Process init, can be null */ + NULL, /* Process finish, can be null */ + NULL, /* Thread init */ + NULL, /* Thread finish */ + { + {MXS_END_MODULE_PARAMS} + } + }; + + return &module; +} diff --git a/server/modules/routing/hintrouter/hintrouter.hh b/server/modules/routing/hintrouter/hintrouter.hh new file mode 100644 index 000000000..f0d1314b4 --- /dev/null +++ b/server/modules/routing/hintrouter/hintrouter.hh @@ -0,0 +1,36 @@ +#pragma once +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2019-07-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. + */ + +#include +#include +#include "hintroutersession.hh" + +class HintRouter : public maxscale::Router +{ +public: + static HintRouter* create(SERVICE* pService, char** pzOptions); + + HintRouterSession* newSession(MXS_SESSION *pSession); + + void diagnostics(DCB* pOut); + + uint64_t getCapabilities(); + +private: + HintRouter(SERVICE* pService); + +private: + HintRouter(const HintRouter&); + HintRouter& operator = (const HintRouter&); +}; diff --git a/server/modules/routing/hintrouter/hintroutersession.cc b/server/modules/routing/hintrouter/hintroutersession.cc new file mode 100644 index 000000000..542212cfa --- /dev/null +++ b/server/modules/routing/hintrouter/hintroutersession.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2019-07-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. + */ + +#define MXS_MODULE_NAME "hintrouter" +#include "hintroutersession.hh" +#include + + +HintRouterSession::HintRouterSession(MXS_SESSION* pSession, HintRouter* pRouter) + : maxscale::RouterSession(pSession) + , m_pRouter(pRouter) +{ +} + + +HintRouterSession::~HintRouterSession() +{ +} + + +void HintRouterSession::close() +{ +} + + +int32_t HintRouterSession::routeQuery(GWBUF* pPacket) +{ + MXS_ERROR("routeQuery not implemented yet."); + return 0; +} + + +void HintRouterSession::clientReply(GWBUF* pPacket, DCB* pBackend) +{ + MXS_ERROR("clientReply not implemented yet."); +} + + +void HintRouterSession::handleError(GWBUF* pMessage, + DCB* pProblem, + mxs_error_action_t action, + bool* pSuccess) +{ + MXS_ERROR("handleError not implemented yet."); +} diff --git a/server/modules/routing/hintrouter/hintroutersession.hh b/server/modules/routing/hintrouter/hintroutersession.hh new file mode 100644 index 000000000..43eeae83e --- /dev/null +++ b/server/modules/routing/hintrouter/hintroutersession.hh @@ -0,0 +1,45 @@ +#pragma once +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2019-07-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. + */ + +#include +#include + +class HintRouter; + +class HintRouterSession : public maxscale::RouterSession +{ +public: + HintRouterSession(MXS_SESSION* pSession, + HintRouter* pRouter); + + ~HintRouterSession(); + + void close(); + + int32_t routeQuery(GWBUF* pPacket); + + void clientReply(GWBUF* pPacket, DCB* pBackend); + + void handleError(GWBUF* pMessage, + DCB* pProblem, + mxs_error_action_t action, + bool* pSuccess); + +private: + HintRouterSession(const HintRouterSession&); + HintRouterSession& operator = (const HintRouterSession&); + +private: + HintRouter* m_pRouter; +}; From 09ef292283311483354e6cd628318a5cafd96b2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Thu, 2 Mar 2017 20:15:29 +0200 Subject: [PATCH 03/12] Update dbfwfilter build configuration The filter now uses a more recent syntax to declare the function name prefixes. This removes a build warning. --- server/modules/filter/dbfwfilter/ruleparser.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/modules/filter/dbfwfilter/ruleparser.y b/server/modules/filter/dbfwfilter/ruleparser.y index 33ad53cfe..ba65ed381 100644 --- a/server/modules/filter/dbfwfilter/ruleparser.y +++ b/server/modules/filter/dbfwfilter/ruleparser.y @@ -27,7 +27,7 @@ %pure-parser /** Prefix all functions */ -%name-prefix="dbfw_yy" +%name-prefix "dbfw_yy" /** The pure parser requires one extra parameter */ %parse-param {void* scanner} From 0581ab1afeb118e80dcfb48603e24984d9bbb4e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Fri, 3 Mar 2017 10:22:03 +0200 Subject: [PATCH 04/12] Extend `failover` documentation The documentation now explains more clearly why the parameter is needed and how it performs the checks. --- Documentation/Monitors/MySQL-Monitor.md | 46 ++++++++++++++++++------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/Documentation/Monitors/MySQL-Monitor.md b/Documentation/Monitors/MySQL-Monitor.md index 45e519ff1..942f6526e 100644 --- a/Documentation/Monitors/MySQL-Monitor.md +++ b/Documentation/Monitors/MySQL-Monitor.md @@ -130,22 +130,42 @@ new master. Normally this is done by using an external agent of some sort [MariaDB Replication Manager](https://github.com/tanji/replication-manager) or [MHA](https://code.google.com/p/mysql-master-ha/). -The failover mode in mysqlmon is completely passive in the sense that it does -not modify the cluster or any servers in it. It labels a slave server as a -master server when there is only one running server. Before a failover can be -initiated, the following conditions must have been met: +When the number of running servers in the cluster drops down to one, MaxScale +cannot be absolutely certain whether the last remaining server is a master or a +slave. At this point, MaxScale will try to deduce the type of the server by +looking at the system variables of the server in question. + +By default, MaxScale will only attempt to deduce if the server can be used as a +slave server (controlled by the `detect_stale_slave` parameter). When the +`failover` mode is enabled, MaxScale will also attempt to deduce whether the +server can be used as a master server. This is done by checking that the server +is not in read-only mode and that it is not configured as a slave. + +The failover mode in mysqlmon is completely passive in the sense that it does +not modify the cluster or any of the servers in it. It only labels the last +remaining server in a cluster as the master server. + +Before a failover can be initiated, the following conditions must have been met: + +- Previous attempts to connect to other servers in the cluster have failed, + controlled by the `failcount` parameter -- The monitor has repeatedly failed to connect to the failed servers - There is only one running server among the monitored servers -- @@read_only is not enabled on the last running server + +- The value of the `@@read_only` system variable is set to `OFF` + +In 2.1.1, the following additional condition was added: + - The last running server is not configured as a slave -When these conditions are met, the monitor assigns the last remaining server the -master status and puts all other servers into maintenance mode. This is done to -prevent accidental use of the failed servers if they came back online. +When these conditions are met, the monitor will label the last remaining server +as a master. -When the failed servers come back up, the maintenance mode needs to be manually -cleared once replication has been set up. +If the value of the `failover_recovery` parameter is set to false, the monitor +sets all other servers into maintenance mode. This is done to prevent accidental +use of the failed servers if they came back online. If the failed servers come +back up, the maintenance mode needs to be manually cleared once replication has +been set up. **Note**: A failover will cause permanent changes in the data of the promoted server. Only use this feature if you know that the slave servers are capable @@ -156,9 +176,9 @@ cleared once replication has been set up. Number of failures that must occur on all failed servers before a failover is initiated. The default value is 5 failures. -The monitor will attemt to contact all servers once per monitoring cycle. When +The monitor will attempt to contact all servers once per monitoring cycle. When _failover_ mode is enabled, all of the failed servers must fail _failcount_ -number of connection attemps before a failover is initiated. +number of connection attempts before a failover is initiated. The formula for calculating the actual number of milliseconds before failover can start is `monitor_interval * failcount`. This means that to trigger a From 916cb4df0863c0b9da8dd502ed70db4021de12cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Fri, 3 Mar 2017 10:46:37 +0200 Subject: [PATCH 05/12] Rename `failover` and `failover_recovery` The names of the parameters were misleading as MaxScale doesn't perform the actual failover but only detects if one has been done. --- Documentation/Monitors/MySQL-Monitor.md | 64 ++++++++++----------- server/modules/monitor/mysqlmon.h | 4 +- server/modules/monitor/mysqlmon/mysql_mon.c | 14 ++--- 3 files changed, 41 insertions(+), 41 deletions(-) diff --git a/Documentation/Monitors/MySQL-Monitor.md b/Documentation/Monitors/MySQL-Monitor.md index 942f6526e..30490924b 100644 --- a/Documentation/Monitors/MySQL-Monitor.md +++ b/Documentation/Monitors/MySQL-Monitor.md @@ -119,9 +119,10 @@ This functionality is similar to the [Multi-Master Monitor](MM-Monitor.md) functionality. The only difference is that the MySQL monitor will also detect traditional Master-Slave topologies. -### `failover` +### `detect_standalone_master` -Failover mode. This feature takes a boolean parameter is disabled by default. +Detect standalone master servers. This feature takes a boolean parameter and is +disabled by default. In MaxScale 2.1.0, this parameter was called `failover`. This parameter is intended to be used with simple, two node master-slave pairs where the failure of the master can be resolved by "promoting" the slave as the @@ -137,15 +138,16 @@ looking at the system variables of the server in question. By default, MaxScale will only attempt to deduce if the server can be used as a slave server (controlled by the `detect_stale_slave` parameter). When the -`failover` mode is enabled, MaxScale will also attempt to deduce whether the -server can be used as a master server. This is done by checking that the server -is not in read-only mode and that it is not configured as a slave. +`detect_standalone_master` mode is enabled, MaxScale will also attempt to deduce +whether the server can be used as a master server. This is done by checking that +the server is not in read-only mode and that it is not configured as a slave. -The failover mode in mysqlmon is completely passive in the sense that it does -not modify the cluster or any of the servers in it. It only labels the last -remaining server in a cluster as the master server. +This mode in mysqlmon is completely passive in the sense that it does not modify +the cluster or any of the servers in it. It only labels the last remaining +server in a cluster as the master server. -Before a failover can be initiated, the following conditions must have been met: +Before a server is labeled as a standalone master, the following conditions must +have been met: - Previous attempts to connect to other servers in the cluster have failed, controlled by the `failcount` parameter @@ -158,10 +160,7 @@ In 2.1.1, the following additional condition was added: - The last running server is not configured as a slave -When these conditions are met, the monitor will label the last remaining server -as a master. - -If the value of the `failover_recovery` parameter is set to false, the monitor +If the value of the `allow_cluster_recovery` parameter is set to false, the monitor sets all other servers into maintenance mode. This is done to prevent accidental use of the failed servers if they came back online. If the failed servers come back up, the maintenance mode needs to be manually cleared once replication has @@ -173,32 +172,33 @@ been set up. ### `failcount` -Number of failures that must occur on all failed servers before a failover is -initiated. The default value is 5 failures. +Number of failures that must occur on all failed servers before a standalone +server is labeled as a master. The default value is 5 failures. The monitor will attempt to contact all servers once per monitoring cycle. When -_failover_ mode is enabled, all of the failed servers must fail _failcount_ -number of connection attempts before a failover is initiated. +`detect_standalone_master` is enabled, all of the failed servers must fail +_failcount_ number of connection attempts before the last server is labeled as +the master. -The formula for calculating the actual number of milliseconds before failover -can start is `monitor_interval * failcount`. This means that to trigger a -failover after 10 seconds of master failure with a _monitor_interval_ of 1000 -milliseconds, the value of _failcount_ must be 10. +The formula for calculating the actual number of milliseconds before the server +is labeled as the master is `monitor_interval * failcount`. -### `failover_recovery` +### `allow_cluster_recovery` -Allow recovery after failover. This feature takes a boolean parameter is -enabled by default. +Allow recovery after the cluster has dropped down to one server. This feature +takes a boolean parameter is enabled by default. This parameter requires that +`detect_standalone_master` is set to true. In MaxScale 2.1.0, this parameter was +called `failover_recovery`. -When this parameter is disabled, if a failover has been triggered and the last -remaining server is chosen as the master, the monitor will set all of the failed -servers into maintenance mode. When this option is enabled, the failed servers -are allowed to rejoin the cluster. +When this parameter is disabled, if the last remaining server is labeled as the +master, the monitor will set all of the failed servers into maintenance +mode. When this option is enabled, the failed servers are allowed to rejoin the +cluster. -This option should be enabled when failover in MaxScale is used in conjunction -with an external agent that resets the slave status for new master servers. One -of these agents is the _replication-manager_ which clears the slave -configuration for each new master and removes the read-only mode. +This option should be enabled only when MaxScale is used in conjunction with an +external agent that automatically reintegrates failed servers into the +cluster. One of these agents is the _replication-manager_ which automatically +configures the failed servers as new slaves of the current master. ## Example 1 - Monitor script diff --git a/server/modules/monitor/mysqlmon.h b/server/modules/monitor/mysqlmon.h index eca4b6b83..ae000150d 100644 --- a/server/modules/monitor/mysqlmon.h +++ b/server/modules/monitor/mysqlmon.h @@ -74,10 +74,10 @@ typedef struct char* script; /*< Script to call when state changes occur on servers */ uint64_t events; /*< enabled events */ HASHTABLE *server_info; /**< Contains server specific information */ - bool failover; /**< If simple failover is enabled */ + bool detect_standalone_master; /**< If standalone master are detected */ int failcount; /**< How many monitoring cycles servers must be down before failover is initiated */ - bool failover_recovery; /**< Allow servers to rejoin the cluster in failover mode */ + bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */ bool warn_failover; /**< Log a warning when failover happens */ } MYSQL_MONITOR; diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index 8d1b01901..4e429be09 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -125,9 +125,9 @@ MXS_MODULE* MXS_CREATE_MODULE() {"detect_stale_slave", MXS_MODULE_PARAM_BOOL, "true"}, {"mysql51_replication", MXS_MODULE_PARAM_BOOL, "false"}, {"multimaster", MXS_MODULE_PARAM_BOOL, "false"}, - {"failover", MXS_MODULE_PARAM_BOOL, "false"}, + {"detect_standalone_master", MXS_MODULE_PARAM_BOOL, "false"}, {"failcount", MXS_MODULE_PARAM_COUNT, "5"}, - {"failover_recovery", MXS_MODULE_PARAM_BOOL, "true"}, + {"allow_cluster_recovery", MXS_MODULE_PARAM_BOOL, "true"}, { "script", MXS_MODULE_PARAM_PATH, @@ -279,9 +279,9 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params) handle->detectStaleSlave = config_get_bool(params, "detect_stale_slave"); handle->replicationHeartbeat = config_get_bool(params, "detect_replication_lag"); handle->multimaster = config_get_bool(params, "multimaster"); - handle->failover = config_get_bool(params, "failover"); + handle->detect_standalone_master = config_get_bool(params, "detect_standalone_master"); handle->failcount = config_get_integer(params, "failcount"); - handle->failover_recovery = config_get_bool(params, "failover_recovery"); + handle->allow_cluster_recovery = config_get_bool(params, "allow_cluster_recovery"); handle->mysql51_replication = config_get_bool(params, "mysql51_replication"); handle->script = config_copy_string(params, "script"); handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values); @@ -1010,7 +1010,7 @@ void do_failover(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *db) { MXS_WARNING("Failover initiated, server '%s' is now the master.%s", db->server->unique_name, - handle->failover_recovery ? + handle->allow_cluster_recovery ? "" : " All other servers are set into maintenance mode."); handle->warn_failover = false; } @@ -1019,7 +1019,7 @@ void do_failover(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *db) monitor_set_pending_status(db, SERVER_MASTER); monitor_clear_pending_status(db, SERVER_SLAVE); } - else if (!handle->failover_recovery) + else if (!handle->allow_cluster_recovery) { server_set_status_nolock(db->server, SERVER_MAINT); monitor_set_pending_status(db, SERVER_MAINT); @@ -1298,7 +1298,7 @@ monitorMain(void *arg) /** Now that all servers have their status correctly set, we can check if we need to do a failover */ - if (handle->failover) + if (handle->detect_standalone_master) { if (failover_required(handle, mon->databases)) { From dadc0d6a9dd58216f878cb3520404e000c755379 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Sat, 4 Mar 2017 00:26:51 +0200 Subject: [PATCH 06/12] Fix DATETIME and BLOB processing The old DATETIME format wasn't processed properly which caused a corruption of following events. A BLOB type value could be non-NULL but still have no data. In this case, the value should be stored as a null Avro value. --- server/core/mysql_binlog.c | 22 ++++++++++++++++------ server/modules/routing/avro/avro_rbr.c | 15 +++++++++++---- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/server/core/mysql_binlog.c b/server/core/mysql_binlog.c index 5e0539679..ddf5825c0 100644 --- a/server/core/mysql_binlog.c +++ b/server/core/mysql_binlog.c @@ -98,6 +98,7 @@ const char* column_type_to_string(uint8_t type) case TABLE_COL_TYPE_GEOMETRY: return "GEOMETRY"; default: + ss_dassert(false); break; } return "UNKNOWN"; @@ -216,7 +217,6 @@ static void unpack_year(uint8_t *ptr, struct tm *dest) dest->tm_year = *ptr; } -#ifdef USE_OLD_DATETIME /** * @brief Unpack a DATETIME * @@ -225,8 +225,10 @@ static void unpack_year(uint8_t *ptr, struct tm *dest) * @param val Value read from the binary log * @param dest Pointer where the unpacked value is stored */ -static void unpack_datetime(uint8_t *ptr, uint8_t decimals, struct tm *dest) +static void unpack_datetime(uint8_t *ptr, struct tm *dest) { + uint64_t val = 0; + memcpy(&val, ptr, sizeof(val)); uint32_t second = val - ((val / 100) * 100); val /= 100; uint32_t minute = val - ((val / 100) * 100); @@ -241,13 +243,12 @@ static void unpack_datetime(uint8_t *ptr, uint8_t decimals, struct tm *dest) memset(dest, 0, sizeof(struct tm)); dest->tm_year = year - 1900; - dest->tm_mon = month; + dest->tm_mon = month - 1; dest->tm_mday = day; dest->tm_hour = hour; dest->tm_min = minute; dest->tm_sec = second; } -#endif /** * Unpack a 5 byte reverse byte order value @@ -413,6 +414,8 @@ static size_t temporal_field_size(uint8_t type, uint8_t decimals) return 3 + ((decimals + 1) / 2); case TABLE_COL_TYPE_DATETIME: + return 8; + case TABLE_COL_TYPE_TIMESTAMP: return 4; @@ -448,8 +451,7 @@ size_t unpack_temporal_value(uint8_t type, uint8_t *ptr, uint8_t *metadata, stru break; case TABLE_COL_TYPE_DATETIME: - // This is not used with MariaDB RBR - //unpack_datetime(ptr, *metadata, tm); + unpack_datetime(ptr, tm); break; case TABLE_COL_TYPE_DATETIME2: @@ -468,6 +470,10 @@ size_t unpack_temporal_value(uint8_t type, uint8_t *ptr, uint8_t *metadata, stru case TABLE_COL_TYPE_TIMESTAMP2: unpack_timestamp(ptr, *metadata, tm); break; + + default: + ss_dassert(false); + break; } return temporal_field_size(type, *metadata); } @@ -597,6 +603,10 @@ static uint64_t unpack_bytes(uint8_t *ptr, size_t bytes) ((uint64_t)ptr[3] << 32) | ((uint64_t)ptr[2] << 40) | ((uint64_t)ptr[1] << 48) | ((uint64_t)ptr[0] << 56); break; + + default: + ss_dassert(false); + break; } return val; diff --git a/server/modules/routing/avro/avro_rbr.c b/server/modules/routing/avro/avro_rbr.c index 3ac3cf8d5..7390a6dc0 100644 --- a/server/modules/routing/avro/avro_rbr.c +++ b/server/modules/routing/avro/avro_rbr.c @@ -271,7 +271,6 @@ bool handle_row_event(AVRO_INSTANCE *router, REP_HEADER *hdr, uint8_t *ptr) /** There should always be a table map event prior to a row event. * TODO: Make the active_maps dynamic */ TABLE_MAP *map = router->active_maps[table_id % sizeof(router->active_maps)]; - ss_dassert(map); if (map) { @@ -289,10 +288,11 @@ bool handle_row_event(AVRO_INSTANCE *router, REP_HEADER *hdr, uint8_t *ptr) * beforehand so we must continue processing them until we reach the end * of the event. */ int rows = 0; + while (ptr - start < hdr->event_size - BINLOG_EVENT_HDR_LEN) { /** Add the current GTID and timestamp */ - uint8_t *end = ptr + hdr->event_size; + uint8_t *end = ptr + hdr->event_size - BINLOG_EVENT_HDR_LEN; int event_type = get_event_type(hdr->event_type); prepare_record(router, hdr, event_type, &record); ptr = process_row_event_data(map, create, &record, ptr, col_present, end); @@ -597,8 +597,15 @@ uint8_t* process_row_event_data(TABLE_MAP *map, TABLE_CREATE *create, avro_value uint64_t len = 0; memcpy(&len, ptr, bytes); ptr += bytes; - avro_value_set_bytes(&field, ptr, len); - ptr += len; + if (len) + { + avro_value_set_bytes(&field, ptr, len); + ptr += len; + } + else + { + avro_value_set_null(&field); + } ss_dassert(ptr < end); } else if (column_is_temporal(map->column_types[i])) From 09df0acb008a8e1e3d1451b3b5294a45c9dae520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Sat, 4 Mar 2017 00:31:07 +0200 Subject: [PATCH 07/12] Fix binlog rotation detection The rotations of binlogs weren't detected as the file names weren't compared. Moved the indexing of the binlogs to the end of the binlog processing. This way the files can be flushed multiple times before they are indexed. --- server/modules/routing/avro/avro.c | 8 +++++++- server/modules/routing/avro/avro_file.c | 3 --- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/server/modules/routing/avro/avro.c b/server/modules/routing/avro/avro.c index 398549817..8a7cfd65b 100644 --- a/server/modules/routing/avro/avro.c +++ b/server/modules/routing/avro/avro.c @@ -1000,14 +1000,20 @@ void converter_func(void* data) while (ok && binlog_end == AVRO_OK) { uint64_t start_pos = router->current_pos; + char binlog_name[BINLOG_FNAMELEN + 1]; + strcpy(binlog_name, router->binlog_name); + if (avro_open_binlog(router->binlogdir, router->binlog_name, &router->binlog_fd)) { binlog_end = avro_read_all_events(router); - if (router->current_pos != start_pos) + if (router->current_pos != start_pos || strcmp(binlog_name, router->binlog_name) != 0) { /** We processed some data, reset the conversion task delay */ router->task_delay = 1; + + /** Update the GTID index */ + avro_update_index(router); } avro_close_binlog(router->binlog_fd); diff --git a/server/modules/routing/avro/avro_file.c b/server/modules/routing/avro/avro_file.c index c1d65b1b0..cfa9e66cd 100644 --- a/server/modules/routing/avro/avro_file.c +++ b/server/modules/routing/avro/avro_file.c @@ -867,9 +867,6 @@ void avro_flush_all_tables(AVRO_INSTANCE *router) } hashtable_iterator_free(iter); } - - /** Update the GTID index */ - avro_update_index(router); } /** From f2fc9b9d9f32596364f1eda639e9f837d8a9f050 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Sat, 4 Mar 2017 10:08:52 +0200 Subject: [PATCH 08/12] Add workaround for null value handling in Avro C API The Avro C API fails to write bytes of size zero. A workaround is to write a single zero byte for each NULL field of type bytes. Also added an option to configure the Avro block size in case very large records are written. --- Documentation/Routers/Avrorouter.md | 5 +++++ server/modules/include/avrorouter.h | 3 ++- server/modules/routing/avro/avro.c | 5 +++++ server/modules/routing/avro/avro_file.c | 4 ++-- server/modules/routing/avro/avro_rbr.c | 30 ++++++++++++++++++++----- 5 files changed, 38 insertions(+), 9 deletions(-) diff --git a/Documentation/Routers/Avrorouter.md b/Documentation/Routers/Avrorouter.md index f9e376ab7..ad58d3105 100644 --- a/Documentation/Routers/Avrorouter.md +++ b/Documentation/Routers/Avrorouter.md @@ -142,6 +142,11 @@ data block. The default value is 1 transaction. Controls the number of row events that are grouped into a single Avro data block. The default value is 1000 row events. +#### `block_size` + +The Avro data block size in bytes. The default is 16 kilobytes. Increase this +value if individual events in the binary logs are very large. + # Files Created by the Avrorouter The avrorouter creates two files in the location pointed by _avrodir_: diff --git a/server/modules/include/avrorouter.h b/server/modules/include/avrorouter.h index b853ca57e..a233915b4 100644 --- a/server/modules/include/avrorouter.h +++ b/server/modules/include/avrorouter.h @@ -261,6 +261,7 @@ typedef struct avro_instance uint64_t row_count; /*< Row events processed */ uint64_t row_target; /*< Minimum about of row events that will trigger * a flush of all tables */ + uint64_t block_size; /**< Avro datablock size */ struct avro_instance *next; } AVRO_INSTANCE; @@ -278,7 +279,7 @@ extern void avro_client_rotate(AVRO_INSTANCE *router, AVRO_CLIENT *client, uint8 extern bool avro_open_binlog(const char *binlogdir, const char *file, int *fd); extern void avro_close_binlog(int fd); extern avro_binlog_end_t avro_read_all_events(AVRO_INSTANCE *router); -extern AVRO_TABLE* avro_table_alloc(const char* filepath, const char* json_schema); +extern AVRO_TABLE* avro_table_alloc(const char* filepath, const char* json_schema, size_t block_size); extern void* avro_table_free(AVRO_TABLE *table); extern void avro_flush_all_tables(AVRO_INSTANCE *router); extern char* json_new_schema_from_table(TABLE_MAP *map); diff --git a/server/modules/routing/avro/avro.c b/server/modules/routing/avro/avro.c index 8a7cfd65b..3306b4052 100644 --- a/server/modules/routing/avro/avro.c +++ b/server/modules/routing/avro/avro.c @@ -332,6 +332,7 @@ createInstance(SERVICE *service, char **options) inst->trx_count = 0; inst->row_target = AVRO_DEFAULT_BLOCK_ROW_COUNT; inst->trx_target = AVRO_DEFAULT_BLOCK_TRX_COUNT; + inst->block_size = 0; int first_file = 1; bool err = false; @@ -402,6 +403,10 @@ createInstance(SERVICE *service, char **options) { first_file = MAX(1, atoi(value)); } + else if (strcmp(options[i], "block_size") == 0) + { + inst->block_size = atoi(value); + } else { MXS_WARNING("[avrorouter] Unknown router option: '%s'", options[i]); diff --git a/server/modules/routing/avro/avro_file.c b/server/modules/routing/avro/avro_file.c index cfa9e66cd..41ef968f3 100644 --- a/server/modules/routing/avro/avro_file.c +++ b/server/modules/routing/avro/avro_file.c @@ -105,7 +105,7 @@ void avro_close_binlog(int fd) * @param filepath Path to the created file * @param json_schema The schema of the table in JSON format */ -AVRO_TABLE* avro_table_alloc(const char* filepath, const char* json_schema) +AVRO_TABLE* avro_table_alloc(const char* filepath, const char* json_schema, size_t block_size) { AVRO_TABLE *table = calloc(1, sizeof(AVRO_TABLE)); if (table) @@ -126,7 +126,7 @@ AVRO_TABLE* avro_table_alloc(const char* filepath, const char* json_schema) } else { - rc = avro_file_writer_create(filepath, table->avro_schema, &table->avro_file); + rc = avro_file_writer_create_with_codec(filepath, table->avro_schema, &table->avro_file, "null", block_size); } if (rc) diff --git a/server/modules/routing/avro/avro_rbr.c b/server/modules/routing/avro/avro_rbr.c index 7390a6dc0..feb4058ff 100644 --- a/server/modules/routing/avro/avro_rbr.c +++ b/server/modules/routing/avro/avro_rbr.c @@ -104,7 +104,7 @@ bool handle_table_map_event(AVRO_INSTANCE *router, REP_HEADER *hdr, uint8_t *ptr /** Close the file and open a new one */ hashtable_delete(router->open_tables, table_ident); - AVRO_TABLE *avro_table = avro_table_alloc(filepath, json_schema); + AVRO_TABLE *avro_table = avro_table_alloc(filepath, json_schema, router->block_size); if (avro_table) { @@ -296,7 +296,11 @@ bool handle_row_event(AVRO_INSTANCE *router, REP_HEADER *hdr, uint8_t *ptr) int event_type = get_event_type(hdr->event_type); prepare_record(router, hdr, event_type, &record); ptr = process_row_event_data(map, create, &record, ptr, col_present, end); - avro_file_writer_append_value(table->avro_file, &record); + if (avro_file_writer_append_value(table->avro_file, &record)) + { + MXS_ERROR("Failed to write value at position %ld: %s", + router->current_pos, avro_strerror()); + } /** Update rows events have the before and after images of the * affected rows so we'll process them as another record with @@ -305,7 +309,11 @@ bool handle_row_event(AVRO_INSTANCE *router, REP_HEADER *hdr, uint8_t *ptr) { prepare_record(router, hdr, UPDATE_EVENT_AFTER, &record); ptr = process_row_event_data(map, create, &record, ptr, col_present, end); - avro_file_writer_append_value(table->avro_file, &record); + if (avro_file_writer_append_value(table->avro_file, &record)) + { + MXS_ERROR("Failed to write value at position %ld: %s", + router->current_pos, avro_strerror()); + } } rows++; @@ -501,14 +509,23 @@ uint8_t* process_row_event_data(TABLE_MAP *map, TABLE_CREATE *create, avro_value for (long i = 0; i < map->columns && npresent < ncolumns; i++) { ss_dassert(create->columns == map->columns); - avro_value_get_by_name(record, create->column_names[i], &field, NULL); + ss_debug(int rc = )avro_value_get_by_name(record, create->column_names[i], &field, NULL); + ss_dassert(rc == 0); if (bit_is_set(columns_present, ncolumns, i)) { npresent++; if (bit_is_set(null_bitmap, ncolumns, i)) { - avro_value_set_null(&field); + if (column_is_blob(map->column_types[i])) + { + uint8_t nullvalue = 0; + avro_value_set_bytes(&field, &nullvalue, 1); + } + else + { + avro_value_set_null(&field); + } } else if (column_is_fixed_string(map->column_types[i])) { @@ -604,7 +621,8 @@ uint8_t* process_row_event_data(TABLE_MAP *map, TABLE_CREATE *create, avro_value } else { - avro_value_set_null(&field); + uint8_t nullvalue = 0; + avro_value_set_bytes(&field, &nullvalue, 1); } ss_dassert(ptr < end); } From e7b5731976834801051f4e26792260635f79db4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Mon, 6 Mar 2017 11:06:33 +0200 Subject: [PATCH 09/12] Fix internal test suite Two of the tests didn't initialize the random number generator. --- server/core/test/testlogthrottling.cc | 3 ++- server/core/test/testqueuemanager.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/server/core/test/testlogthrottling.cc b/server/core/test/testlogthrottling.cc index cd398f5a4..de78bdff7 100644 --- a/server/core/test/testlogthrottling.cc +++ b/server/core/test/testlogthrottling.cc @@ -20,6 +20,7 @@ #include #include #include +#include using std::cerr; using std::cout; @@ -152,7 +153,7 @@ int main(int argc, char* argv[]) int rc; std::ios::sync_with_stdio(); - + random_jkiss_init(); rc = sem_init(&u_semstart, 0, 0); ensure(rc == 0); diff --git a/server/core/test/testqueuemanager.c b/server/core/test/testqueuemanager.c index a11955b06..ce132565e 100644 --- a/server/core/test/testqueuemanager.c +++ b/server/core/test/testqueuemanager.c @@ -66,6 +66,7 @@ test1() int input_counter = 0; int output_counter = 0; + random_jkiss_init(); hkheartbeat = 0; queue = mxs_queue_alloc(TEST_QUEUE_SIZE, HEARTBEATS_TO_EXPIRE); From a3cae79fdf351a344172763651ac8375bed598e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Tue, 7 Mar 2017 08:07:06 +0200 Subject: [PATCH 10/12] Enable JIT support for PCRE2 The bundled PCRE2 library was built without JIT support. --- cmake/BuildPCRE2.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/BuildPCRE2.cmake b/cmake/BuildPCRE2.cmake index 57216730d..895b7f479 100644 --- a/cmake/BuildPCRE2.cmake +++ b/cmake/BuildPCRE2.cmake @@ -6,7 +6,7 @@ # to the CMakeLists.txt. You don't need to link against the pcre2 library # because the static symbols will be in MaxScale. ExternalProject_Add(pcre2 SOURCE_DIR ${CMAKE_SOURCE_DIR}/pcre2/ - CMAKE_ARGS -DCMAKE_C_FLAGS=-fPIC -DBUILD_SHARED_LIBS=N -DPCRE2_BUILD_PCRE2GREP=N -DPCRE2_BUILD_TESTS=N + CMAKE_ARGS -DCMAKE_C_FLAGS=-fPIC -DBUILD_SHARED_LIBS=N -DPCRE2_BUILD_PCRE2GREP=N -DPCRE2_BUILD_TESTS=N -DPCRE2_SUPPORT_JIT=Y BINARY_DIR ${CMAKE_BINARY_DIR}/pcre2/ BUILD_COMMAND make INSTALL_COMMAND "") From 29ece502f572b6a94a185ef8e13e8926a15e7a81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Sun, 5 Mar 2017 10:28:07 +0200 Subject: [PATCH 11/12] Minor refactoring of error handling Refactored common code into a reusable function. Now all of the backend error handling uses the same function. Moved responsibility of the DCB error handling tracking to the backend protocol. The routers no longer need to manage the `dcb->dcb_errhandle_called` variable of the failed DCB. Removed calls to handleError with client DCBs as parameters. All of the routers close the DCB given to handleError if it is a client DCB. The only time the error handler would be called is when the routeQuery function fails. The handleError call is redundant as the router already knows that the session should be closed when it returns from routeQuery. --- .../MySQL/MySQLBackend/mysql_backend.c | 248 ++++-------------- .../protocol/MySQL/MySQLClient/mysql_client.c | 31 +-- 2 files changed, 58 insertions(+), 221 deletions(-) diff --git a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c index bedd8eb0e..6080ae7cc 100644 --- a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c +++ b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c @@ -602,6 +602,34 @@ gw_read_backend_event(DCB *dcb) return rc; } +static void do_handle_error(DCB *dcb, mxs_error_action_t action, const char *errmsg) +{ + bool succp = false; + MXS_SESSION *session = dcb->session; + + if (!dcb->dcb_errhandle_called) + { + GWBUF *errbuf = mysql_create_custom_error(1, 0, errmsg); + void *rsession = session->router_session; + MXS_ROUTER_OBJECT *router = session->service->router; + MXS_ROUTER *router_instance = session->service->router_instance; + + router->handleError(router_instance, rsession, errbuf, + dcb, action, &succp); + + gwbuf_free(errbuf); + dcb->dcb_errhandle_called = true; + } + /** + * If error handler fails it means that routing session can't continue + * and it must be closed. In success, only this DCB is closed. + */ + if (!succp) + { + session->state = SESSION_STATE_STOPPING; + } +} + /** * @brief Authentication of backend - read the reply, or handle an error * @@ -609,43 +637,21 @@ gw_read_backend_event(DCB *dcb) * @param local_session The current MySQL session data structure * @return */ -static void -gw_reply_on_error(DCB *dcb, mxs_auth_state_t state) +static void gw_reply_on_error(DCB *dcb, mxs_auth_state_t state) { MXS_SESSION *session = dcb->session; CHK_SESSION(session); - /* Only reload the users table if authentication failed and the - * client session is not stopping. It is possible that authentication - * fails because the client has closed the connection before all - * backends have done authentication. */ - if (state == MXS_AUTH_STATE_FAILED && session->state != SESSION_STATE_STOPPING) - { - service_refresh_users(session->service); - } - - GWBUF* errbuf = mysql_create_custom_error(1, 0, "Authentication with backend " - "failed. Session will be closed."); - if (session->router_session) { - bool succp = false; - - session->service->router->handleError(session->service->router_instance, - session->router_session, - errbuf, dcb, ERRACT_REPLY_CLIENT, &succp); - + do_handle_error(dcb, ERRACT_REPLY_CLIENT, + "Authentication with backend failed. Session will be closed."); session->state = SESSION_STATE_STOPPING; - ss_dassert(dcb->dcb_errhandle_called); } else { - /** A NULL router_session is valid if a router declares the - * RCAP_TYPE_NO_RSESSION capability flag */ dcb->dcb_errhandle_called = true; } - - gwbuf_free(errbuf); } /** @@ -712,28 +718,7 @@ gw_read_and_write(DCB *dcb) if (return_code < 0) { - GWBUF* errbuf; - bool succp; -#if defined(SS_DEBUG) - MXS_ERROR("Backend read error handling #2."); -#endif - errbuf = mysql_create_custom_error(1, - 0, - "Read from backend failed"); - - session->service->router->handleError( - session->service->router_instance, - session->router_session, - errbuf, - dcb, - ERRACT_NEW_CONNECTION, - &succp); - gwbuf_free(errbuf); - - if (!succp) - { - session->state = SESSION_STATE_STOPPING; - } + do_handle_error(dcb, ERRACT_NEW_CONNECTION, "Read from backend failed"); return 0; } @@ -1113,18 +1098,11 @@ static int gw_MySQLWrite_backend(DCB *dcb, GWBUF *queue) */ static int gw_error_backend_event(DCB *dcb) { - MXS_SESSION* session; - void* rsession; - MXS_ROUTER_OBJECT* router; - MXS_ROUTER* router_instance; - GWBUF* errbuf; - bool succp; - mxs_session_state_t ses_state; - CHK_DCB(dcb); - session = dcb->session; + MXS_SESSION *session = dcb->session; CHK_SESSION(session); - if (SESSION_STATE_DUMMY == session->state) + + if (session->state == SESSION_STATE_DUMMY) { if (dcb->persistentstart == 0) { @@ -1133,81 +1111,32 @@ static int gw_error_backend_event(DCB *dcb) "Closing connection."); } dcb_close(dcb); - return 1; } - rsession = session->router_session; - router = session->service->router; - router_instance = session->service->router_instance; - - /** - * Avoid running redundant error handling procedure. - * dcb_close is already called for the DCB. Thus, either connection is - * closed by router and COM_QUIT sent or there was an error which - * have already been handled. - */ - if (dcb->state != DCB_STATE_POLLING) + else if (dcb->state != DCB_STATE_POLLING || session->state != SESSION_STATE_ROUTER_READY) { - int error, len; + int error; + int len = sizeof(error); - len = sizeof(error); - - if (getsockopt(dcb->fd, SOL_SOCKET, SO_ERROR, &error, (socklen_t *) & len) == 0) + if (getsockopt(dcb->fd, SOL_SOCKET, SO_ERROR, &error, (socklen_t *) & len) == 0 && error != 0) { - if (error != 0) + char errstring[MXS_STRERROR_BUFLEN]; + if (dcb->state != DCB_STATE_POLLING) { - char errstring[MXS_STRERROR_BUFLEN]; - MXS_ERROR("DCB in state %s got error '%s'.", - STRDCBSTATE(dcb->state), + MXS_ERROR("DCB in state %s got error '%s'.", STRDCBSTATE(dcb->state), strerror_r(error, errstring, sizeof(errstring))); } - } - return 1; - } - errbuf = mysql_create_custom_error(1, - 0, - "Lost connection to backend server."); - - ses_state = session->state; - - if (ses_state != SESSION_STATE_ROUTER_READY) - { - int error, len; - - len = sizeof(error); - if (getsockopt(dcb->fd, SOL_SOCKET, SO_ERROR, &error, (socklen_t *) & len) == 0) - { - if (error != 0) + else { - char errstring[MXS_STRERROR_BUFLEN]; MXS_ERROR("Error '%s' in session that is not ready for routing.", strerror_r(error, errstring, sizeof(errstring))); } } - gwbuf_free(errbuf); - goto retblock; } - -#if defined(SS_DEBUG) - MXS_INFO("Backend error event handling."); -#endif - router->handleError(router_instance, - rsession, - errbuf, - dcb, - ERRACT_NEW_CONNECTION, - &succp); - gwbuf_free(errbuf); - - /** - * If error handler fails it means that routing session can't continue - * and it must be closed. In success, only this DCB is closed. - */ - if (!succp) + else { - session->state = SESSION_STATE_STOPPING; + do_handle_error(dcb, ERRACT_NEW_CONNECTION, "Lost connection to backend server."); } -retblock: return 1; } @@ -1223,47 +1152,21 @@ retblock: */ static int gw_backend_hangup(DCB *dcb) { - MXS_SESSION* session; - void* rsession; - MXS_ROUTER_OBJECT* router; - MXS_ROUTER* router_instance; - bool succp; - GWBUF* errbuf; - mxs_session_state_t ses_state; - CHK_DCB(dcb); + MXS_SESSION *session = dcb->session; + CHK_SESSION(session); + if (dcb->persistentstart) { dcb->dcb_errhandle_called = true; - goto retblock; } - session = dcb->session; - - if (session == NULL) + else if (session->state != SESSION_STATE_ROUTER_READY) { - goto retblock; - } - - CHK_SESSION(session); - - rsession = session->router_session; - router = session->service->router; - router_instance = session->service->router_instance; - - errbuf = mysql_create_custom_error(1, - 0, - "Lost connection to backend server."); - - ses_state = session->state; - - if (ses_state != SESSION_STATE_ROUTER_READY) - { - int error, len; - - len = sizeof(error); + int error; + int len = sizeof(error); if (getsockopt(dcb->fd, SOL_SOCKET, SO_ERROR, &error, (socklen_t *) & len) == 0) { - if (error != 0 && ses_state != SESSION_STATE_STOPPING) + if (error != 0 && session->state != SESSION_STATE_STOPPING) { char errstring[MXS_STRERROR_BUFLEN]; MXS_ERROR("Hangup in session that is not ready for routing, " @@ -1271,31 +1174,12 @@ static int gw_backend_hangup(DCB *dcb) strerror_r(error, errstring, sizeof(errstring))); } } - gwbuf_free(errbuf); - /* - * I'm pretty certain this is best removed and - * causes trouble if present, but have left it - * here just for now as a comment. Martin - */ - /* dcb_close(dcb); */ - goto retblock; } - - router->handleError(router_instance, - rsession, - errbuf, - dcb, - ERRACT_NEW_CONNECTION, - &succp); - - gwbuf_free(errbuf); - /** There are no required backends available, close session. */ - if (!succp) + else { - session->state = SESSION_STATE_STOPPING; + do_handle_error(dcb, ERRACT_NEW_CONNECTION, "Lost connection to backend server."); } -retblock: return 1; } @@ -1400,29 +1284,7 @@ static int backend_write_delayqueue(DCB *dcb, GWBUF *buffer) if (rc == 0) { - MXS_SESSION *session = dcb->session; - CHK_SESSION(session); - MXS_ROUTER_OBJECT *router = session->service->router; - MXS_ROUTER *router_instance = session->service->router_instance; - void *rsession = session->router_session; - bool succp = false; - GWBUF* errbuf = mysql_create_custom_error( - 1, 0, "Failed to write buffered data to back-end server. " - "Buffer was empty or back-end was disconnected during " - "operation. Attempting to find a new backend."); - - router->handleError(router_instance, - rsession, - errbuf, - dcb, - ERRACT_NEW_CONNECTION, - &succp); - gwbuf_free(errbuf); - - if (!succp) - { - session->state = SESSION_STATE_STOPPING; - } + do_handle_error(dcb, ERRACT_NEW_CONNECTION, "Lost connection to backend server."); } return rc; diff --git a/server/modules/protocol/MySQL/MySQLClient/mysql_client.c b/server/modules/protocol/MySQL/MySQLClient/mysql_client.c index bb13ceaf1..a9aaff2ab 100644 --- a/server/modules/protocol/MySQL/MySQLClient/mysql_client.c +++ b/server/modules/protocol/MySQL/MySQLClient/mysql_client.c @@ -1019,36 +1019,11 @@ gw_read_finish_processing(DCB *dcb, GWBUF *read_buffer, uint64_t capabilities) /* else return_code is still 0 from when it was originally set */ /* Note that read_buffer has been freed or transferred by this point */ - /** Routing failed */ if (return_code != 0) { - bool router_can_continue; - GWBUF* errbuf; - /** - * Create error to be sent to client if session - * can't be continued. - */ - errbuf = mysql_create_custom_error(1, 0, - "Routing failed. Session is closed."); - /** - * Ensure that there are enough backends - * available for router to continue operation. - */ - session->service->router->handleError(session->service->router_instance, - session->router_session, - errbuf, - dcb, - ERRACT_NEW_CONNECTION, - &router_can_continue); - gwbuf_free(errbuf); - /** - * If the router cannot continue, close session - */ - if (!router_can_continue) - { - MXS_ERROR("Routing the query failed. " - "Session will be closed."); - } + /** Routing failed, close the client connection */ + dcb_close(dcb); + MXS_ERROR("Routing the query failed. Session will be closed."); } if (proto->current_command == MYSQL_COM_QUIT) From f18a40ce731c133bbc582baef36f4e0179c7be87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Mon, 6 Mar 2017 11:44:19 +0200 Subject: [PATCH 12/12] Remove redundant error handling code from routers The routers no longer need to track the number of errors each DCB receives. This is now done by the protocol modules. The type of the DCB no longer needs to be checked in the handleError implementation as the function is only called when a backend DCB fails. --- server/modules/routing/binlogrouter/blr.c | 71 +++--- .../routing/hintrouter/hintroutersession.cc | 1 + server/modules/routing/maxinfo/maxinfo.c | 17 +- .../routing/readconnroute/readconnroute.c | 19 +- .../routing/readwritesplit/readwritesplit.c | 207 ++++++++---------- .../routing/schemarouter/schemarouter.c | 16 +- 6 files changed, 129 insertions(+), 202 deletions(-) diff --git a/server/modules/routing/binlogrouter/blr.c b/server/modules/routing/binlogrouter/blr.c index f952040f4..53c933d58 100644 --- a/server/modules/routing/binlogrouter/blr.c +++ b/server/modules/routing/binlogrouter/blr.c @@ -1732,6 +1732,7 @@ errorReply(MXS_ROUTER *instance, mxs_error_action_t action, bool *succp) { + ss_dassert(backend_dcb->dcb_role == DCB_ROLE_BACKEND_HANDLER); ROUTER_INSTANCE *router = (ROUTER_INSTANCE *)instance; int error; socklen_t len; @@ -1742,54 +1743,46 @@ errorReply(MXS_ROUTER *instance, mysql_errno = (unsigned long) extract_field(((uint8_t *)GWBUF_DATA(message) + 5), 16); errmsg = extract_message(message); - /** Don't handle same error twice on same DCB */ - if (backend_dcb->dcb_errhandle_called) + /** Check router state and set errno an message */ + if (router->master_state < BLRM_BINLOGDUMP || router->master_state != BLRM_SLAVE_STOPPED) { - /** Check router state and set errno an message */ - if (router->master_state < BLRM_BINLOGDUMP || router->master_state != BLRM_SLAVE_STOPPED) + /* Authentication failed */ + if (router->master_state == BLRM_TIMESTAMP) { - /* Authentication failed */ - if (router->master_state == BLRM_TIMESTAMP) + spinlock_acquire(&router->lock); + /* set io error message */ + if (router->m_errmsg) { - spinlock_acquire(&router->lock); - /* set io error message */ - if (router->m_errmsg) - { - free(router->m_errmsg); - } - router->m_errmsg = mxs_strdup("#28000 Authentication with master server failed"); - /* set mysql_errno */ - router->m_errno = 1045; - - /* Stop replication */ - router->master_state = BLRM_SLAVE_STOPPED; - spinlock_release(&router->lock); - - /* Force backend DCB close */ - dcb_close(backend_dcb); - - MXS_ERROR("%s: Master connection error %lu '%s' in state '%s', " - "%s while connecting to master %s:%d", - router->service->name, router->m_errno, router->m_errmsg, - blrm_states[BLRM_TIMESTAMP], msg, - router->service->dbref->server->name, - router->service->dbref->server->port); + free(router->m_errmsg); } - } - if (errmsg) - { - free(errmsg); - } + router->m_errmsg = mxs_strdup("#28000 Authentication with master server failed"); + /* set mysql_errno */ + router->m_errno = 1045; - /** we optimistically assume that previous call succeed */ - *succp = true; - return; + /* Stop replication */ + router->master_state = BLRM_SLAVE_STOPPED; + spinlock_release(&router->lock); + + /* Force backend DCB close */ + dcb_close(backend_dcb); + + MXS_ERROR("%s: Master connection error %lu '%s' in state '%s', " + "%s while connecting to master %s:%d", + router->service->name, router->m_errno, router->m_errmsg, + blrm_states[BLRM_TIMESTAMP], msg, + router->service->dbref->server->name, + router->service->dbref->server->port); + } } - else + if (errmsg) { - backend_dcb->dcb_errhandle_called = true; + free(errmsg); } + /** we optimistically assume that previous call succeed */ + *succp = true; + return; + len = sizeof(error); if (router->master && getsockopt(router->master->fd, SOL_SOCKET, SO_ERROR, &error, &len) == 0 && diff --git a/server/modules/routing/hintrouter/hintroutersession.cc b/server/modules/routing/hintrouter/hintroutersession.cc index 542212cfa..9c9a4351d 100644 --- a/server/modules/routing/hintrouter/hintroutersession.cc +++ b/server/modules/routing/hintrouter/hintroutersession.cc @@ -51,5 +51,6 @@ void HintRouterSession::handleError(GWBUF* pMessage, mxs_error_action_t action, bool* pSuccess) { + ss_dassert(pProblem->dcb_role == DCB_ROLE_BACKEND_HANDLER); MXS_ERROR("handleError not implemented yet."); } diff --git a/server/modules/routing/maxinfo/maxinfo.c b/server/modules/routing/maxinfo/maxinfo.c index 63c84e3bf..73d75ed21 100644 --- a/server/modules/routing/maxinfo/maxinfo.c +++ b/server/modules/routing/maxinfo/maxinfo.c @@ -276,26 +276,13 @@ static void handleError(MXS_ROUTER *instance, bool *succp) { + ss_dassert(backend_dcb->dcb_role == DCB_ROLE_BACKEND_HANDLER); DCB *client_dcb; MXS_SESSION *session = backend_dcb->session; - mxs_session_state_t sesstate; - /** Don't handle same error twice on same DCB */ - if (backend_dcb->dcb_errhandle_called) - { - /** we optimistically assume that previous call succeed */ - *succp = true; - return; - } - else - { - backend_dcb->dcb_errhandle_called = true; - } - - sesstate = session->state; client_dcb = session->client_dcb; - if (sesstate == SESSION_STATE_ROUTER_READY) + if (session->state == SESSION_STATE_ROUTER_READY) { CHK_DCB(client_dcb); client_dcb->func.write(client_dcb, gwbuf_clone(errbuf)); diff --git a/server/modules/routing/readconnroute/readconnroute.c b/server/modules/routing/readconnroute/readconnroute.c index 7c1942599..a1564c18f 100644 --- a/server/modules/routing/readconnroute/readconnroute.c +++ b/server/modules/routing/readconnroute/readconnroute.c @@ -679,23 +679,12 @@ static void handleError(MXS_ROUTER *instance, MXS_ROUTER_SESSION *router_session DCB *problem_dcb, mxs_error_action_t action, bool *succp) { + ss_dassert(problem_dcb->dcb_role == DCB_ROLE_BACKEND_HANDLER); DCB *client_dcb; MXS_SESSION *session = problem_dcb->session; mxs_session_state_t sesstate; ROUTER_CLIENT_SES *router_cli_ses = (ROUTER_CLIENT_SES *) router_session; - /** Don't handle same error twice on same DCB */ - if (problem_dcb->dcb_errhandle_called) - { - /** we optimistically assume that previous call succeed */ - *succp = true; - return; - } - else - { - problem_dcb->dcb_errhandle_called = true; - } - sesstate = session->state; client_dcb = session->client_dcb; @@ -706,11 +695,7 @@ static void handleError(MXS_ROUTER *instance, MXS_ROUTER_SESSION *router_session client_dcb->func.write(client_dcb, gwbuf_clone(errbuf)); } - if (DCB_ROLE_CLIENT_HANDLER == problem_dcb->dcb_role) - { - dcb_close(problem_dcb); - } - else if (router_cli_ses && problem_dcb == router_cli_ses->backend_dcb) + if (router_cli_ses && problem_dcb == router_cli_ses->backend_dcb) { router_cli_ses->backend_dcb = NULL; dcb_close(problem_dcb); diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 63d53e391..806738014 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -1180,6 +1180,7 @@ static void handleError(MXS_ROUTER *instance, mxs_error_action_t action, bool *succp) { + ss_dassert(problem_dcb->dcb_role == DCB_ROLE_BACKEND_HANDLER); ROUTER_INSTANCE *inst = (ROUTER_INSTANCE *)instance; ROUTER_CLIENT_SES *rses = (ROUTER_CLIENT_SES *)router_session; CHK_CLIENT_RSES(rses); @@ -1187,150 +1188,124 @@ static void handleError(MXS_ROUTER *instance, if (rses->rses_closed) { - /** Session is already closed */ - problem_dcb->dcb_errhandle_called = true; *succp = false; return; } - /** Don't handle same error twice on same DCB */ - if (problem_dcb->dcb_errhandle_called) - { - /** we optimistically assume that previous call succeed */ - /* - * The return of true is potentially misleading, but appears to - * be safe with the code as it stands on 9 Sept 2015 - MNB - */ - *succp = true; - return; - } - else - { - problem_dcb->dcb_errhandle_called = true; - } - MXS_SESSION *session = problem_dcb->session; ss_dassert(session); - if (problem_dcb->dcb_role == DCB_ROLE_CLIENT_HANDLER) - { - dcb_close(problem_dcb); - *succp = false; - } - else - { - backend_ref_t *bref = get_bref_from_dcb(rses, problem_dcb); + backend_ref_t *bref = get_bref_from_dcb(rses, problem_dcb); - switch (action) - { + switch (action) + { case ERRACT_NEW_CONNECTION: + { + /** + * If master has lost its Master status error can't be + * handled so that session could continue. + */ + if (rses->rses_master_ref && rses->rses_master_ref->bref_dcb == problem_dcb) { - /** - * If master has lost its Master status error can't be - * handled so that session could continue. - */ - if (rses->rses_master_ref && rses->rses_master_ref->bref_dcb == problem_dcb) + SERVER *srv = rses->rses_master_ref->ref->server; + bool can_continue = false; + + if (rses->rses_config.master_failure_mode != RW_FAIL_INSTANTLY && + (bref == NULL || !BREF_IS_WAITING_RESULT(bref))) { - SERVER *srv = rses->rses_master_ref->ref->server; - bool can_continue = false; - - if (rses->rses_config.master_failure_mode != RW_FAIL_INSTANTLY && - (bref == NULL || !BREF_IS_WAITING_RESULT(bref))) - { - /** The failure of a master is not considered a critical - * failure as partial functionality still remains. Reads - * are allowed as long as slave servers are available - * and writes will cause an error to be returned. - * - * If we were waiting for a response from the master, we - * can't be sure whether it was executed or not. In this - * case the safest thing to do is to close the client - * connection. */ - can_continue = true; - } - else if (!SERVER_IS_MASTER(srv) && !srv->master_err_is_logged) - { - MXS_ERROR("Server %s:%d lost the master status. Readwritesplit " - "service can't locate the master. Client sessions " - "will be closed.", srv->name, srv->port); - srv->master_err_is_logged = true; - } - - *succp = can_continue; - - if (bref != NULL) - { - CHK_BACKEND_REF(bref); - RW_CHK_DCB(bref, problem_dcb); - dcb_close(problem_dcb); - RW_CLOSE_BREF(bref); - close_failed_bref(bref, true); - } - else - { - MXS_ERROR("Server %s:%d lost the master status but could not locate the " - "corresponding backend ref.", srv->name, srv->port); - } + /** The failure of a master is not considered a critical + * failure as partial functionality still remains. Reads + * are allowed as long as slave servers are available + * and writes will cause an error to be returned. + * + * If we were waiting for a response from the master, we + * can't be sure whether it was executed or not. In this + * case the safest thing to do is to close the client + * connection. */ + can_continue = true; } - else if (bref) + else if (!SERVER_IS_MASTER(srv) && !srv->master_err_is_logged) { - /** Check whether problem_dcb is same as dcb of rses->forced_node - * and within READ ONLY transaction: - * if true reset rses->forced_node and close session - */ - if (rses->forced_node && - (rses->forced_node->bref_dcb == problem_dcb && - session_trx_is_read_only(problem_dcb->session))) - { - MXS_ERROR("forced_node SLAVE %s in opened READ ONLY transaction has failed:" - " closing session", - problem_dcb->server->unique_name); - - rses->forced_node = NULL; - *succp = false; - break; - } - - /** We should reconnect only if we find a backend for this - * DCB. If this DCB is an older DCB that has been closed, - * we can ignore it. */ - *succp = handle_error_new_connection(inst, &rses, problem_dcb, errmsgbuf); + MXS_ERROR("Server %s:%d lost the master status. Readwritesplit " + "service can't locate the master. Client sessions " + "will be closed.", srv->name, srv->port); + srv->master_err_is_logged = true; } - if (bref) + *succp = can_continue; + + if (bref != NULL) { - /** This is a valid DCB for a backend ref */ - if (BREF_IS_IN_USE(bref) && bref->bref_dcb == problem_dcb) - { - ss_dassert(false); - MXS_ERROR("Backend '%s' is still in use and points to the problem DCB.", - bref->ref->server->unique_name); - } + CHK_BACKEND_REF(bref); + RW_CHK_DCB(bref, problem_dcb); + dcb_close(problem_dcb); + RW_CLOSE_BREF(bref); + close_failed_bref(bref, true); } else { - const char *remote = problem_dcb->state == DCB_STATE_POLLING && - problem_dcb->server ? problem_dcb->server->unique_name : "CLOSED"; - - MXS_ERROR("DCB connected to '%s' is not in use by the router " - "session, not closing it. DCB is in state '%s'", - remote, STRDCBSTATE(problem_dcb->state)); + MXS_ERROR("Server %s:%d lost the master status but could not locate the " + "corresponding backend ref.", srv->name, srv->port); } - break; } + else if (bref) + { + /** Check whether problem_dcb is same as dcb of rses->forced_node + * and within READ ONLY transaction: + * if true reset rses->forced_node and close session + */ + if (rses->forced_node && + (rses->forced_node->bref_dcb == problem_dcb && + session_trx_is_read_only(problem_dcb->session))) + { + MXS_ERROR("forced_node SLAVE %s in opened READ ONLY transaction has failed:" + " closing session", + problem_dcb->server->unique_name); + + rses->forced_node = NULL; + *succp = false; + break; + } + + /** We should reconnect only if we find a backend for this + * DCB. If this DCB is an older DCB that has been closed, + * we can ignore it. */ + *succp = handle_error_new_connection(inst, &rses, problem_dcb, errmsgbuf); + } + + if (bref) + { + /** This is a valid DCB for a backend ref */ + if (BREF_IS_IN_USE(bref) && bref->bref_dcb == problem_dcb) + { + ss_dassert(false); + MXS_ERROR("Backend '%s' is still in use and points to the problem DCB.", + bref->ref->server->unique_name); + } + } + else + { + const char *remote = problem_dcb->state == DCB_STATE_POLLING && + problem_dcb->server ? problem_dcb->server->unique_name : "CLOSED"; + + MXS_ERROR("DCB connected to '%s' is not in use by the router " + "session, not closing it. DCB is in state '%s'", + remote, STRDCBSTATE(problem_dcb->state)); + } + break; + } case ERRACT_REPLY_CLIENT: - { - handle_error_reply_client(session, rses, problem_dcb, errmsgbuf); - *succp = false; /*< no new backend servers were made available */ - break; - } + { + handle_error_reply_client(session, rses, problem_dcb, errmsgbuf); + *succp = false; /*< no new backend servers were made available */ + break; + } default: ss_dassert(!true); *succp = false; break; - } } } diff --git a/server/modules/routing/schemarouter/schemarouter.c b/server/modules/routing/schemarouter/schemarouter.c index 1256fb9b9..d396343ec 100644 --- a/server/modules/routing/schemarouter/schemarouter.c +++ b/server/modules/routing/schemarouter/schemarouter.c @@ -3557,33 +3557,19 @@ static void handleError(MXS_ROUTER* instance, mxs_error_action_t action, bool* succp) { + ss_dassert(problem_dcb->dcb_role == DCB_ROLE_BACKEND_HANDLER); MXS_SESSION* session; ROUTER_INSTANCE* inst = (ROUTER_INSTANCE *)instance; ROUTER_CLIENT_SES* rses = (ROUTER_CLIENT_SES *)router_session; CHK_DCB(problem_dcb); - /** Don't handle same error twice on same DCB */ - if (problem_dcb->dcb_errhandle_called) - { - /** we optimistically assume that previous call succeed */ - *succp = true; - return; - } - else - { - problem_dcb->dcb_errhandle_called = true; - } session = problem_dcb->session; if (session == NULL || rses == NULL) { *succp = false; } - else if (DCB_ROLE_CLIENT_HANDLER == problem_dcb->dcb_role) - { - *succp = false; - } else { CHK_SESSION(session);