From c470813762ef51096319c7b3fc1cd2a46f91528d Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Mon, 26 Sep 2016 15:05:08 +0300 Subject: [PATCH 01/34] MXS-831: Detect new_master events The new_master events were mistakenly detected as lost_slave events. --- server/core/monitor.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/server/core/monitor.c b/server/core/monitor.c index b74104aa3..fbfbea784 100644 --- a/server/core/monitor.c +++ b/server/core/monitor.c @@ -711,8 +711,14 @@ mon_get_event_type(MONITOR_SERVERS* node) } else { + /** These are used to detect whether we actually lost something or + * just transitioned from one state to another */ + unsigned int prev_bits = prev & (SERVER_MASTER | SERVER_SLAVE); + unsigned int present_bits = present & (SERVER_MASTER | SERVER_SLAVE); + /* Was running and still is */ - if (prev & (SERVER_MASTER | SERVER_SLAVE | SERVER_JOINED | SERVER_NDB)) + if ((!prev_bits || !present_bits || prev_bits == present_bits) && + prev & (SERVER_MASTER | SERVER_SLAVE | SERVER_JOINED | SERVER_NDB)) { /* We used to know what kind of server it was */ event_type = LOSS_EVENT; From b50e794be7aab1ddf2fd7e15b9210b84a91ee1db Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Mon, 17 Oct 2016 06:54:37 +0300 Subject: [PATCH 02/34] MXS-917: Only log an error if the master is in use When the readwritesplit can't locate the master server when it's checking the list of available servers, it logs an error if the original master reference isn't in a valid state. This error should only be logged if the server is in use but in an unexpected state. --- server/modules/routing/readwritesplit/readwritesplit.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 3b3cf17d1..9cdb72388 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -4944,13 +4944,11 @@ static backend_ref_t *get_root_master_bref(ROUTER_CLIENT_SES *rses) } } - if (candidate_bref == NULL && rses->rses_config.rw_master_failure_mode == RW_FAIL_INSTANTLY) + if (candidate_bref == NULL && rses->rses_config.rw_master_failure_mode == RW_FAIL_INSTANTLY && + rses->rses_master_ref && BREF_IS_IN_USE(rses->rses_master_ref)) { - MXS_ERROR("Could not find master among the backend " - "servers. Previous master's state : %s", - rses->rses_master_ref == NULL ? "No master found" : - (!BREF_IS_IN_USE(rses->rses_master_ref) ? "Master is not in use" : - STRSRVSTATUS(&master))); + MXS_ERROR("Could not find master among the backend servers. " + "Previous master's state : %s", STRSRVSTATUS(&master)); } return candidate_bref; From f1acc1f4512f67943b1ccdd7284dd5fc64fdc76d Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Wed, 19 Oct 2016 15:40:50 +0300 Subject: [PATCH 03/34] Use the backend server charset The default character set should be copied from the server so that MaxScale appears to be the same. This fixes problems where utf8mb4 couldn't be taken into use because MaxScale would always send latin1 as the server charset. --- server/core/dbusers.c | 5 +++++ server/core/server.c | 4 ++++ server/include/server.h | 1 + server/modules/protocol/mysql_client.c | 5 +++++ 4 files changed, 15 insertions(+) diff --git a/server/core/dbusers.c b/server/core/dbusers.c index 0ebd8f8cd..04d4b8f04 100644 --- a/server/core/dbusers.c +++ b/server/core/dbusers.c @@ -2612,6 +2612,11 @@ static bool check_server_permissions(SERVICE *service, SERVER* server, return my_errno != ER_ACCESS_DENIED_ERROR; } + /** Copy the server charset */ + MY_CHARSET_INFO cs_info; + mysql_get_character_set_info(mysql, &cs_info); + server->charset = cs_info.number; + if (server->server_string == NULL) { const char *server_string = mysql_get_server_info(mysql); diff --git a/server/core/server.c b/server/core/server.c index a29198791..8de58eb5d 100644 --- a/server/core/server.c +++ b/server/core/server.c @@ -44,6 +44,9 @@ #include #include +/** The latin1 charset */ +#define SERVER_DEFAULT_CHARSET 0x08 + static SPINLOCK server_spin = SPINLOCK_INIT; static SERVER *allServers = NULL; @@ -89,6 +92,7 @@ server_alloc(char *servname, char *protocol, unsigned short port) server->persistmaxtime = 0; server->persistpoolmax = 0; server->slave_configured = false; + server->charset = SERVER_DEFAULT_CHARSET; spinlock_init(&server->persistlock); spinlock_acquire(&server_spin); diff --git a/server/include/server.h b/server/include/server.h index 5dfe79cfc..674a19370 100644 --- a/server/include/server.h +++ b/server/include/server.h @@ -109,6 +109,7 @@ typedef struct server long persistpoolmax; /**< Maximum size of persistent connections pool */ long persistmaxtime; /**< Maximum number of seconds connection can live */ int persistmax; /**< Maximum pool size actually achieved since startup */ + uint8_t charset; /**< Default server character set */ #if defined(SS_DEBUG) skygw_chk_t server_chk_tail; #endif diff --git a/server/modules/protocol/mysql_client.c b/server/modules/protocol/mysql_client.c index e7663308d..0146e819e 100644 --- a/server/modules/protocol/mysql_client.c +++ b/server/modules/protocol/mysql_client.c @@ -277,6 +277,11 @@ int MySQLSendHandshake(DCB* dcb) int len_version_string = 0; int id_num; + if (dcb->service->dbref) + { + mysql_server_language = dcb->service->dbref->server->charset; + } + MySQLProtocol *protocol = DCB_PROTOCOL(dcb, MySQLProtocol); GWBUF *buf; From 7a988df5a0ed4410a0dbf6911e17a047a2105203 Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Fri, 21 Oct 2016 10:05:07 +0300 Subject: [PATCH 04/34] MXS-955: Put back proper .maxadmin handling Now MaxScale again supports having 'hostname', 'user', 'port' and 'passwd' in the .maxadmin file in addition to 'socket'. --- Documentation/Reference/MaxAdmin.md | 15 ++++- client/maxadmin.c | 98 +++++++++++++++++++---------- 2 files changed, 78 insertions(+), 35 deletions(-) diff --git a/Documentation/Reference/MaxAdmin.md b/Documentation/Reference/MaxAdmin.md index fefb53dbf..c1da5453e 100644 --- a/Documentation/Reference/MaxAdmin.md +++ b/Documentation/Reference/MaxAdmin.md @@ -287,13 +287,22 @@ Then simply set this file to have execute permissions and it may be run like any ## The .maxadmin file -MaxAdmin supports a mechanism to set defaults for all the command line switches via a file in the home directory of the user. If a file named .maxadmin exists it will be read and parameters set according to the lies in this files. The parameter that can be set is: socket. An example of a .maxadmin file that will alter the default password and user name arguments would be +MaxAdmin supports a mechanism to set defaults for the command line switches via a file in the home directory of the user. If a file named `.maxadmin` exists, it will be read and parameters set according to the entries in that file. + +This mechanism can be used to provide defaults to the command line options. If a command line option is provided, it will still override the value in the `.maxadmin` file. + +The parameters than can be set are: + * `1.4`: _hostname_, _port_, _user_ and _passwd_ + * `2.0.0` and `2.0.1`: _socket_ + * `2.0.2` onwards: _socket_, _hostname_, _port_, _user_ and _passwd_ (and as synonym _password_) + +An example of a `.maxadmin` file that will alter the default socket path is: socket=/somepath/maxadmin.socket -This mechanism can be used to provide a means of passwords entry into maxadmin or to override any of the command line option defaults. If a command line option is given that will still override the value in the .maxadmin file. +Note that if in `2.0.2` or later, a value for _socket_ as well as any of the internet socket related options, such as _hostname_, is provided in `.maxadmin`, then _socket_ takes precedense. In that case, provide at least one internet socket related option on the command line to force MaxAdmin to use an internet socket and thus the internet socket related options from `.maxadmin`. -The .maxadmin file may be made read only to protect any passwords written to that file. +The `.maxadmin` file may be made read only to protect any passwords written to that file. # Getting Help diff --git a/client/maxadmin.c b/client/maxadmin.c index 1c9cc8d93..239a131c6 100644 --- a/client/maxadmin.c +++ b/client/maxadmin.c @@ -75,7 +75,9 @@ static void DoSource(int so, char *cmd); static void DoUsage(const char*); static int isquit(char *buf); static void PrintVersion(const char *progname); -static void read_inifile(char **, int*); +static void read_inifile(char **socket, + char **hostname, char **port, char **user, char **passwd, + int *editor); static bool getPassword(char *password, size_t length); #ifdef HISTORY @@ -116,10 +118,6 @@ static struct option long_options[] = int main(int argc, char **argv) { - const char* vi = "vi"; - const char* emacs = "emacs"; - - int i, num, rv; #ifdef HISTORY char *buf; EditLine *el = NULL; @@ -133,39 +131,45 @@ main(int argc, char **argv) char *port = NULL; char *user = NULL; char *passwd = NULL; - char *conn_socket = NULL; - char *default_socket = MAXADMIN_DEFAULT_SOCKET; + char *socket_path = NULL; int use_emacs = 0; - int so; + + read_inifile(&socket_path, &hostname, &port, &user, &passwd, &use_emacs); + + bool use_inet_socket = false; + bool use_unix_socket = false; + int option_index = 0; char c; - - read_inifile(&conn_socket, &use_emacs); - while ((c = getopt_long(argc, argv, "h:p:P:u:S:v?e", long_options, &option_index)) >= 0) { switch (c) { case 'h': + use_inet_socket = true; hostname = strdup(optarg); break; case 'p': + use_inet_socket = true; passwd = strdup(optarg); memset(optarg, '\0', strlen(optarg)); break; case 'P': + use_inet_socket = true; port = strdup(optarg); break; case 'u': + use_inet_socket = true; user = strdup(optarg); break; case 'S': - conn_socket = strdup(optarg); + use_unix_socket = true; + socket_path = strdup(optarg); break; case 'v': @@ -182,16 +186,20 @@ main(int argc, char **argv) } } - if ((hostname || port || user || passwd) && (conn_socket)) + if (use_inet_socket && use_unix_socket) { - // Either socket or any parameters related to hostname/port. + // Both unix socket path and at least of the internet socket + // options have been provided. DoUsage(argv[0]); exit(EXIT_FAILURE); } - if (hostname || port || user || passwd) + if (use_inet_socket || (!socket_path && (hostname || port || user || passwd))) { - assert(!conn_socket); + // If any of the internet socket options have explicitly been provided, or + // .maxadmin does not contain "socket" but does contain at least one of + // the internet socket options, we use an internet socket. Note that if + // -S is provided, then socket_path will be non-NULL. if (!hostname) { @@ -210,23 +218,29 @@ main(int argc, char **argv) } else { - if (!conn_socket) + use_unix_socket = true; + + if (!socket_path) { - conn_socket = MAXADMIN_DEFAULT_SOCKET; + socket_path = MAXADMIN_DEFAULT_SOCKET; } } - assert(!((hostname || port) && conn_socket)); + int so; - if (conn_socket) + if (use_unix_socket) { - if ((so = connectUsingUnixSocket(conn_socket)) == -1) + assert(socket_path); + + if ((so = connectUsingUnixSocket(socket_path)) == -1) { exit(EXIT_FAILURE); } } else { + assert(hostname && user && port); + char password[MAX_PASSWORD_LEN]; if (passwd == NULL) @@ -301,11 +315,11 @@ main(int argc, char **argv) if (use_emacs) { - el_set(el, EL_EDITOR, emacs); /** Editor is emacs */ + el_set(el, EL_EDITOR, "emacs"); /** Editor is emacs */ } else { - el_set(el, EL_EDITOR, vi); /* Default editor is vi */ + el_set(el, EL_EDITOR, "vi"); /* Default editor is vi */ } el_set(el, EL_SIGNAL, 1); /* Handle signals gracefully */ el_set(el, EL_PROMPT, prompt); /* Set the prompt function */ @@ -325,6 +339,7 @@ main(int argc, char **argv) */ el_source(el, NULL); + int num; while ((buf = (char *) el_gets(el, &num)) != NULL && num != 0) { #else @@ -333,7 +348,7 @@ main(int argc, char **argv) num = strlen(buf); #endif /* Strip trailing \n\r */ - for (i = num - 1; buf[i] == '\r' || buf[i] == '\n'; i--) + for (int i = num - 1; buf[i] == '\r' || buf[i] == '\n'; i--) { buf[i] = 0; } @@ -350,6 +365,7 @@ main(int argc, char **argv) else if (!strcasecmp(buf, "history")) { #ifdef HISTORY + int rv; for (rv = history(hist, &ev, H_LAST); rv != -1; rv = history(hist, &ev, H_PREV)) { @@ -394,11 +410,11 @@ main(int argc, char **argv) /** * Connect to the MaxScale server * - * @param conn_socket The UNIX socket to connect to + * @param socket_path The UNIX socket to connect to * @return The connected socket or -1 on error */ static int -connectUsingUnixSocket(const char *conn_socket) +connectUsingUnixSocket(const char *socket_path) { int so = -1; @@ -408,7 +424,7 @@ connectUsingUnixSocket(const char *conn_socket) memset(&local_addr, 0, sizeof local_addr); local_addr.sun_family = AF_UNIX; - strncpy(local_addr.sun_path, conn_socket, sizeof(local_addr.sun_path) - 1); + strncpy(local_addr.sun_path, socket_path, sizeof(local_addr.sun_path) - 1); if (connect(so, (struct sockaddr *) &local_addr, sizeof(local_addr)) == 0) { @@ -441,7 +457,7 @@ connectUsingUnixSocket(const char *conn_socket) { char errbuf[STRERROR_BUFLEN]; fprintf(stderr, "Unable to connect to MaxScale at %s: %s\n", - conn_socket, strerror_r(errno, errbuf, sizeof(errbuf))); + socket_path, strerror_r(errno, errbuf, sizeof(errbuf))); close(so); so = -1; } @@ -853,13 +869,16 @@ rtrim(char *str) * Read defaults for hostname, port, user and password from * the .maxadmin file in the users home directory. * - * @param hostname Pointer the hostname to be updated + * @param socket Pointer to the socket to be updated. + * @param hostname Pointer to the hostname to be updated * @param port Pointer to the port to be updated * @param user Pointer to the user to be updated * @param passwd Pointer to the password to be updated */ static void -read_inifile(char **conn_socket, int* editor) +read_inifile(char **socket, + char **hostname, char** port, char **user, char **passwd, + int* editor) { char pathname[400]; char *home, *brkt; @@ -893,11 +912,26 @@ read_inifile(char **conn_socket, int* editor) { if (strcmp(name, "socket") == 0) { - *conn_socket = strdup(value); + *socket = strdup(value); + } + else if (strcmp(name, "hostname") == 0) + { + *hostname = strdup(value); + } + else if (strcmp(name, "port") == 0) + { + *port = strdup(value); + } + else if (strcmp(name, "user") == 0) + { + *user = strdup(value); + } + else if ((strcmp(name, "passwd") == 0) || (strcmp(name, "password") == 0)) + { + *passwd = strdup(value); } else if (strcmp(name, "editor") == 0) { - if (strcmp(value, "vi") == 0) { *editor = 0; From 8e9012f514fee4834b7830eda3af14791e5b6a7c Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Mon, 24 Oct 2016 11:17:19 +0300 Subject: [PATCH 05/34] Add missing declaration to maxadmin A variable was not declared before it was used. --- client/maxadmin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/maxadmin.c b/client/maxadmin.c index 239a131c6..966d34bb2 100644 --- a/client/maxadmin.c +++ b/client/maxadmin.c @@ -345,7 +345,7 @@ main(int argc, char **argv) #else while (printf("MaxScale> ") && fgets(buf, 1024, stdin) != NULL) { - num = strlen(buf); + int num = strlen(buf); #endif /* Strip trailing \n\r */ for (int i = num - 1; buf[i] == '\r' || buf[i] == '\n'; i--) From d53a6d28992aee0cf589911c628ebfb87912e370 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Fri, 21 Oct 2016 00:34:21 +0300 Subject: [PATCH 06/34] MXS-942: Don't return information_schema as the parsed database When a DESCRIBE or a SHOW COLUMNS IN
query is done, the actual query is performed on tables in the information_schema database. This might be what actually happens on the backend server but this information is not really useful when we need to know which database the query targets. By passing the actual table names instead of the underlying table names, the schemarouter is able to detect where these statements should be routed. --- .../qc_mysqlembedded/qc_mysqlembedded.cc | 26 +++++++++++++++---- query_classifier/qc_sqlite/qc_sqlite.c | 4 +-- .../qc_sqlite/sqlite-src-3110100/src/parse.y | 6 ++++- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/query_classifier/qc_mysqlembedded/qc_mysqlembedded.cc b/query_classifier/qc_mysqlembedded/qc_mysqlembedded.cc index df86d4f47..3fd8573ba 100644 --- a/query_classifier/qc_mysqlembedded/qc_mysqlembedded.cc +++ b/query_classifier/qc_mysqlembedded/qc_mysqlembedded.cc @@ -95,7 +95,7 @@ static parsing_info_t* parsing_info_init(void (*donefun)(void *)); static void parsing_info_set_plain_str(void* ptr, char* str); /** Free THD context and close MYSQL */ static void parsing_info_done(void* ptr); -static void* skygw_get_affected_tables(void* lexptr); +static TABLE_LIST* skygw_get_affected_tables(void* lexptr); static bool ensure_query_is_parsed(GWBUF* query); static bool parse_query(GWBUF* querybuf); static bool query_is_parsed(GWBUF* buf); @@ -1063,7 +1063,7 @@ LEX* get_lex(GWBUF* querybuf) * @param thd Pointer to a valid THD * @return Pointer to the head of the TABLE_LIST chain or NULL in case of an error */ -static void* skygw_get_affected_tables(void* lexptr) +static TABLE_LIST* skygw_get_affected_tables(void* lexptr) { LEX* lex = (LEX*) lexptr; @@ -1073,7 +1073,23 @@ static void* skygw_get_affected_tables(void* lexptr) return NULL; } - return (void*) lex->current_select->table_list.first; + TABLE_LIST *tbl = lex->current_select->table_list.first; + + if (tbl && tbl->schema_select_lex && tbl->schema_select_lex->table_list.elements && + lex->sql_command != SQLCOM_SHOW_KEYS) + { + /** + * Some statements e.g. EXPLAIN or SHOW COLUMNS give `information_schema` + * as the underlying table and the table in the query is stored in + * @c schema_select_lex. + * + * SHOW [KEYS | INDEX] does the reverse so we need to skip the + * @c schema_select_lex when processing a SHOW [KEYS | INDEX] statement. + */ + tbl = tbl->schema_select_lex->table_list.first; + } + + return tbl; } /** @@ -1111,7 +1127,7 @@ char** qc_get_table_names(GWBUF* querybuf, int* tblsize, bool fullnames) while (lex->current_select) { - tbl = (TABLE_LIST*) skygw_get_affected_tables(lex); + tbl = skygw_get_affected_tables(lex); while (tbl) { @@ -1803,7 +1819,7 @@ char** qc_get_database_names(GWBUF* querybuf, int* size) while (lex->current_select) { - tbl = lex->current_select->table_list.first; + tbl = skygw_get_affected_tables(lex); while (tbl) { diff --git a/query_classifier/qc_sqlite/qc_sqlite.c b/query_classifier/qc_sqlite/qc_sqlite.c index 6253b2a33..33d67de28 100644 --- a/query_classifier/qc_sqlite/qc_sqlite.c +++ b/query_classifier/qc_sqlite/qc_sqlite.c @@ -1629,7 +1629,7 @@ void maxscaleExplain(Parse* pParse, SrcList* pName) info->status = QC_QUERY_PARSED; info->types = QUERY_TYPE_READ; - update_names(info, "information_schema", "COLUMNS"); + update_names(info, pName->a[0].zDatabase, pName->a[0].zName); append_affected_field(info, "COLUMN_DEFAULT COLUMN_KEY COLUMN_NAME " "COLUMN_TYPE EXTRA IS_NULLABLE"); @@ -2191,7 +2191,7 @@ extern void maxscaleShow(Parse* pParse, MxsShow* pShow) case MXS_SHOW_COLUMNS: { info->types = QUERY_TYPE_READ; - update_names(info, "information_schema", "COLUMNS"); + update_names(info, zDatabase, zName); if (pShow->data == MXS_SHOW_COLUMNS_FULL) { append_affected_field(info, diff --git a/query_classifier/qc_sqlite/sqlite-src-3110100/src/parse.y b/query_classifier/qc_sqlite/sqlite-src-3110100/src/parse.y index 2bff7038f..5b9f888f0 100644 --- a/query_classifier/qc_sqlite/sqlite-src-3110100/src/parse.y +++ b/query_classifier/qc_sqlite/sqlite-src-3110100/src/parse.y @@ -3010,13 +3010,17 @@ like_or_where_opt ::= WHERE expr. %type show {MxsShow} -show(A) ::= SHOW full_opt(X) COLUMNS from_or_in nm(Y) dbnm(Z) from_or_in_db_opt like_or_where_opt . { +show(A) ::= SHOW full_opt(X) COLUMNS from_or_in nm(Y) dbnm(Z) from_or_in_db_opt(W) like_or_where_opt . { A.what = MXS_SHOW_COLUMNS; A.data = X; if (Z.z) { A.pName = &Z; A.pDatabase = &Y; } + else if (W.z) { + A.pName = &Y; + A.pDatabase = &W; + } else { A.pName = &Y; A.pDatabase = NULL; From 367d4407dbcd5c7708d6c82d265d4d716d7fe0a6 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Mon, 24 Oct 2016 12:47:05 +0300 Subject: [PATCH 07/34] Fix build failures when using C++11 Some parts of the code weren't C++11 compatible and the build would fail with newer compilers. --- server/modules/include/mysql_client_server_protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/modules/include/mysql_client_server_protocol.h b/server/modules/include/mysql_client_server_protocol.h index 9cf49689b..6534a81c4 100644 --- a/server/modules/include/mysql_client_server_protocol.h +++ b/server/modules/include/mysql_client_server_protocol.h @@ -62,7 +62,7 @@ #include #include -#define GW_MYSQL_VERSION "5.5.5-10.0.0 "MAXSCALE_VERSION"-maxscale" +#define GW_MYSQL_VERSION "5.5.5-10.0.0 " MAXSCALE_VERSION "-maxscale" #define GW_MYSQL_LOOP_TIMEOUT 300000000 #define GW_MYSQL_READ 0 #define GW_MYSQL_WRITE 1 From 1388686fb7b2b23ea0811be9410e5cbee3cc7abd Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Wed, 26 Oct 2016 17:36:48 +0300 Subject: [PATCH 08/34] MXS-957: Fix temporary table detection Temporary tables which were created from joins with other temporary tables weren't properly detected as CREATE TEMPORARY TABLE statements. --- .../routing/readwritesplit/readwritesplit.c | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 9cdb72388..3de469452 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -97,7 +97,7 @@ static backend_ref_t *check_candidate_bref(backend_ref_t *candidate_bref, backend_ref_t *new_bref, select_criteria_t sc); -static qc_query_type_t is_read_tmp_table(ROUTER_CLIENT_SES *router_cli_ses, +static bool is_read_tmp_table(ROUTER_CLIENT_SES *router_cli_ses, GWBUF *querybuf, qc_query_type_t type); static void check_create_tmp_table(ROUTER_CLIENT_SES *router_cli_ses, @@ -1560,9 +1560,9 @@ void check_drop_tmp_table(ROUTER_CLIENT_SES *router_cli_ses, GWBUF *querybuf, * @param type The type of the query resolved so far * @return The type of the query */ -static qc_query_type_t is_read_tmp_table(ROUTER_CLIENT_SES *router_cli_ses, +static bool is_read_tmp_table(ROUTER_CLIENT_SES *router_cli_ses, GWBUF *querybuf, - qc_query_type_t type) + qc_query_type_t qtype) { bool target_tmp_table = false; @@ -1571,20 +1571,20 @@ static qc_query_type_t is_read_tmp_table(ROUTER_CLIENT_SES *router_cli_ses, char *dbname; char hkey[MYSQL_DATABASE_MAXLEN + MYSQL_TABLE_MAXLEN + 2]; MYSQL_session *data; - qc_query_type_t qtype = type; + bool rval = false; rses_property_t *rses_prop_tmp; if (router_cli_ses == NULL || querybuf == NULL) { MXS_ERROR("[%s] Error: NULL parameters passed: %p %p", __FUNCTION__, router_cli_ses, querybuf); - return type; + return false; } if (router_cli_ses->client_dcb == NULL) { MXS_ERROR("[%s] Error: Client DCB is NULL.", __FUNCTION__); - return type; + return false; } rses_prop_tmp = router_cli_ses->rses_properties[RSES_PROP_TYPE_TMPTABLES]; @@ -1593,7 +1593,7 @@ static qc_query_type_t is_read_tmp_table(ROUTER_CLIENT_SES *router_cli_ses, if (data == NULL) { MXS_ERROR("[%s] Error: User data in client DBC is NULL.", __FUNCTION__); - return qtype; + return false; } dbname = (char *)data->db; @@ -1617,7 +1617,7 @@ static qc_query_type_t is_read_tmp_table(ROUTER_CLIENT_SES *router_cli_ses, if (hashtable_fetch(rses_prop_tmp->rses_prop_data.temp_tables, hkey)) { /**Query target is a temporary table*/ - qtype = QUERY_TYPE_READ_TMP_TABLE; + rval = true; MXS_INFO("Query targets a temporary table: %s", hkey); break; } @@ -1635,7 +1635,7 @@ static qc_query_type_t is_read_tmp_table(ROUTER_CLIENT_SES *router_cli_ses, free(tbl); } - return qtype; + return rval; } /** @@ -1969,9 +1969,9 @@ static bool route_single_stmt(ROUTER_INSTANCE *inst, ROUTER_CLIENT_SES *rses, (packet_type == MYSQL_COM_QUERY || packet_type == MYSQL_COM_DROP_DB)) { check_drop_tmp_table(rses, querybuf, qtype); - if (packet_type == MYSQL_COM_QUERY) + if (packet_type == MYSQL_COM_QUERY && is_read_tmp_table(rses, querybuf, qtype)) { - qtype = is_read_tmp_table(rses, querybuf, qtype); + qtype |= QUERY_TYPE_MASTER_READ; } } check_create_tmp_table(rses, querybuf, qtype); From 0f68fa60288d1c995ba716a3fcaedf278fa085d3 Mon Sep 17 00:00:00 2001 From: MassimilianoPinto Date: Fri, 28 Oct 2016 16:26:43 +0200 Subject: [PATCH 09/34] Removing error messages while executing RESET SLAVE Removing error messages while executing RESET SLAVE --- server/modules/routing/binlog/blr_slave.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/modules/routing/binlog/blr_slave.c b/server/modules/routing/binlog/blr_slave.c index 83d0bc1a7..c6b851904 100644 --- a/server/modules/routing/binlog/blr_slave.c +++ b/server/modules/routing/binlog/blr_slave.c @@ -876,10 +876,16 @@ blr_slave_query(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue) spinlock_acquire(&router->lock); + /* Set the BLRM_UNCONFIGURED state */ router->master_state = BLRM_UNCONFIGURED; blr_master_set_empty_config(router); blr_master_free_config(current_master); + /* Remove any error message and errno */ + MXS_FREE(router->m_errmsg); + router->m_errmsg = NULL; + router->m_errno = 0; + spinlock_release(&router->lock); if (removed_cfg == -1) From 01f3b35fad6543e3a8b15a017e5570194218d50b Mon Sep 17 00:00:00 2001 From: MassimilianoPinto Date: Mon, 31 Oct 2016 09:11:51 +0100 Subject: [PATCH 10/34] MXS-961: error in replication stream and checksum calculation When checksum is in use and there is an error in replication stream master connection the blr_terminate_master_replication has no effect. MXS-961: The checksum detection calls blr_master_delayed_connect(router); and connection is scheduled again. The fix will break the main loop as soon as the error indicator byte is seen and no other computation will be done (such as checksum) --- server/modules/routing/binlog/blr_master.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/modules/routing/binlog/blr_master.c b/server/modules/routing/binlog/blr_master.c index ba9e16e80..699e008c8 100644 --- a/server/modules/routing/binlog/blr_master.c +++ b/server/modules/routing/binlog/blr_master.c @@ -1063,7 +1063,13 @@ blr_handle_binlog_record(ROUTER_INSTANCE *router, GWBUF *pkt) } else { + /* Terminate replication and exit from main loop */ blr_terminate_master_replication(router, ptr, len); + + gwbuf_free(pkt); + pkt = NULL; + + break; } if (hdr.ok == 0) From 5ca0c730a1633ff8e6b8b5d194c5e422cb62cf69 Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Fri, 28 Oct 2016 13:48:22 +0300 Subject: [PATCH 11/34] MXS-960: In BRL, accept passwords with "," in them. strtok_r replaced with a function that ignores delims that appear within any of the MySQL quotes. --- server/modules/routing/binlog/blr_slave.c | 155 +++++++++++++++++++++- 1 file changed, 149 insertions(+), 6 deletions(-) diff --git a/server/modules/routing/binlog/blr_slave.c b/server/modules/routing/binlog/blr_slave.c index c6b851904..f42606ac2 100644 --- a/server/modules/routing/binlog/blr_slave.c +++ b/server/modules/routing/binlog/blr_slave.c @@ -84,6 +84,7 @@ #include #include +static char* get_next_token(char *str, const char* delim, char **saveptr); extern int load_mysql_users(SERVICE *service); extern void blr_master_close(ROUTER_INSTANCE* router); extern int blr_file_new_binlog(ROUTER_INSTANCE *router, char *file); @@ -4282,6 +4283,148 @@ blr_set_master_password(ROUTER_INSTANCE *router, char *password) return 0; } +/** + * Get next token + * + * Works exactly like strtok_t except that a delim character which appears + * anywhere within quotes is ignored. For instance, if delim is "," then + * a string like "MASTER_USER='maxscale_repl_user',MASTER_PASSWORD='a,a'" + * will be tokenized into the following two tokens: + * + * MASTER_USER='maxscale_repl_user' + * MASTER_PASSWORD='a,a' + * + * @see strtok_r + */ +static char* get_next_token(char *str, const char* delim, char **saveptr) +{ + if (str) + { + *saveptr = str; + } + + if (!*saveptr) + { + return NULL; + } + + bool delim_found = true; + + // Skip any delims in the beginning. + while (**saveptr && delim_found) + { + const char* d = delim; + + while (*d) + { + if (*d == **saveptr) + { + break; + } + + ++d; + } + + if (*d == 0) + { + delim_found = false; + } + else + { + ++*saveptr; + } + } + + if (!**saveptr) + { + return NULL; + } + + delim_found = false; + + char *token = *saveptr; + char *p = *saveptr; + + char quote = 0; + + while (*p && !delim_found) + { + switch (*p) + { + case '\'': + case '"': + case '`': + if (!quote) + { + quote = *p; + } + else if (quote == *p) + { + quote = 0; + } + break; + + default: + if (!quote) + { + const char *d = delim; + while (*d && !delim_found) + { + if (*p == *d) + { + delim_found = true; + *p = 0; + } + else + { + ++d; + } + } + } + } + + ++p; + } + + if (*p == 0) + { + *saveptr = NULL; + } + else if (delim_found) + { + *saveptr = p; + + delim_found = true; + + while (**saveptr && delim_found) + { + const char *d = delim; + while (*d) + { + if (**saveptr == *d) + { + break; + } + else + { + ++d; + } + } + + if (*d == 0) + { + delim_found = false; + } + else + { + ++*saveptr; + } + } + } + + return token; +} + /** * Parse a CHANGE MASTER TO SQL command * @@ -4296,7 +4439,7 @@ blr_parse_change_master_command(char *input, char *error_string, CHANGE_MASTER_O char *sep = ","; char *word, *brkb; - if ((word = strtok_r(input, sep, &brkb)) == NULL) + if ((word = get_next_token(input, sep, &brkb)) == NULL) { sprintf(error_string, "Unable to parse query [%s]", input); return 1; @@ -4310,7 +4453,7 @@ blr_parse_change_master_command(char *input, char *error_string, CHANGE_MASTER_O } } - while ((word = strtok_r(NULL, sep, &brkb)) != NULL) + while ((word = get_next_token(NULL, sep, &brkb)) != NULL) { /* parse options key=val */ if (blr_handle_change_master_token(word, error_string, config)) @@ -4334,12 +4477,12 @@ static int blr_handle_change_master_token(char *input, char *error, CHANGE_MASTER_OPTIONS *config) { /* space+TAB+= */ - char *sep = " ="; + char *sep = " \t="; char *word, *brkb; char *value = NULL; char **option_field = NULL; - if ((word = strtok_r(input, sep, &brkb)) == NULL) + if ((word = get_next_token(input, sep, &brkb)) == NULL) { sprintf(error, "error parsing %s", brkb); return 1; @@ -4378,7 +4521,7 @@ static char * blr_get_parsed_command_value(char *input) { /* space+TAB+= */ - char *sep = " ="; + char *sep = " \t="; char *ret = NULL; char *word; char *value = NULL; @@ -4392,7 +4535,7 @@ blr_get_parsed_command_value(char *input) return ret; } - if ((word = strtok_r(NULL, sep, &input)) != NULL) + if ((word = get_next_token(NULL, sep, &input)) != NULL) { char *ptr; From 6835fe4db97d80bbd60d54ce9d92c13dacbeeab9 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Mon, 31 Oct 2016 14:55:34 +0200 Subject: [PATCH 12/34] Use pre-2.1 freeing functions The binlogrouter code used the new MXS_FREE macro from the 2.1 version. --- server/modules/routing/binlog/blr_slave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/modules/routing/binlog/blr_slave.c b/server/modules/routing/binlog/blr_slave.c index f42606ac2..44bf812a7 100644 --- a/server/modules/routing/binlog/blr_slave.c +++ b/server/modules/routing/binlog/blr_slave.c @@ -883,7 +883,7 @@ blr_slave_query(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue) blr_master_free_config(current_master); /* Remove any error message and errno */ - MXS_FREE(router->m_errmsg); + free(router->m_errmsg); router->m_errmsg = NULL; router->m_errno = 0; From 7b4561e3f4134514026026a864f86fc931710d42 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Mon, 31 Oct 2016 14:52:15 +0200 Subject: [PATCH 13/34] Fix COM_CHANGE_USER handling in MySQLBackend When a COM_CHANGE_USER statement was executed, the new user credentials were copied after the authentication message was sent. This caused the COM_CHANGE_USER to always succeed the first time as it used the current credentials. The user credentials would always lag behind by one. --- server/modules/protocol/mysql_backend.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/server/modules/protocol/mysql_backend.c b/server/modules/protocol/mysql_backend.c index 2a9bbbb7d..2d1243413 100644 --- a/server/modules/protocol/mysql_backend.c +++ b/server/modules/protocol/mysql_backend.c @@ -1900,13 +1900,19 @@ static int gw_change_user(DCB *backend, } else { - rv = gw_send_change_user_to_backend(database, username, client_sha1, backend_protocol); - /* - * Now copy new data into user session - */ + /** This assumes that authentication will succeed. If authentication fails, + * the internal session will represent the wrong user. This is wrong and + * a check whether the COM_CHANGE_USER succeeded should be done in the + * backend protocol reply handling. + * + * For the time being, it is simpler to assume a COM_CHANGE_USER will always + * succeed if the authentication in MaxScale is successful. In practice this + * might not be true but these cases are handled by the router modules + * and the servers that fail to execute the COM_CHANGE_USER are discarded. */ strcpy(current_session->user, username); strcpy(current_session->db, database); memcpy(current_session->client_sha1, client_sha1, sizeof(current_session->client_sha1)); + rv = gw_send_change_user_to_backend(database, username, client_sha1, backend_protocol); } retblock: From 69b8cef95cc259d67002065f97c908c14aba8116 Mon Sep 17 00:00:00 2001 From: MassimilianoPinto Date: Mon, 31 Oct 2016 11:48:21 +0100 Subject: [PATCH 14/34] MXS-958: the specified binlog file is created in $binlogdir while configuring binlog server for the first time While configuring binlog server for the first time, master.ini not existent, the specified MASTER_LOG_FILE is created in the $binlogdir. If START SLAVE command is not issued the replication can start after restarting maxscale as the binlog file exists. --- server/modules/routing/binlog/blr_slave.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/server/modules/routing/binlog/blr_slave.c b/server/modules/routing/binlog/blr_slave.c index 44bf812a7..b037dd8d2 100644 --- a/server/modules/routing/binlog/blr_slave.c +++ b/server/modules/routing/binlog/blr_slave.c @@ -1033,6 +1033,17 @@ blr_slave_query(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue) router->master_state = BLRM_SLAVE_STOPPED; spinlock_release(&router->lock); + + /* + * The binlog server has just been configured + * master.ini file written in router->binlogdir. + * Now create the binlogfile specified in MASTER_LOG_FILE + */ + + if (blr_file_new_binlog(router, router->binlog_name)) + { + MXS_INFO("%s: 'master.ini' created, binlog file '%s' created", router->service->name, router->binlog_name); + } } if (!router->trx_safe) From 7dd498f0b942c94b5024198c45ba35489fc87cb9 Mon Sep 17 00:00:00 2001 From: MassimilianoPinto Date: Mon, 31 Oct 2016 14:18:38 +0100 Subject: [PATCH 15/34] =?UTF-8?q?MXS-958:=20a=20new=20binlog=20file=20is?= =?UTF-8?q?=20created=20after=20CHANGE=20MASTER=20if=20there=20is=E2=80=A6?= =?UTF-8?q?=20=E2=80=A6=20no=20pending=20transaction?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Binlog server is already configured: if there is no pending transaction a new binlog file is created after CHANGE MASTER. If as START SLAVE is issued replication starts as usuale. If maxscale is restarted the replication starts using the new created file. --- server/modules/routing/binlog/blr_slave.c | 51 ++++++++++++----------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/server/modules/routing/binlog/blr_slave.c b/server/modules/routing/binlog/blr_slave.c index b037dd8d2..b109acaf5 100644 --- a/server/modules/routing/binlog/blr_slave.c +++ b/server/modules/routing/binlog/blr_slave.c @@ -1044,11 +1044,8 @@ blr_slave_query(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue) { MXS_INFO("%s: 'master.ini' created, binlog file '%s' created", router->service->name, router->binlog_name); } - } - - if (!router->trx_safe) - { blr_master_free_config(current_master); + return blr_slave_send_ok(router, slave); } if (router->trx_safe && router->pending_transaction) @@ -1064,17 +1061,24 @@ blr_slave_query(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue) return blr_slave_send_warning_message(router, slave, message); } - else - { - blr_master_free_config(current_master); - return blr_slave_send_ok(router, slave); - } + } - } - else + blr_master_free_config(current_master); + + /* + * The CHAMGE MASTER command might specify a new binlog file. + * Let's create the binlogfile specified in MASTER_LOG_FILE + */ + + if (strlen(router->prevbinlog) && strcmp(router->prevbinlog, router->binlog_name)) { - return blr_slave_send_ok(router, slave); - } + if (blr_file_new_binlog(router, router->binlog_name)) + { + MXS_INFO("%s: created new binlog file '%s' by 'CHANGE MASTER TO' command", + router->service->name, router->binlog_name); + } + } + return blr_slave_send_ok(router, slave); } } } @@ -3456,24 +3460,23 @@ blr_start_slave(ROUTER_INSTANCE* router, ROUTER_SLAVE* slave) /* Send warning message to mysql command */ blr_slave_send_warning_message(router, slave, msg); } + } - /* create new one */ + /* No file has beem opened, create a new binlog file */ + if (router->binlog_fd == -1) + { blr_file_new_binlog(router, router->binlog_name); } else { - if (router->binlog_fd == -1) - { - /* create new one */ - blr_file_new_binlog(router, router->binlog_name); - } - else - { - /* use existing one */ - blr_file_append(router, router->binlog_name); - } + /* A new binlog file has been created by CHANGE MASTER TO + * if no pending transaction is detected. + * use the existing one. + */ + blr_file_use_binlog(router, router->binlog_name); } + /* Start the replication from Master server */ blr_start_master(router); MXS_NOTICE("%s: START SLAVE executed by %s@%s. Trying connection to master %s:%d, " From 37a2c8cecff7e15875f8b5f266b7eb4cb000e143 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Tue, 1 Nov 2016 10:28:15 +0200 Subject: [PATCH 16/34] Use new function name in blr_slave The blr_file_use_binlog function no longer exists and blr_file_append should be used instead. --- server/modules/routing/binlog/blr_slave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/modules/routing/binlog/blr_slave.c b/server/modules/routing/binlog/blr_slave.c index b109acaf5..64616c674 100644 --- a/server/modules/routing/binlog/blr_slave.c +++ b/server/modules/routing/binlog/blr_slave.c @@ -3473,7 +3473,7 @@ blr_start_slave(ROUTER_INSTANCE* router, ROUTER_SLAVE* slave) * if no pending transaction is detected. * use the existing one. */ - blr_file_use_binlog(router, router->binlog_name); + blr_file_append(router, router->binlog_name); } /* Start the replication from Master server */ From 572d466fad7fe0befd7ad648cc00e37771b316de Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Wed, 2 Nov 2016 12:12:36 +0200 Subject: [PATCH 17/34] Fix read-only mode error handling in readwritesplit In read-only modes, readwritesplit would always continue even if an active statement execution was in progress. --- .../routing/readwritesplit/readwritesplit.c | 53 +++++++++---------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 3de469452..61292c7e3 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -4314,23 +4314,7 @@ static void handleError(ROUTER *instance, void *router_session, SERVER *srv = rses->rses_master_ref->bref_backend->backend_server; backend_ref_t *bref; bref = get_bref_from_dcb(rses, problem_dcb); - if (bref != NULL) - { - CHK_BACKEND_REF(bref); - bref_clear_state(bref, BREF_IN_USE); - bref_set_state(bref, BREF_CLOSED); - if (BREF_IS_WAITING_RESULT(bref)) - { - bref_clear_state(bref, BREF_WAITING_RESULT); - } - } - else - { - MXS_ERROR("server %s:%d lost the " - "master status but could not locate the " - "corresponding backend ref.", - srv->name, srv->port); - } + bool can_continue = false; if (rses->rses_config.rw_master_failure_mode != RW_FAIL_INSTANTLY && (bref == NULL || !BREF_IS_WAITING_RESULT(bref))) @@ -4344,20 +4328,33 @@ static void handleError(ROUTER *instance, void *router_session, * can't be sure whether it was executed or not. In this * case the safest thing to do is to close the client * connection. */ - *succp = true; + can_continue = true; + } + else if (!srv->master_err_is_logged) + { + MXS_ERROR("Server %s:%d lost the master status. Readwritesplit " + "service can't locate the master. Client sessions " + "will be closed.", srv->name, srv->port); + srv->master_err_is_logged = true; + } + + *succp = can_continue; + + if (bref != NULL) + { + CHK_BACKEND_REF(bref); + bref_clear_state(bref, BREF_IN_USE); + bref_set_state(bref, BREF_CLOSED); + + if (BREF_IS_WAITING_RESULT(bref)) + { + bref_clear_state(bref, BREF_WAITING_RESULT); + } } else { - if (!srv->master_err_is_logged) - { - MXS_ERROR("server %s:%d lost the " - "master status. Readwritesplit " - "service can't locate the master. " - "Client sessions will be closed.", - srv->name, srv->port); - srv->master_err_is_logged = true; - } - *succp = false; + MXS_ERROR("Server %s:%d lost the master status but could not locate the " + "corresponding backend ref.", srv->name, srv->port); } } else From 3870b81244c126c13f8dbfb896f01b33476d383b Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Wed, 2 Nov 2016 19:55:36 +0200 Subject: [PATCH 18/34] Prevent unintentional reconnections to the master server If a master_failure_mode was set to error_on_write, a reconnection to the old master would happen after the following events: - Master server fails and the connection is closed - The master server recovers - A slave fails and the connection is closed - A replacement for the slave is searched If these events took place, the master would be taken back into use with an inconsistent session state. --- server/modules/include/readwritesplit.h | 4 ++-- .../modules/routing/readwritesplit/readwritesplit.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/server/modules/include/readwritesplit.h b/server/modules/include/readwritesplit.h index 81d2d450a..c58136b7f 100644 --- a/server/modules/include/readwritesplit.h +++ b/server/modules/include/readwritesplit.h @@ -54,7 +54,7 @@ typedef enum bref_state BREF_WAITING_RESULT = 0x02, /*< for session commands only */ BREF_QUERY_ACTIVE = 0x04, /*< for other queries */ BREF_CLOSED = 0x08, - BREF_SESCMD_FAILED = 0x10 /*< Backend references that should be dropped */ + BREF_FATAL_FAILURE = 0x10 /*< Backend references that should be dropped */ } bref_state_t; #define BREF_IS_NOT_USED(s) ((s)->bref_state & ~BREF_IN_USE) @@ -62,7 +62,7 @@ typedef enum bref_state #define BREF_IS_WAITING_RESULT(s) ((s)->bref_num_result_wait > 0) #define BREF_IS_QUERY_ACTIVE(s) ((s)->bref_state & BREF_QUERY_ACTIVE) #define BREF_IS_CLOSED(s) ((s)->bref_state & BREF_CLOSED) -#define BREF_HAS_FAILED(s) ((s)->bref_state & BREF_SESCMD_FAILED) +#define BREF_HAS_FAILED(s) ((s)->bref_state & BREF_FATAL_FAILURE) typedef enum backend_type_t { diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 61292c7e3..b13ff7155 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -3394,7 +3394,7 @@ static GWBUF *sescmd_cursor_process_replies(GWBUF *replybuf, bref_clear_state(bref, BREF_QUERY_ACTIVE); bref_clear_state(bref, BREF_IN_USE); bref_set_state(bref, BREF_CLOSED); - bref_set_state(bref, BREF_SESCMD_FAILED); + bref_set_state(bref, BREF_FATAL_FAILURE); if (bref->bref_dcb) { dcb_close(bref->bref_dcb); @@ -3435,7 +3435,7 @@ static GWBUF *sescmd_cursor_process_replies(GWBUF *replybuf, bref_clear_state(&ses->rses_backend_ref[i], BREF_QUERY_ACTIVE); bref_clear_state(&ses->rses_backend_ref[i], BREF_IN_USE); bref_set_state(&ses->rses_backend_ref[i], BREF_CLOSED); - bref_set_state(bref, BREF_SESCMD_FAILED); + bref_set_state(bref, BREF_FATAL_FAILURE); if (ses->rses_backend_ref[i].bref_dcb) { dcb_close(ses->rses_backend_ref[i].bref_dcb); @@ -4329,6 +4329,13 @@ static void handleError(ROUTER *instance, void *router_session, * case the safest thing to do is to close the client * connection. */ can_continue = true; + + if (bref) + { + /** Mark the master as failed so that it won't be + * taken into use if it comes back up. */ + bref_set_state(bref, BREF_FATAL_FAILURE); + } } else if (!srv->master_err_is_logged) { From 8f6c1bdd5b424b8615d1da523d26d8a3093dfa2c Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Thu, 3 Nov 2016 10:16:46 +0200 Subject: [PATCH 19/34] Add more error messages for unexpected situations Some error messages were logged at INFO level and some had conditions that prevent the logging. Removed these restrictions that an error situation is always logged. --- server/modules/protocol/mysql_backend.c | 6 +++++ .../routing/readwritesplit/readwritesplit.c | 26 +++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/server/modules/protocol/mysql_backend.c b/server/modules/protocol/mysql_backend.c index 2d1243413..bfde4ea1e 100644 --- a/server/modules/protocol/mysql_backend.c +++ b/server/modules/protocol/mysql_backend.c @@ -1368,6 +1368,12 @@ static int gw_error_backend_event(DCB *dcb) CHK_SESSION(session); if (SESSION_STATE_DUMMY == session->state) { + if (dcb->persistentstart == 0) + { + /** Not a persistent connection, something is wrong. */ + MXS_ERROR("EPOLLERR event on a non-persistent DCB with no session. " + "Closing connection."); + } dcb_close(dcb); return 1; } diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index b13ff7155..9e7746c0b 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -3132,11 +3132,9 @@ static bool select_connect_backend_servers(backend_ref_t **p_master_ref, /** Failure cases */ else { - if (slaves_connected < min_nslaves) - { - MXS_ERROR("Couldn't establish required amount of " - "slave connections for router session."); - } + MXS_ERROR("Couldn't establish required amount of slave connections for " + "router session. Would need between %d and %d slaves but only have %d.", + min_nslaves, max_nslaves, slaves_connected); /** Clean up connections */ for (int i = 0; i < router_nservers; i++) @@ -3441,11 +3439,11 @@ static GWBUF *sescmd_cursor_process_replies(GWBUF *replybuf, dcb_close(ses->rses_backend_ref[i].bref_dcb); } *reconnect = true; - MXS_INFO("Disabling slave %s:%d, result differs from " - "master's result. Master: %d Slave: %d", - ses->rses_backend_ref[i].bref_backend->backend_server->name, - ses->rses_backend_ref[i].bref_backend->backend_server->port, - bref->reply_cmd, ses->rses_backend_ref[i].reply_cmd); + MXS_WARNING("Disabling slave %s:%d, result differs from " + "master's result. Master: %0x Slave: %0x", + ses->rses_backend_ref[i].bref_backend->backend_server->name, + ses->rses_backend_ref[i].bref_backend->backend_server->port, + bref->reply_cmd, ses->rses_backend_ref[i].reply_cmd); } } } @@ -4368,18 +4366,14 @@ static void handleError(ROUTER *instance, void *router_session, { /** * This is called in hope of getting replacement for - * failed slave(s). This call may free rses. + * failed slave(s). */ *succp = handle_error_new_connection(inst, &rses, problem_dcb, errmsgbuf); } dcb_close(problem_dcb); close_dcb = false; - /* Free the lock if rses still exists */ - if (rses) - { - rses_end_locked_router_action(rses); - } + rses_end_locked_router_action(rses); break; } From 87e94f6bc6e09d274bba70d62083b1349688ff33 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Fri, 4 Nov 2016 10:04:21 +0200 Subject: [PATCH 20/34] Fix debug assertion on packet length in blr_master.c A debug assertion failed due to a NULL buffer but a non-zero packet length. This was caused by a missing reset of the packet length after freeing the buffers. --- server/modules/routing/binlog/blr_master.c | 1 + 1 file changed, 1 insertion(+) diff --git a/server/modules/routing/binlog/blr_master.c b/server/modules/routing/binlog/blr_master.c index 699e008c8..53e251c83 100644 --- a/server/modules/routing/binlog/blr_master.c +++ b/server/modules/routing/binlog/blr_master.c @@ -1068,6 +1068,7 @@ blr_handle_binlog_record(ROUTER_INSTANCE *router, GWBUF *pkt) gwbuf_free(pkt); pkt = NULL; + pkt_length = 0; break; } From c30d0dfc9d6d42bd0b2055a668f9bb5870dff9f9 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Sat, 5 Nov 2016 11:28:18 +0200 Subject: [PATCH 21/34] Add more error logging to DCB handling If an illegal DCB close is done with a backend DCB, it will log the server where it was connected. This allows us to know whether the DCB was connected to a master or a slave. Added more debug assertions to readwritesplit code. The DCBs should never enter the DCB_STATE_DISCONNECTED. Removed useless debug log messages. The messages usually just flood the logs with no use to the developers. --- server/core/dcb.c | 38 ++++++++++++++++--- server/modules/protocol/mysql_backend.c | 12 ------ .../routing/readwritesplit/readwritesplit.c | 6 ++- 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/server/core/dcb.c b/server/core/dcb.c index 3e115d4f6..631d2b839 100644 --- a/server/core/dcb.c +++ b/server/core/dcb.c @@ -1747,6 +1747,38 @@ dcb_grab_writeq(DCB *dcb, bool first_time) return local_writeq; } +static void log_illegal_dcb(DCB *dcb) +{ + const char *connected_to; + + switch (dcb->dcb_role) + { + case DCB_ROLE_BACKEND_HANDLER: + connected_to = dcb->server->unique_name; + break; + + case DCB_ROLE_CLIENT_HANDLER: + connected_to = dcb->remote; + break; + + case DCB_ROLE_INTERNAL: + connected_to = "Internal DCB"; + break; + + case DCB_ROLE_SERVICE_LISTENER: + connected_to = dcb->service->name; + break; + + default: + connected_to = "Illegal DCB role"; + break; + } + + MXS_ERROR("[dcb_close] Error : Removing DCB %p but it is in state %s " + "which is not legal for a call to dcb_close. The DCB is connected to: %s", + dcb, STRDCBSTATE(dcb->state), connected_to); +} + /** * Removes dcb from poll set, and adds it to zombies list. As a consequence, * dcb first moves to DCB_STATE_NOPOLLING, and then to DCB_STATE_ZOMBIE state. @@ -1766,11 +1798,7 @@ dcb_close(DCB *dcb) if (DCB_STATE_UNDEFINED == dcb->state || DCB_STATE_DISCONNECTED == dcb->state) { - MXS_ERROR("%lu [dcb_close] Error : Removing DCB %p but was in state %s " - "which is not legal for a call to dcb_close. ", - pthread_self(), - dcb, - STRDCBSTATE(dcb->state)); + log_illegal_dcb(dcb); raise(SIGABRT); } diff --git a/server/modules/protocol/mysql_backend.c b/server/modules/protocol/mysql_backend.c index bfde4ea1e..aac99511d 100644 --- a/server/modules/protocol/mysql_backend.c +++ b/server/modules/protocol/mysql_backend.c @@ -1032,9 +1032,6 @@ gw_read_and_write(DCB *dcb, MYSQL_session local_session) { GWBUF* errbuf; bool succp; -#if defined(SS_DEBUG) - MXS_ERROR("Backend read error handling #2."); -#endif errbuf = mysql_create_custom_error(1, 0, "Read from backend failed"); @@ -1554,12 +1551,6 @@ static int gw_backend_hangup(DCB *dcb) /* dcb_close(dcb); */ goto retblock; } -#if defined(SS_DEBUG) - if (ses_state != SESSION_STATE_STOPPING) - { - MXS_ERROR("Backend hangup error handling."); - } -#endif router->handleError(router_instance, rsession, @@ -1572,9 +1563,6 @@ static int gw_backend_hangup(DCB *dcb) /** There are no required backends available, close session. */ if (!succp) { -#if defined(SS_DEBUG) - MXS_ERROR("Backend hangup -> closing session."); -#endif spinlock_acquire(&session->ses_lock); session->state = SESSION_STATE_STOPPING; spinlock_release(&session->ses_lock); diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 9e7746c0b..07b83f956 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -1096,7 +1096,8 @@ static bool get_dcb(DCB **p_dcb, ROUTER_CLIENT_SES *rses, backend_type_t btype, { *p_dcb = backend_ref[i].bref_dcb; succp = true; - ss_dassert(backend_ref[i].bref_dcb->state != DCB_STATE_ZOMBIE); + ss_dassert(backend_ref[i].bref_dcb->state != DCB_STATE_ZOMBIE || + backend_ref[i].bref_dcb->state != DCB_STATE_DISCONNECTED); break; } } @@ -1228,7 +1229,8 @@ static bool get_dcb(DCB **p_dcb, ROUTER_CLIENT_SES *rses, backend_type_t btype, *p_dcb = master_bref->bref_dcb; succp = true; /** if bref is in use DCB should not be closed */ - ss_dassert(master_bref->bref_dcb->state != DCB_STATE_ZOMBIE); + ss_dassert(master_bref->bref_dcb->state != DCB_STATE_ZOMBIE || + master_bref->bref_dcb->state != DCB_STATE_DISCONNECTED); } else { From 3d4a7179c781d536dc61536510367d9b985413d3 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Sat, 5 Nov 2016 11:31:06 +0200 Subject: [PATCH 22/34] Prevent active readwritesplit sessions from connecting to a master If a readwritesplit session is active, it should never connect to a new master. This will lead to unexpected results as the session states aren't consistent. --- .../routing/readwritesplit/readwritesplit.c | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 07b83f956..3e16abc67 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -145,7 +145,8 @@ static bool select_connect_backend_servers(backend_ref_t **p_master_ref, int max_rlag, select_criteria_t select_criteria, SESSION *session, - ROUTER_INSTANCE *router); + ROUTER_INSTANCE *router, + bool new_session); static bool get_dcb(DCB **dcb, ROUTER_CLIENT_SES *rses, backend_type_t btype, char *name, int max_rlag); @@ -829,7 +830,7 @@ static void *newSession(ROUTER *router_inst, SESSION *session) succp = select_connect_backend_servers(&master_ref, backend_ref, router_nservers, max_nslaves, max_slave_rlag, client_rses->rses_config.rw_slave_select_criteria, - session, router); + session, router, false); rses_end_locked_router_action(client_rses); @@ -2624,7 +2625,8 @@ static void clientReply(ROUTER *instance, void *router_session, GWBUF *writebuf, router_cli_ses->rses_config.rw_max_slave_replication_lag, router_cli_ses->rses_config.rw_slave_select_criteria, router_cli_ses->rses_master_ref->bref_dcb->session, - router_cli_ses->router); + router_cli_ses->router, + true); } } /** @@ -3011,7 +3013,8 @@ static bool select_connect_backend_servers(backend_ref_t **p_master_ref, int max_slave_rlag, select_criteria_t select_criteria, SESSION *session, - ROUTER_INSTANCE *router) + ROUTER_INSTANCE *router, + bool active_session) { if (p_master_ref == NULL || backend_ref == NULL) { @@ -3032,11 +3035,16 @@ static bool select_connect_backend_servers(backend_ref_t **p_master_ref, } /** - * Existing session : master is already chosen and connected. - * The function was called because new slave must be selected to replace - * failed one. + * New session: + * + * Connect to both master and slaves + * + * Existing session: + * + * Master is already connected or we don't have a master. The function was + * called because new slaves must be selected to replace failed ones. */ - bool master_connected = *p_master_ref != NULL; + bool master_connected = active_session || *p_master_ref != NULL; /** Check slave selection criteria and set compare function */ int (*p)(const void *, const void *) = criteria_cmpfun[select_criteria]; @@ -3085,19 +3093,12 @@ static bool select_connect_backend_servers(backend_ref_t **p_master_ref, } } /* take the master_host for master */ - else if (master_host && (serv == master_host->backend_server)) + else if (!master_connected && master_host && serv == master_host->backend_server) { - /** p_master_ref must be assigned with this backend_ref pointer - * because its original value may have been lost when backend - * references were sorted with qsort. */ - *p_master_ref = &backend_ref[i]; - - if (!master_connected) + if (connect_server(&backend_ref[i], session, false)) { - if (connect_server(&backend_ref[i], session, false)) - { - master_connected = true; - } + *p_master_ref = &backend_ref[i]; + master_connected = true; } } } @@ -4547,7 +4548,7 @@ static bool handle_error_new_connection(ROUTER_INSTANCE *inst, router_nservers, max_nslaves, max_slave_rlag, myrses->rses_config.rw_slave_select_criteria, - ses, inst); + ses, inst, true); } return_succp: From 263688d3af7f1c350b21e30c957082ca00278d3a Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Sun, 6 Nov 2016 07:03:07 +0200 Subject: [PATCH 23/34] Use proper server status macros in readwritesplit Some of the master server status checks didn't check whether the server was actually running. The macros in server.h should always be used instead of manually inspecting the server status. --- server/modules/routing/readwritesplit/readwritesplit.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 3e16abc67..82a5c2115 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -3152,6 +3152,7 @@ static bool select_connect_backend_servers(backend_ref_t **p_master_ref, bref_clear_state(&backend_ref[i], BREF_WAITING_RESULT); } bref_clear_state(&backend_ref[i], BREF_IN_USE); + bref_set_state(&backend_ref[i], BREF_CLOSED); /** Decrease backend's connection counter. */ atomic_add(&backend_ref[i].bref_backend->backend_conn_count, -1); dcb_close(backend_ref[i].bref_dcb); @@ -4891,8 +4892,7 @@ static BACKEND *get_root_master(backend_ref_t *servers, int router_nservers) b = servers[i].bref_backend; - if ((b->backend_server->status & (SERVER_MASTER | SERVER_MAINT)) == - SERVER_MASTER) + if (SERVER_IS_MASTER(b->backend_server)) { if (master_host == NULL || (b->backend_server->depth < master_host->backend_server->depth)) @@ -4927,13 +4927,14 @@ static backend_ref_t *get_root_master_bref(ROUTER_CLIENT_SES *rses) bref = &rses->rses_backend_ref[i]; if (bref && BREF_IS_IN_USE(bref)) { + ss_dassert(!BREF_IS_CLOSED(bref) && !BREF_HAS_FAILED(bref)); if (bref == rses->rses_master_ref) { /** Store master state for better error reporting */ master.status = bref->bref_backend->backend_server->status; } - if (bref->bref_backend->backend_server->status & SERVER_MASTER) + if (SERVER_IS_MASTER(bref->bref_backend->backend_server)) { if (candidate_bref == NULL || (bref->bref_backend->backend_server->depth < From 83f3245f75b76caeb16207ae6d3d848d88843a0b Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Sun, 6 Nov 2016 10:25:56 +0200 Subject: [PATCH 24/34] Improve master routing failure error logging The error logging is now more detailed and tells why the connection is being closed. This should help the user figure out what is happening when write fails and the connection is closed. --- .../routing/readwritesplit/readwritesplit.c | 70 +++++++++++++++---- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 82a5c2115..082a59465 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -1854,6 +1854,60 @@ static int routeQuery(ROUTER *instance, void *router_session, GWBUF *querybuf) return rval; } +/** + * @brief Log master write failure + * + * @param rses Router session + */ +static void log_master_routing_failure(ROUTER_CLIENT_SES *rses, DCB *master_dcb, + DCB *curr_master_dcb) +{ + char errmsg[MAX_SERVER_NAME_LEN * 2 + 100]; // Extra space for error message + + if (master_dcb && curr_master_dcb) + { + /** We found a master but it's not the same connection */ + ss_dassert(master_dcb != curr_master_dcb); + if (master_dcb->server != curr_master_dcb->server) + { + sprintf(errmsg, "Master server changed from '%s' to '%s'", + master_dcb->server->unique_name, + curr_master_dcb->server->unique_name); + } + else + { + ss_dassert(false); // Currently we don't reconnect to the master + sprintf(errmsg, "Connection to master '%s' was recreated", + curr_master_dcb->server->unique_name); + } + } + else if (master_dcb) + { + /** We have an original master connection but we couldn't find it */ + sprintf(errmsg, "The connection to master server '%s' is not available", + master_dcb->server->unique_name); + } + else + { + /** We never had a master connection, the session must be in read-only mode */ + if (rses->rses_config.rw_master_failure_mode != RW_FAIL_INSTANTLY) + { + sprintf(errmsg, "Session is in read-only mode because it was created " + "when no master was available"); + } + else + { + ss_dassert(false); // A session should always have a master reference + sprintf(errmsg, "Was supposed to route to master but couldn't " + "find master in a suitable state"); + } + } + + MXS_WARNING("[%s] Write query received from %s@%s. %s. Closing client connection.", + rses->router->service->name, rses->client_dcb->user, + rses->client_dcb->remote, errmsg); +} + /** * Routing function. Find out query type, backend type, and target DCB(s). * Then route query to found target(s). @@ -2277,26 +2331,14 @@ static bool route_single_stmt(ROUTER_INSTANCE *inst, ROUTER_CLIENT_SES *rses, } else { - if (succp && master_dcb != curr_master_dcb) - { - MXS_INFO("Was supposed to route to master but master has changed."); - } - else - { - MXS_INFO("Was supposed to route to master but couldn't find master" - " in a suitable state."); - } - + /** The original master is not available, we can't route the write */ if (rses->rses_config.rw_master_failure_mode == RW_ERROR_ON_WRITE) { - /** Old master is no longer available */ succp = send_readonly_error(rses->client_dcb); } else { - MXS_WARNING("[%s] Write query received from %s@%s when no master is " - "available, closing client connection.", inst->service->name, - rses->client_dcb->user, rses->client_dcb->remote); + log_master_routing_failure(rses, master_dcb, curr_master_dcb); succp = false; } From 5d930585f9d8c6b8eb93cbfd23e1e1fd5d94869d Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Tue, 8 Nov 2016 15:21:58 +0200 Subject: [PATCH 25/34] Use TLS for connector connections The monitors and services didn't use TLS when they connected to the backend servers. Since there has been no proof of instability, TLS should be enabled. --- server/core/mysql_utils.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/server/core/mysql_utils.c b/server/core/mysql_utils.c index 137c73898..db92ab592 100644 --- a/server/core/mysql_utils.c +++ b/server/core/mysql_utils.c @@ -161,15 +161,7 @@ MYSQL *mxs_mysql_real_connect(MYSQL *con, SERVER *server, const char *user, cons if (listener) { -#ifdef CONNECTOR_C_SSL_AND_OPENSSL_INTERFERENCE_SORTED_OUT - // TODO: No conclusive evidence yet, but tentatively it seems that when OpenSSL is - // TODO: used explicitly (backend SSL) and in conjunction with Connector-C, the - // TODO: latter SSL becomes unstable. So for the time being the monitors and - // TODO: services (fetch users) do not use SSL when connecting to the backend. - - // mysql_ssl_set always returns true. mysql_ssl_set(con, listener->ssl_key, listener->ssl_cert, listener->ssl_ca_cert, NULL, NULL); -#endif } return mysql_real_connect(con, server->name, user, passwd, NULL, server->port, NULL, 0); From 7f500feb9d61b05e7c631189b353e0aa2bd767c2 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Wed, 9 Nov 2016 06:42:52 +0200 Subject: [PATCH 26/34] MXS-710: Fix regression MaxScale would crash if two or more listeners used the same service and one of the listeners failed to start. --- server/core/service.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/server/core/service.c b/server/core/service.c index 1aa106eb3..83850a198 100644 --- a/server/core/service.c +++ b/server/core/service.c @@ -270,8 +270,6 @@ serviceStartPort(SERVICE *service, SERV_LISTENER *port) } if (loaded == -1) { - users_free(service->users); - service->users = NULL; dcb_close(port->listener); port->listener = NULL; goto retblock; @@ -353,10 +351,7 @@ serviceStartPort(SERVICE *service, SERV_LISTENER *port) if ((funcs = (GWPROTOCOL *)load_module(port->protocol, MODULE_PROTOCOL)) == NULL) { - users_free(service->users); - service->users = NULL; dcb_close(port->listener); - service->users = NULL; port->listener = NULL; MXS_ERROR("Unable to load protocol module %s. Listener " "for service %s not started.", @@ -389,11 +384,8 @@ serviceStartPort(SERVICE *service, SERV_LISTENER *port) MXS_ERROR("Failed to create session to service %s.", service->name); - users_free(service->users); - service->users = NULL; dcb_close(port->listener); port->listener = NULL; - service->users = NULL; goto retblock; } } @@ -403,8 +395,6 @@ serviceStartPort(SERVICE *service, SERV_LISTENER *port) port->port, port->protocol, service->name); - users_free(service->users); - service->users = NULL; dcb_close(port->listener); port->listener = NULL; } From 8d893b4e567a17ed2d7d143ffe8d1952d7466b23 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Wed, 9 Nov 2016 07:50:31 +0200 Subject: [PATCH 27/34] Reassing master reference after sorting backends The master reference used by the readwritesplit sessions needs to be reassigned if slave reconnection occurs. This happens because the reference refers to a certain place in the backend reference array instead of the actual backend reference and those places are mixed when the array is sorted. --- .../routing/readwritesplit/readwritesplit.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 082a59465..3317f3ef7 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -3092,11 +3092,26 @@ static bool select_connect_backend_servers(backend_ref_t **p_master_ref, int (*p)(const void *, const void *) = criteria_cmpfun[select_criteria]; ss_dassert(p); + SERVER *old_master = *p_master_ref ? (*p_master_ref)->bref_backend->backend_server : NULL; + /** Sort the pointer list to servers according to slave selection criteria. * The servers that match the criteria the best are at the beginning of * the list. */ qsort(backend_ref, (size_t) router_nservers, sizeof(backend_ref_t), p); + if (master_connected && old_master) + { + /** We sorted the array so the pointer to the old master was changed and + * we need to find the old master again. */ + for (int i = 0; i < router_nservers; i++) + { + if (backend_ref[i].bref_backend->backend_server == old_master) + { + *p_master_ref = &backend_ref[i]; + } + } + } + if (MXS_LOG_PRIORITY_IS_ENABLED(LOG_INFO)) { log_server_connections(select_criteria, backend_ref, router_nservers); From b12a87ef041f5b46738dbb4f9ca375482b4e6c46 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Wed, 9 Nov 2016 08:01:02 +0200 Subject: [PATCH 28/34] Fix false error message in readwritesplit The error message for failure to find master wasn't included in the new master error logging. --- .../modules/routing/readwritesplit/readwritesplit.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 3317f3ef7..2fd3c984b 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -1859,12 +1859,16 @@ static int routeQuery(ROUTER *instance, void *router_session, GWBUF *querybuf) * * @param rses Router session */ -static void log_master_routing_failure(ROUTER_CLIENT_SES *rses, DCB *master_dcb, - DCB *curr_master_dcb) +static void log_master_routing_failure(ROUTER_CLIENT_SES *rses, bool found, + DCB *master_dcb, DCB *curr_master_dcb) { char errmsg[MAX_SERVER_NAME_LEN * 2 + 100]; // Extra space for error message - if (master_dcb && curr_master_dcb) + if (!found) + { + sprintf(errmsg, "Could not find a valid master connection"); + } + else if (master_dcb && curr_master_dcb) { /** We found a master but it's not the same connection */ ss_dassert(master_dcb != curr_master_dcb); @@ -2338,7 +2342,7 @@ static bool route_single_stmt(ROUTER_INSTANCE *inst, ROUTER_CLIENT_SES *rses, } else { - log_master_routing_failure(rses, master_dcb, curr_master_dcb); + log_master_routing_failure(rses, succp, master_dcb, curr_master_dcb); succp = false; } From 7ef8b187b541410810fc090814de48a107d729b7 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Wed, 9 Nov 2016 13:46:02 +0200 Subject: [PATCH 29/34] Do hangups only after server states have been updated The hangup code was refactored into a common function which should only be used after the server states have been updated. This will remove erroneus connections to already failed servers. --- server/core/monitor.c | 13 +++++++++++++ server/include/monitor.h | 9 +++++++++ server/modules/monitor/galeramon.c | 9 ++------- server/modules/monitor/mmmon.c | 11 ++--------- server/modules/monitor/mysql_mon.c | 18 ++---------------- server/modules/monitor/ndbclustermon.c | 2 ++ 6 files changed, 30 insertions(+), 32 deletions(-) diff --git a/server/core/monitor.c b/server/core/monitor.c index fbfbea784..85cca4190 100644 --- a/server/core/monitor.c +++ b/server/core/monitor.c @@ -1055,3 +1055,16 @@ void mon_log_state_change(MONITOR_SERVERS *ptr) free(prev); free(next); } + +void mon_hangup_failed_servers(MONITOR *monitor) +{ + for (MONITOR_SERVERS *ptr = monitor->databases; ptr; ptr = ptr->next) + { + if (mon_status_changed(ptr) && + (!(SERVER_IS_RUNNING(ptr->server)) || + !(SERVER_IS_IN_CLUSTER(ptr->server)))) + { + dcb_hangup_foreach(ptr->server); + } + } +} diff --git a/server/include/monitor.h b/server/include/monitor.h index 97f717958..600e1365a 100644 --- a/server/include/monitor.h +++ b/server/include/monitor.h @@ -222,4 +222,13 @@ connect_result_t mon_connect_to_db(MONITOR* mon, MONITOR_SERVERS *database); void mon_log_connect_error(MONITOR_SERVERS* database, connect_result_t rval); void mon_log_state_change(MONITOR_SERVERS *ptr); +/** + * @brief Hangup connections to failed servers + * + * Injects hangup events for DCB that are connected to servers that are down. + * + * @param monitor Monitor object + */ +void mon_hangup_failed_servers(MONITOR *monitor); + #endif diff --git a/server/modules/monitor/galeramon.c b/server/modules/monitor/galeramon.c index d94994377..31aea325b 100644 --- a/server/modules/monitor/galeramon.c +++ b/server/modules/monitor/galeramon.c @@ -538,16 +538,9 @@ monitorMain(void *arg) STRSRVSTATUS(ptr->server)); } - if (!(SERVER_IS_RUNNING(ptr->server)) || - !(SERVER_IS_IN_CLUSTER(ptr->server))) - { - dcb_hangup_foreach(ptr->server); - } - if (SERVER_IS_DOWN(ptr->server)) { /** Increase this server'e error count */ - dcb_hangup_foreach(ptr->server); ptr->mon_err_count += 1; } @@ -653,6 +646,8 @@ monitorMain(void *arg) } ptr = ptr->next; } + + mon_hangup_failed_servers(mon); } } diff --git a/server/modules/monitor/mmmon.c b/server/modules/monitor/mmmon.c index 0ad8b9873..16bfb2125 100644 --- a/server/modules/monitor/mmmon.c +++ b/server/modules/monitor/mmmon.c @@ -599,15 +599,6 @@ monitorMain(void *arg) /* monitor current node */ monitorDatabase(mon, ptr); - if (mon_status_changed(ptr)) - { - if (!(SERVER_IS_RUNNING(ptr->server)) || - !(SERVER_IS_IN_CLUSTER(ptr->server))) - { - dcb_hangup_foreach(ptr->server); - } - } - if (mon_status_changed(ptr) || mon_print_fail_status(ptr)) { @@ -679,6 +670,8 @@ monitorMain(void *arg) } ptr = ptr->next; } + + mon_hangup_failed_servers(mon); } } diff --git a/server/modules/monitor/mysql_mon.c b/server/modules/monitor/mysql_mon.c index 4305505c2..1046fea10 100644 --- a/server/modules/monitor/mysql_mon.c +++ b/server/modules/monitor/mysql_mon.c @@ -860,22 +860,6 @@ monitorMain(void *arg) ptr->server->name, ptr->server->port); } - /** - * Here we say: If the server's state changed - * so that it isn't running or some other way - * lost cluster membership, call call-back function - * of every DCB for which such callback was - * registered for this kind of issue (DCB_REASON_...) - */ - if (!(SERVER_IS_RUNNING(ptr->server)) || - !(SERVER_IS_IN_CLUSTER(ptr->server))) - { - dcb_hangup_foreach(ptr->server); - } - - - - } if (mon_status_changed(ptr)) @@ -1090,6 +1074,8 @@ monitorMain(void *arg) ptr = ptr->next; } } + + mon_hangup_failed_servers(mon); } /*< while (1) */ } diff --git a/server/modules/monitor/ndbclustermon.c b/server/modules/monitor/ndbclustermon.c index b0b1cdef6..c872c686c 100644 --- a/server/modules/monitor/ndbclustermon.c +++ b/server/modules/monitor/ndbclustermon.c @@ -447,6 +447,8 @@ monitorMain(void *arg) } ptr = ptr->next; } + + mon_hangup_failed_servers(mon); } } From 8edd0d3baee8e5b0ad816777819237e66d7fab1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E9=87=91=E6=A0=8B?= Date: Wed, 2 Nov 2016 19:35:51 +0800 Subject: [PATCH 30/34] Save time when domain name contains wildcard '%' --- server/core/gw_utils.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/core/gw_utils.c b/server/core/gw_utils.c index ccf2a12b2..a8cbc3e7b 100644 --- a/server/core/gw_utils.c +++ b/server/core/gw_utils.c @@ -61,6 +61,12 @@ setipaddress(struct in_addr *a, char *p) hint.ai_socktype = SOCK_STREAM; + if (strchr(p, '%') != NULL) + { + MXS_INFO("Host %s contains wildcard, return", p); + return 0; + } + /* * This is for the listening socket, matching INADDR_ANY only for now. * For future specific addresses bind, a dedicated routine woulbd be better From ba4ab9d35fd204f6840811ff3b5f76943ff6c4a2 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Mon, 14 Nov 2016 14:45:18 +0200 Subject: [PATCH 31/34] Remove sorting of backends with qsort The backends are no longer sorted with qsort. This removes the possibility of stale backend references. --- .../routing/readwritesplit/readwritesplit.c | 161 ++++++++++++------ 1 file changed, 113 insertions(+), 48 deletions(-) diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 2fd3c984b..db8461d2a 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -3035,6 +3035,73 @@ void log_server_connections(select_criteria_t select_criteria, } } +/** + * @brief Check whether it's possible to connect to this server + * + * @param bref Backend reference + * @return True if a connection to this server can be attempted + */ +static bool bref_valid_for_connect(const backend_ref_t *bref) +{ + return !BREF_HAS_FAILED(bref) && + SERVER_IS_RUNNING(bref->bref_backend->backend_server); +} + +/** + * Check whether it's possible to use this server as a slave + * + * @param bref Backend reference + * @param master_host The master server + * @return True if this server is a valid slave candidate + */ +static bool bref_valid_for_slave(const backend_ref_t *bref, const SERVER *master_host) +{ + SERVER *server = bref->bref_backend->backend_server; + + return (SERVER_IS_SLAVE(server) || SERVER_IS_RELAY_SERVER(server)) && + (master_host == NULL || (server != master_host)); +} + +/** + * @brief Find the best slave candidate + * + * This function iterates through @c bref and tries to find the best backend + * reference that is not in use. @c cmpfun will be called to compare the backends. + * + * @param bref Backend reference + * @param n Size of @c bref + * @param master The master server + * @param cmpfun qsort() compatible comparison function + * @return The best slave backend reference or NULL if no candidates could be found + */ +backend_ref_t* get_slave_candidate(backend_ref_t *bref, int n, const SERVER *master, + int (*cmpfun)(const void *, const void *)) +{ + backend_ref_t *candidate = NULL; + + for (int i = 0; i < n; i++) + { + if (!BREF_IS_IN_USE(&bref[i]) && + bref_valid_for_connect(&bref[i]) && + bref_valid_for_slave(&bref[i], master)) + { + if (candidate) + { + if (cmpfun(candidate, &bref[i]) > 0) + { + candidate = &bref[i]; + } + } + else + { + candidate = &bref[i]; + } + } + } + + return candidate; +} + /** * @brief Search suitable backend servers from those of router instance * @@ -3071,10 +3138,11 @@ static bool select_connect_backend_servers(backend_ref_t **p_master_ref, } /* get the root Master */ - BACKEND *master_host = get_root_master(backend_ref, router_nservers); + BACKEND *master_backend = get_root_master(backend_ref, router_nservers); + SERVER *master_host = master_backend ? master_backend->backend_server : NULL; if (router->rwsplit_config.rw_master_failure_mode == RW_FAIL_INSTANTLY && - (master_host == NULL || SERVER_IS_DOWN(master_host->backend_server))) + (master_host == NULL || SERVER_IS_DOWN(master_host))) { MXS_ERROR("Couldn't find suitable Master from %d candidates.", router_nservers); return false; @@ -3098,24 +3166,6 @@ static bool select_connect_backend_servers(backend_ref_t **p_master_ref, SERVER *old_master = *p_master_ref ? (*p_master_ref)->bref_backend->backend_server : NULL; - /** Sort the pointer list to servers according to slave selection criteria. - * The servers that match the criteria the best are at the beginning of - * the list. */ - qsort(backend_ref, (size_t) router_nservers, sizeof(backend_ref_t), p); - - if (master_connected && old_master) - { - /** We sorted the array so the pointer to the old master was changed and - * we need to find the old master again. */ - for (int i = 0; i < router_nservers; i++) - { - if (backend_ref[i].bref_backend->backend_server == old_master) - { - *p_master_ref = &backend_ref[i]; - } - } - } - if (MXS_LOG_PRIORITY_IS_ENABLED(LOG_INFO)) { log_server_connections(select_criteria, backend_ref, router_nservers); @@ -3126,44 +3176,59 @@ static bool select_connect_backend_servers(backend_ref_t **p_master_ref, const int min_nslaves = 0; /*< not configurable at the time */ bool succp = false; - /** - * Choose at least 1+min_nslaves (master and slave) and at most 1+max_nslaves - * servers from the sorted list. First master found is selected. - */ - for (int i = 0; i < router_nservers && - (slaves_connected < max_nslaves || !master_connected); i++) + if (!master_connected) { - SERVER *serv = backend_ref[i].bref_backend->backend_server; - - if (!BREF_HAS_FAILED(&backend_ref[i]) && SERVER_IS_RUNNING(serv)) + /** Find a master server */ + for (int i = 0; i < router_nservers; i++) { - /* check also for relay servers and don't take the master_host */ - if (slaves_found < max_nslaves && - (max_slave_rlag == MAX_RLAG_UNDEFINED || - (serv->rlag != MAX_RLAG_NOT_AVAILABLE && - serv->rlag <= max_slave_rlag)) && - (SERVER_IS_SLAVE(serv) || SERVER_IS_RELAY_SERVER(serv)) && - (master_host == NULL || (serv != master_host->backend_server))) - { - slaves_found += 1; + SERVER *serv = backend_ref[i].bref_backend->backend_server; - if (BREF_IS_IN_USE((&backend_ref[i])) || - connect_server(&backend_ref[i], session, true)) - { - slaves_connected += 1; - } - } - /* take the master_host for master */ - else if (!master_connected && master_host && serv == master_host->backend_server) + if (bref_valid_for_connect(&backend_ref[i]) && + master_host && serv == master_host) { if (connect_server(&backend_ref[i], session, false)) { *p_master_ref = &backend_ref[i]; - master_connected = true; + break; } } } - } /*< for */ + } + + /** Calculate how many connections we already have */ + for (int i = 0; i < router_nservers; i++) + { + if (bref_valid_for_connect(&backend_ref[i]) && + bref_valid_for_slave(&backend_ref[i], master_host)) + { + slaves_found += 1; + + if (BREF_IS_IN_USE(&backend_ref[i])) + { + slaves_connected += 1; + } + } + } + + ss_dassert(slaves_connected < max_nslaves); + + backend_ref_t *bref = get_slave_candidate(backend_ref, router_nservers, master_host, p); + + /** Connect to all possible slaves */ + while (bref && slaves_connected < max_nslaves) + { + if (connect_server(bref, session, true)) + { + slaves_connected += 1; + } + else + { + /** Failed to connect, mark server as failed */ + bref_set_state(bref, BREF_FATAL_FAILURE); + } + + bref = get_slave_candidate(backend_ref, router_nservers, master_host, p); + } /** * Successful cases From da129025eb25e5d56f314220aa95b060d40e5110 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Mon, 14 Nov 2016 16:14:20 +0200 Subject: [PATCH 32/34] Use common backend closing function The backend references now use a common closing function so that all variables are reset to proper states. The stored queries weren't always freed and they would leak memory if left open. --- .../routing/readwritesplit/readwritesplit.c | 91 ++++++++++--------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index db8461d2a..0b0f7ecd8 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -1040,6 +1040,40 @@ static void freeSession(ROUTER *router_instance, void *router_client_session) return; } +/** + * @brief Mark a backend reference as failed + * + * @param bref Backend reference to close + * @param fatal Whether the failure was fatal + */ +static void close_failed_bref(backend_ref_t *bref, bool fatal) +{ + if (BREF_IS_WAITING_RESULT(bref)) + { + bref_clear_state(bref, BREF_WAITING_RESULT); + } + + bref_clear_state(bref, BREF_QUERY_ACTIVE); + bref_clear_state(bref, BREF_IN_USE); + bref_set_state(bref, BREF_CLOSED); + + if (fatal) + { + bref_set_state(bref, BREF_FATAL_FAILURE); + } + + if (sescmd_cursor_is_active(&bref->bref_sescmd_cur)) + { + sescmd_cursor_set_active(&bref->bref_sescmd_cur, false); + } + + if (bref->bref_pending_cmd) + { + gwbuf_free(bref->bref_pending_cmd); + bref->bref_pending_cmd = NULL; + } +} + /** * Provide the router with a pointer to a suitable backend dcb. * @@ -3272,13 +3306,8 @@ static bool select_connect_backend_servers(backend_ref_t **p_master_ref, { ss_dassert(backend_ref[i].bref_backend->backend_conn_count > 0); - /** disconnect opened connections */ - if (BREF_IS_WAITING_RESULT(&backend_ref[i])) - { - bref_clear_state(&backend_ref[i], BREF_WAITING_RESULT); - } - bref_clear_state(&backend_ref[i], BREF_IN_USE); - bref_set_state(&backend_ref[i], BREF_CLOSED); + close_failed_bref(&backend_ref[i], true); + /** Decrease backend's connection counter. */ atomic_add(&backend_ref[i].bref_backend->backend_conn_count, -1); dcb_close(backend_ref[i].bref_dcb); @@ -3513,16 +3542,14 @@ static GWBUF *sescmd_cursor_process_replies(GWBUF *replybuf, /** Set response status received */ bref_clear_state(bref, BREF_WAITING_RESULT); - if (bref->reply_cmd != scmd->reply_cmd) + if (bref->reply_cmd != scmd->reply_cmd && BREF_IS_IN_USE(bref)) { MXS_ERROR("Slave server '%s': response differs from master's response. " "Closing connection due to inconsistent session state.", bref->bref_backend->backend_server->unique_name); - sescmd_cursor_set_active(scur, false); - bref_clear_state(bref, BREF_QUERY_ACTIVE); - bref_clear_state(bref, BREF_IN_USE); - bref_set_state(bref, BREF_CLOSED); - bref_set_state(bref, BREF_FATAL_FAILURE); + + close_failed_bref(bref, true); + if (bref->bref_dcb) { dcb_close(bref->bref_dcb); @@ -3560,10 +3587,9 @@ static GWBUF *sescmd_cursor_process_replies(GWBUF *replybuf, !BREF_IS_CLOSED(&ses->rses_backend_ref[i]) && BREF_IS_IN_USE(&ses->rses_backend_ref[i])) { - bref_clear_state(&ses->rses_backend_ref[i], BREF_QUERY_ACTIVE); - bref_clear_state(&ses->rses_backend_ref[i], BREF_IN_USE); - bref_set_state(&ses->rses_backend_ref[i], BREF_CLOSED); - bref_set_state(bref, BREF_FATAL_FAILURE); + + close_failed_bref(&ses->rses_backend_ref[i], true); + if (ses->rses_backend_ref[i].bref_dcb) { dcb_close(ses->rses_backend_ref[i].bref_dcb); @@ -4457,13 +4483,6 @@ static void handleError(ROUTER *instance, void *router_session, * case the safest thing to do is to close the client * connection. */ can_continue = true; - - if (bref) - { - /** Mark the master as failed so that it won't be - * taken into use if it comes back up. */ - bref_set_state(bref, BREF_FATAL_FAILURE); - } } else if (!srv->master_err_is_logged) { @@ -4478,13 +4497,7 @@ static void handleError(ROUTER *instance, void *router_session, if (bref != NULL) { CHK_BACKEND_REF(bref); - bref_clear_state(bref, BREF_IN_USE); - bref_set_state(bref, BREF_CLOSED); - - if (BREF_IS_WAITING_RESULT(bref)) - { - bref_clear_state(bref, BREF_WAITING_RESULT); - } + close_failed_bref(bref, true); } else { @@ -4551,13 +4564,7 @@ static void handle_error_reply_client(SESSION *ses, ROUTER_CLIENT_SES *rses, if (BREF_IS_IN_USE(bref)) { - bref_clear_state(bref, BREF_IN_USE); - bref_set_state(bref, BREF_CLOSED); - if (BREF_IS_WAITING_RESULT(bref)) - { - bref_clear_state(bref, BREF_WAITING_RESULT); - } - + close_failed_bref(bref, false); dcb_close(backend_dcb); } } @@ -4632,13 +4639,11 @@ static bool handle_error_new_connection(ROUTER_INSTANCE *inst, */ if (BREF_IS_WAITING_RESULT(bref)) { - DCB *client_dcb; - client_dcb = ses->client_dcb; + DCB *client_dcb = ses->client_dcb; client_dcb->func.write(client_dcb, gwbuf_clone(errmsg)); - bref_clear_state(bref, BREF_WAITING_RESULT); } - bref_clear_state(bref, BREF_IN_USE); - bref_set_state(bref, BREF_CLOSED); + + close_failed_bref(bref, false); /** * Error handler is already called for this DCB because From 7ddb4d8e5aee4bf22a5c52c50b0e117e3718f07f Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Mon, 14 Nov 2016 16:35:03 +0200 Subject: [PATCH 33/34] Treat connection_timeout as an integer for `reload_config` The connection_timeout parameter was treated as a boolean which caused errors when the configuration was reloaded. --- server/core/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/core/config.c b/server/core/config.c index 6e6b517d9..f137fadb5 100644 --- a/server/core/config.c +++ b/server/core/config.c @@ -1383,7 +1383,7 @@ process_config_update(CONFIG_CONTEXT *context) if (connection_timeout) { - serviceSetTimeout(service, config_truth_value(connection_timeout)); + serviceSetTimeout(service, atoi(connection_timeout)); } if (strlen(max_connections)) From a7c21eee88c3c2530c06fea8a20c718004eed244 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Tue, 15 Nov 2016 22:14:23 +0200 Subject: [PATCH 34/34] Always treat master failures as fatal errors When a connection to the master fails, readwritesplit should always treat it the same way. Previously, if a connection to the master was lost but it hadn't lost the master status, the failure would be treated like a slave server failure. --- server/modules/routing/readwritesplit/readwritesplit.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 0b0f7ecd8..c5e33c0d6 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -4462,12 +4462,10 @@ static void handleError(ROUTER *instance, void *router_session, * If master has lost its Master status error can't be * handled so that session could continue. */ - if (rses->rses_master_ref && rses->rses_master_ref->bref_dcb == problem_dcb && - !SERVER_IS_MASTER(rses->rses_master_ref->bref_backend->backend_server)) + if (rses->rses_master_ref && rses->rses_master_ref->bref_dcb == problem_dcb) { SERVER *srv = rses->rses_master_ref->bref_backend->backend_server; - backend_ref_t *bref; - bref = get_bref_from_dcb(rses, problem_dcb); + backend_ref_t *bref = get_bref_from_dcb(rses, problem_dcb); bool can_continue = false; if (rses->rses_config.rw_master_failure_mode != RW_FAIL_INSTANTLY && @@ -4484,7 +4482,7 @@ static void handleError(ROUTER *instance, void *router_session, * connection. */ can_continue = true; } - else if (!srv->master_err_is_logged) + else if (!SERVER_IS_MASTER(srv) && !srv->master_err_is_logged) { MXS_ERROR("Server %s:%d lost the master status. Readwritesplit " "service can't locate the master. Client sessions "