From 968d43799fa90ab72ae25ed104d04c28ff84a8fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Wed, 4 Nov 2020 12:24:55 +0200 Subject: [PATCH] MXS-3273: Ignore unexpected responses Unlike readwritesplit, schemarouter will process all responses from backends as if they are expected. There are cases where errors are generated that aren't sent as a response to a query. These queries must be ignored and not routed to the client. Copying the code as-is from readwritesplit isn't the cleanest solution but it avoids refactoring code in a patch release. The custom error number (2003) used by the backend protocol code was not an actual error number that the server would send. The error code in question was for an error that only the C connector returns: CR_CONN_HOST_ERROR. Using ER_CONNECTION_KILLED as the error number better conveys the fact that the connection was killed due to a reason not related to any ongoing query. By using a known error number that is correctly handled, we also avoid writing errors to the client in the middle of a resultset or as the initial response to a result. This explains why the problem described in MXS-3267 happened in the first place: an unrelated connection was lost in the middle of a resultset and the error was interpreted as the end of a resultset. As a result of there being more data to be read, the unexpected result state messages were logged. --- server/modules/protocol/MySQL/mysql_common.cc | 2 +- .../schemarouter/schemaroutersession.cc | 88 +++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/server/modules/protocol/MySQL/mysql_common.cc b/server/modules/protocol/MySQL/mysql_common.cc index ca2cc8c8f..c887be2f9 100644 --- a/server/modules/protocol/MySQL/mysql_common.cc +++ b/server/modules/protocol/MySQL/mysql_common.cc @@ -207,7 +207,7 @@ GWBUF* mysql_create_custom_error(int packet_number, mysql_state = "HY000"; field_count = 0xff; - gw_mysql_set_byte2(mysql_err, /* mysql_errno */ 2003); + gw_mysql_set_byte2(mysql_err, ER_CONNECTION_KILLED); mysql_statemsg[0] = '#'; memcpy(mysql_statemsg + 1, mysql_state, 5); diff --git a/server/modules/routing/schemarouter/schemaroutersession.cc b/server/modules/routing/schemarouter/schemaroutersession.cc index b085d5809..fa45b06a2 100644 --- a/server/modules/routing/schemarouter/schemaroutersession.cc +++ b/server/modules/routing/schemarouter/schemaroutersession.cc @@ -539,6 +539,55 @@ void SchemaRouterSession::process_sescmd_response(SSRBackend& bref, GWBUF** ppPa } } +namespace +{ +static bool connection_was_killed(GWBUF* buffer) +{ + bool rval = false; + + if (mxs_mysql_is_err_packet(buffer)) + { + uint8_t buf[2]; + // First two bytes after the 0xff byte are the error code + gwbuf_copy_data(buffer, MYSQL_HEADER_LEN + 1, 2, buf); + uint16_t errcode = gw_mysql_get_byte2(buf); + rval = errcode == ER_CONNECTION_KILLED; + } + + return rval; +} + +bool server_is_shutting_down(GWBUF* writebuf) +{ + uint64_t err = mxs_mysql_get_mysql_errno(writebuf); + return err == ER_SERVER_SHUTDOWN || err == ER_NORMAL_SHUTDOWN || err == ER_SHUTDOWN_COMPLETE; +} + +mxs::Buffer::iterator skip_packet(mxs::Buffer::iterator it) +{ + uint32_t len = *it++; + len |= (*it++) << 8; + len |= (*it++) << 16; + it.advance(len + 1); // Payload length plus the fourth header byte (packet sequence) + return it; +} + +GWBUF* erase_last_packet(GWBUF* input) +{ + mxs::Buffer buf(input); + auto it = buf.begin(); + auto end = it; + + while ((end = skip_packet(it)) != buf.end()) + { + it = end; + } + + buf.erase(it, end); + return buf.release(); +} +} + void SchemaRouterSession::clientReply(GWBUF* pPacket, DCB* pDcb) { SSRBackend bref = get_bref_from_dcb(pDcb); @@ -549,8 +598,47 @@ void SchemaRouterSession::clientReply(GWBUF* pPacket, DCB* pDcb) return; } + if (bref->get_reply_state() == REPLY_STATE_DONE + && !connection_was_killed(pPacket) + && !server_is_shutting_down(pPacket)) + { + /** If we receive an unexpected response from the server, the internal + * logic cannot handle this situation. Routing the reply straight to + * the client should be the safest thing to do at this point. */ + MXS_SESSION_ROUTE_REPLY(pDcb->session, pPacket); + return; + } + bref->process_reply(pPacket); + const RWBackend::Error& error = bref->error(); + + if (error.is_unexpected_error()) + { + // The connection was killed, we can safely ignore it. When the TCP connection is + // closed, the router's error handling will sort it out. + if (error.code() == ER_CONNECTION_KILLED) + { + bref->set_close_reason("Connection was killed"); + } + else + { + mxb_assert(error.code() == ER_SERVER_SHUTDOWN + || error.code() == ER_NORMAL_SHUTDOWN + || error.code() == ER_SHUTDOWN_COMPLETE); + bref->set_close_reason(std::string("Server '") + bref->name() + "' is shutting down"); + } + + // The server sent an error that we didn't expect: treat it as if the connection was closed. The + // client shouldn't see this error as we can replace the closed connection. + + if (!(pPacket = erase_last_packet(pPacket))) + { + // Nothing to route to the client + return; + } + } + if (m_state & INIT_MAPPING) { handle_mapping_reply(bref, &pPacket);