From 220fea3546d98ab85cc09f607986a4390e367c85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?=
Date: Fri, 31 May 2019 09:01:39 +0300
Subject: [PATCH] MXS-2464: Retry failed session commands

If the execution of a session command fails on a master, it is retried
again. If the master is not available, the response will be returned from
one of the slaves.
---
 .../routing/readwritesplit/rwsplitsession.cc  | 44 +++++++++++++++++--
 .../routing/readwritesplit/rwsplitsession.hh  |  1 +
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/server/modules/routing/readwritesplit/rwsplitsession.cc b/server/modules/routing/readwritesplit/rwsplitsession.cc
index 9acf51944..875a8d63c 100644
--- a/server/modules/routing/readwritesplit/rwsplitsession.cc
+++ b/server/modules/routing/readwritesplit/rwsplitsession.cc
@@ -747,6 +747,7 @@ void RWSplitSession::clientReply(GWBUF* writebuf, DCB* backend_dcb)
         // Backend is still in use and has more session commands to execute
         if (backend->execute_session_command() && backend->is_waiting_result())
         {
+            MXS_INFO("%lu session commands left on '%s'", backend->session_command_count(), backend->name());
             m_expected_responses++;
         }
     }
@@ -881,6 +882,44 @@ bool RWSplitSession::start_trx_replay()
     return rval;
 }
 
+bool RWSplitSession::retry_master_query(SRWBackend& backend)   // Re-queues the work that failed on the master; true if retry_query() was called
+{
+    bool can_continue = false;      // Only set to true by the two branches that call retry_query()
+
+    if (backend->has_session_commands())
+    {
+        // Try to route the session command again. If the master is not available, the response will be
+        // returned from one of the slaves.
+
+        mxb_assert(!m_current_query.get());     // No stored query is expected while a session command is pending
+        mxb_assert(!m_sescmd_list.empty());     // pop_back() below needs at least one element
+        mxb_assert(m_sescmd_count >= 2);        // Keeps the counter at >= 1 after the decrement below
+        MXS_INFO("Retrying session command due to master failure: %s",
+                 backend->next_session_command()->to_string().c_str());
+
+        // Before routing it, pop the failed session command off the list and decrement the number of
+        // executed session commands. This "overwrites" the existing command and prevents history duplication.
+        m_sescmd_list.pop_back();
+        --m_sescmd_count;
+
+        retry_query(backend->next_session_command()->deep_copy_buffer());   // NOTE(review): presumably a separate copy, so the backend's own command is left intact - confirm
+        can_continue = true;
+    }
+    else if (m_current_query.get())
+    {
+        retry_query(m_current_query.release());     // NOTE(review): release() presumably hands ownership of the stored buffer to retry_query() - confirm
+        can_continue = true;
+    }
+    else
+    {
+        // This should never happen: there is neither a pending session command nor a stored query to retry
+        mxb_assert_message(!true, "m_current_query is empty and no session commands being executed");   // Condition is constant false
+        MXS_ERROR("Current query unexpectedly empty when trying to retry query on master");
+    }
+
+    return can_continue;
+}
+
 /**
  * @brief Router error handling routine
  *
@@ -945,10 +984,9 @@ void RWSplitSession::handleError(GWBUF* errmsgbuf,
                     m_expected_responses--;
                     errmsg += " Lost connection to master server while waiting for a result.";
 
-                    if (m_current_query.get() && can_retry_query())
+                    if (can_retry_query())
                     {
-                        can_continue = true;
-                        retry_query(m_current_query.release());
+                        can_continue = retry_master_query(backend);     // false when there was nothing to retry
                     }
                     else if (m_config.master_failure_mode == RW_ERROR_ON_WRITE)
                     {
diff --git a/server/modules/routing/readwritesplit/rwsplitsession.hh b/server/modules/routing/readwritesplit/rwsplitsession.hh
index 6860ef0d7..9da56dcf2 100644
--- a/server/modules/routing/readwritesplit/rwsplitsession.hh
+++ b/server/modules/routing/readwritesplit/rwsplitsession.hh
@@ -225,6 +225,7 @@ private:
     int get_max_replication_lag();
     mxs::SRWBackend& get_backend_from_dcb(DCB* dcb);
 
+    bool retry_master_query(mxs::SRWBackend& backend);  // Retries the pending session command or the stored query; true if a retry was queued
     void handle_error_reply_client(DCB* backend_dcb, GWBUF* errmsg);
     bool handle_error_new_connection(DCB* backend_dcb, GWBUF* errmsg);
     void manage_transactions(mxs::SRWBackend& backend, GWBUF* writebuf);