From 933ce0c86a87b42e267ffacdaa0a8b3f5cccf495 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Tue, 13 Oct 2020 14:31:49 +0300 Subject: [PATCH] MXS-3220: Fix crash on failed master history replay This could happen if a session command triggers a master reconnection and the connection fails while the history replay is ongoing. The code assumed that history replay would only happen when a query was in the query queue. --- .../routing/readwritesplit/rwsplitsession.cc | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/server/modules/routing/readwritesplit/rwsplitsession.cc b/server/modules/routing/readwritesplit/rwsplitsession.cc index 63d57f531..2b5d7200d 100644 --- a/server/modules/routing/readwritesplit/rwsplitsession.cc +++ b/server/modules/routing/readwritesplit/rwsplitsession.cc @@ -996,11 +996,11 @@ bool RWSplitSession::retry_master_query(RWBackend* backend) { bool can_continue = false; - if (backend->is_replaying_history()) + if (backend->is_replaying_history() && !m_query_queue.empty()) { - // Master failed while it was replaying the session command history + // Master failed while it was replaying the session command history while a query was queued for + // execution. Re-execute it to trigger a reconnection. mxb_assert(m_config.master_reconnection); - mxb_assert(!m_query_queue.empty()); retry_query(m_query_queue.front().release()); m_query_queue.pop_front(); @@ -1012,13 +1012,14 @@ bool RWSplitSession::retry_master_query(RWBackend* backend) // was expected failed: try to route the session command again. If the master is not available, // the response will be returned from one of the slaves if the configuration allows it. - mxb_assert(backend->next_session_command()->get_position() == m_recv_sescmd + 1); + mxb_assert_message(backend->next_session_command()->get_position() == m_recv_sescmd + 1 + || backend->is_replaying_history(), + "The master should be executing the latest session command " + "or attempting to replay existing history."); mxb_assert(m_qc.current_route_info().target() == TARGET_ALL); mxb_assert(!m_current_query.get()); mxb_assert(!m_sescmd_list.empty()); mxb_assert(m_sescmd_count >= 2); - MXS_INFO("Retrying session command due to master failure: %s", - backend->next_session_command()->to_string().c_str()); // MXS-2609: Maxscale crash in RWSplitSession::retry_master_query() // To prevent a crash from happening, we make sure the session command list is not empty before @@ -1030,12 +1031,17 @@ bool RWSplitSession::retry_master_query(RWBackend* backend) return false; } + MXS_INFO("Retrying session command due to master failure: %s", + m_sescmd_list.back()->to_string().c_str()); + + GWBUF* buffer = m_sescmd_list.back()->deep_copy_buffer(); + // Before routing it, pop the failed session command off the list and decrement the number of // executed session commands. This "overwrites" the existing command and prevents history duplication. m_sescmd_list.pop_back(); --m_sescmd_count; - retry_query(backend->next_session_command()->deep_copy_buffer()); + retry_query(buffer); can_continue = true; } else if (m_current_query.get())