MXS-3220: Fix crash on failed master history replay

This could happen if a session command triggers a master reconnection and
the connection fails while the history replay is ongoing. The code assumed
that history replay would only happen when a query was in the query queue.
This commit is contained in:
Markus Mäkelä 2020-10-13 14:31:49 +03:00
parent 96840a18be
commit 933ce0c86a
No known key found for this signature in database
GPG Key ID: 5CE746D557ACC499

View File

@ -996,11 +996,11 @@ bool RWSplitSession::retry_master_query(RWBackend* backend)
{
bool can_continue = false;
if (backend->is_replaying_history())
if (backend->is_replaying_history() && !m_query_queue.empty())
{
// Master failed while it was replaying the session command history
// Master failed while it was replaying the session command history while a query was queued for
// execution. Re-execute it to trigger a reconnection.
mxb_assert(m_config.master_reconnection);
mxb_assert(!m_query_queue.empty());
retry_query(m_query_queue.front().release());
m_query_queue.pop_front();
@ -1012,13 +1012,14 @@ bool RWSplitSession::retry_master_query(RWBackend* backend)
// was expected failed: try to route the session command again. If the master is not available,
// the response will be returned from one of the slaves if the configuration allows it.
mxb_assert(backend->next_session_command()->get_position() == m_recv_sescmd + 1);
mxb_assert_message(backend->next_session_command()->get_position() == m_recv_sescmd + 1
|| backend->is_replaying_history(),
"The master should be executing the latest session command "
"or attempting to replay existing history.");
mxb_assert(m_qc.current_route_info().target() == TARGET_ALL);
mxb_assert(!m_current_query.get());
mxb_assert(!m_sescmd_list.empty());
mxb_assert(m_sescmd_count >= 2);
MXS_INFO("Retrying session command due to master failure: %s",
backend->next_session_command()->to_string().c_str());
// MXS-2609: Maxscale crash in RWSplitSession::retry_master_query()
// To prevent a crash from happening, we make sure the session command list is not empty before
@ -1030,12 +1031,17 @@ bool RWSplitSession::retry_master_query(RWBackend* backend)
return false;
}
MXS_INFO("Retrying session command due to master failure: %s",
m_sescmd_list.back()->to_string().c_str());
GWBUF* buffer = m_sescmd_list.back()->deep_copy_buffer();
// Before routing it, pop the failed session command off the list and decrement the number of
// executed session commands. This "overwrites" the existing command and prevents history duplication.
m_sescmd_list.pop_back();
--m_sescmd_count;
retry_query(backend->next_session_command()->deep_copy_buffer());
retry_query(buffer);
can_continue = true;
}
else if (m_current_query.get())