MXS-2609: Fix session command mixup on master failure
If a master failed during an ongoing session command history replay, the failure was treated as if a normal session command had failed, which resulted in the already-executed session command being re-executed on all servers at the wrong logical position. To fix this, history replay must be distinguished from normal session command execution. When a connection that is replaying the history fails, the query routing simply needs to be attempted again.
This commit is contained in:
@ -879,17 +879,40 @@ bool RWSplitSession::retry_master_query(SRWBackend& backend)
|
|||||||
{
|
{
|
||||||
bool can_continue = false;
|
bool can_continue = false;
|
||||||
|
|
||||||
if (backend->has_session_commands())
|
if (backend->is_replaying_history())
|
||||||
{
|
{
|
||||||
// Try to route the session command again. If the master is not available, the response will be
|
// Master failed while it was replaying the session command history
|
||||||
// returned from one of the slaves.
|
mxb_assert(m_config.master_reconnection);
|
||||||
|
mxb_assert(!m_query_queue.empty());
|
||||||
|
|
||||||
|
retry_query(m_query_queue.front().release());
|
||||||
|
m_query_queue.pop_front();
|
||||||
|
can_continue = true;
|
||||||
|
}
|
||||||
|
else if (backend->has_session_commands())
|
||||||
|
{
|
||||||
|
// We were routing a session command to all servers but the master server from which the response
|
||||||
|
// was expected failed: try to route the session command again. If the master is not available,
|
||||||
|
// the response will be returned from one of the slaves if the configuration allows it.
|
||||||
|
|
||||||
|
mxb_assert(backend->next_session_command()->get_position() == m_recv_sescmd + 1);
|
||||||
|
mxb_assert(m_qc.current_route_info().target() == TARGET_ALL);
|
||||||
mxb_assert(!m_current_query.get());
|
mxb_assert(!m_current_query.get());
|
||||||
mxb_assert(!m_sescmd_list.empty());
|
mxb_assert(!m_sescmd_list.empty());
|
||||||
mxb_assert(m_sescmd_count >= 2);
|
mxb_assert(m_sescmd_count >= 2);
|
||||||
MXS_INFO("Retrying session command due to master failure: %s",
|
MXS_INFO("Retrying session command due to master failure: %s",
|
||||||
backend->next_session_command()->to_string().c_str());
|
backend->next_session_command()->to_string().c_str());
|
||||||
|
|
||||||
|
// MXS-2609: Maxscale crash in RWSplitSession::retry_master_query()
|
||||||
|
// To prevent a crash from happening, we make sure the session command list is not empty before
|
||||||
|
// we touch it. This should be converted into a debug assertion once the true root cause of the
|
||||||
|
// problem is found.
|
||||||
|
if (m_sescmd_count < 2 || m_sescmd_list.empty())
|
||||||
|
{
|
||||||
|
MXS_WARNING("Session command list was empty when it should not be");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Before routing it, pop the failed session command off the list and decrement the number of
|
// Before routing it, pop the failed session command off the list and decrement the number of
|
||||||
// executed session commands. This "overwrites" the existing command and prevents history duplication.
|
// executed session commands. This "overwrites" the existing command and prevents history duplication.
|
||||||
m_sescmd_list.pop_back();
|
m_sescmd_list.pop_back();
|
||||||
@ -900,6 +923,8 @@ bool RWSplitSession::retry_master_query(SRWBackend& backend)
|
|||||||
}
|
}
|
||||||
else if (m_current_query.get())
|
else if (m_current_query.get())
|
||||||
{
|
{
|
||||||
|
// A query was in progress, try to route it again
|
||||||
|
mxb_assert(m_prev_target == backend);
|
||||||
retry_query(m_current_query.release());
|
retry_query(m_current_query.release());
|
||||||
can_continue = true;
|
can_continue = true;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user