MXS-2456: Cap transaction replay attempts

In most cases it is reasonable to stop attempting transaction replays
after a certain number of failed attempts. This prevents a transaction from
being replayed on the same server over and over again if, for example,
that server keeps crashing.
This commit is contained in:
Markus Mäkelä
2019-04-30 13:45:48 +03:00
parent bc654849e8
commit 26b2897280
4 changed files with 26 additions and 20 deletions

View File

@ -504,25 +504,26 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE()
MXS_MODULE_OPT_NONE,
master_failure_mode_values
},
{"max_slave_replication_lag", MXS_MODULE_PARAM_DURATION, "0s", MXS_MODULE_OPT_DURATION_S },
{"max_slave_connections", MXS_MODULE_PARAM_STRING, MAX_SLAVE_COUNT},
{"retry_failed_reads", MXS_MODULE_PARAM_BOOL, "true" },
{"prune_sescmd_history", MXS_MODULE_PARAM_BOOL, "false" },
{"disable_sescmd_history", MXS_MODULE_PARAM_BOOL, "false" },
{"max_sescmd_history", MXS_MODULE_PARAM_COUNT, "50" },
{"strict_multi_stmt", MXS_MODULE_PARAM_BOOL, "false" },
{"strict_sp_calls", MXS_MODULE_PARAM_BOOL, "false" },
{"master_accept_reads", MXS_MODULE_PARAM_BOOL, "false" },
{"connection_keepalive", MXS_MODULE_PARAM_DURATION, "300s", MXS_MODULE_OPT_DURATION_S },
{"causal_reads", MXS_MODULE_PARAM_BOOL, "false" },
{"causal_reads_timeout", MXS_MODULE_PARAM_DURATION, "10s", MXS_MODULE_OPT_DURATION_S },
{"master_reconnection", MXS_MODULE_PARAM_BOOL, "false" },
{"delayed_retry", MXS_MODULE_PARAM_BOOL, "false" },
{"delayed_retry_timeout", MXS_MODULE_PARAM_DURATION, "10s", MXS_MODULE_OPT_DURATION_S },
{"transaction_replay", MXS_MODULE_PARAM_BOOL, "false" },
{"transaction_replay_max_size",MXS_MODULE_PARAM_SIZE, "1Mi" },
{"optimistic_trx", MXS_MODULE_PARAM_BOOL, "false" },
{"lazy_connect", MXS_MODULE_PARAM_BOOL, "false" },
{"max_slave_replication_lag", MXS_MODULE_PARAM_DURATION, "0s", MXS_MODULE_OPT_DURATION_S},
{"max_slave_connections", MXS_MODULE_PARAM_STRING, MAX_SLAVE_COUNT},
{"retry_failed_reads", MXS_MODULE_PARAM_BOOL, "true"},
{"prune_sescmd_history", MXS_MODULE_PARAM_BOOL, "false"},
{"disable_sescmd_history", MXS_MODULE_PARAM_BOOL, "false"},
{"max_sescmd_history", MXS_MODULE_PARAM_COUNT, "50"},
{"strict_multi_stmt", MXS_MODULE_PARAM_BOOL, "false"},
{"strict_sp_calls", MXS_MODULE_PARAM_BOOL, "false"},
{"master_accept_reads", MXS_MODULE_PARAM_BOOL, "false"},
{"connection_keepalive", MXS_MODULE_PARAM_DURATION, "300s", MXS_MODULE_OPT_DURATION_S},
{"causal_reads", MXS_MODULE_PARAM_BOOL, "false"},
{"causal_reads_timeout", MXS_MODULE_PARAM_DURATION, "10s", MXS_MODULE_OPT_DURATION_S},
{"master_reconnection", MXS_MODULE_PARAM_BOOL, "false"},
{"delayed_retry", MXS_MODULE_PARAM_BOOL, "false"},
{"delayed_retry_timeout", MXS_MODULE_PARAM_DURATION, "10s", MXS_MODULE_OPT_DURATION_S},
{"transaction_replay", MXS_MODULE_PARAM_BOOL, "false"},
{"transaction_replay_max_size",MXS_MODULE_PARAM_SIZE, "1Mi"},
{"transaction_replay_attempts",MXS_MODULE_PARAM_COUNT, "5"},
{"optimistic_trx", MXS_MODULE_PARAM_BOOL, "false"},
{"lazy_connect", MXS_MODULE_PARAM_BOOL, "false"},
{MXS_END_MODULE_PARAMS}
}
};

View File

@ -160,6 +160,7 @@ struct Config
, delayed_retry_timeout(params->get_duration<seconds>("delayed_retry_timeout").count())
, transaction_replay(params->get_bool("transaction_replay"))
, trx_max_size(params->get_size("transaction_replay_max_size"))
, trx_max_attempts(params->get_integer("transaction_replay_attempts"))
, optimistic_trx(params->get_bool("optimistic_trx"))
, lazy_connect(params->get_bool("lazy_connect"))
{
@ -223,6 +224,7 @@ struct Config
uint64_t delayed_retry_timeout; /**< How long to delay until an error is returned */
bool transaction_replay; /**< Replay failed transactions */
size_t trx_max_size; /**< Max transaction size for replaying */
int64_t trx_max_attempts; /**< Maximum number of transaction replay attempts */
bool optimistic_trx; /**< Enable optimistic transactions */
bool lazy_connect; /**< Create connections only when needed */
};

View File

@ -406,6 +406,7 @@ void RWSplitSession::trx_replay_next_stmt()
// No more statements to execute
m_is_replay_active = false;
mxb::atomic::add(&m_router->stats().n_trx_replay, 1, mxb::atomic::RELAXED);
m_num_trx_replays = 0;
if (!m_replayed_trx.empty())
{
@ -882,7 +883,7 @@ bool RWSplitSession::start_trx_replay()
{
bool rval = false;
if (m_config.transaction_replay && m_can_replay_trx)
if (m_config.transaction_replay && m_can_replay_trx && m_num_trx_replays < m_config.trx_max_attempts)
{
if (!m_is_replay_active)
{
@ -950,6 +951,7 @@ bool RWSplitSession::start_trx_replay()
"transaction had no statements and no query was interrupted");
}
++m_num_trx_replays;
rval = true;
}

View File

@ -343,6 +343,7 @@ private:
mxs::Buffer m_interrupted_query; /**< Query that was interrupted mid-transaction. */
Trx m_orig_trx; /**< The backup of the transaction we're replaying */
mxs::Buffer m_orig_stmt; /**< The backup of the statement that was interrupted */
int64_t m_num_trx_replays = 0; /**< How many times trx replay has been attempted */
otrx_state m_otrx_state = OTRX_INACTIVE; /**< Optimistic trx state*/