MXS-2456: Cap transaction replay attempts
In most cases it is reasonable to stop attempting transaction replays after a certain number of failed attempts. This prevents a transaction from being replayed over and over again on the same server if, for example, that server keeps crashing. The limit is set with the new `transaction_replay_attempts` parameter, which defaults to 5.
This commit is contained in:
@ -504,25 +504,26 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE()
|
||||
MXS_MODULE_OPT_NONE,
|
||||
master_failure_mode_values
|
||||
},
|
||||
{"max_slave_replication_lag", MXS_MODULE_PARAM_DURATION, "0s", MXS_MODULE_OPT_DURATION_S },
|
||||
{"max_slave_connections", MXS_MODULE_PARAM_STRING, MAX_SLAVE_COUNT},
|
||||
{"retry_failed_reads", MXS_MODULE_PARAM_BOOL, "true" },
|
||||
{"prune_sescmd_history", MXS_MODULE_PARAM_BOOL, "false" },
|
||||
{"disable_sescmd_history", MXS_MODULE_PARAM_BOOL, "false" },
|
||||
{"max_sescmd_history", MXS_MODULE_PARAM_COUNT, "50" },
|
||||
{"strict_multi_stmt", MXS_MODULE_PARAM_BOOL, "false" },
|
||||
{"strict_sp_calls", MXS_MODULE_PARAM_BOOL, "false" },
|
||||
{"master_accept_reads", MXS_MODULE_PARAM_BOOL, "false" },
|
||||
{"connection_keepalive", MXS_MODULE_PARAM_DURATION, "300s", MXS_MODULE_OPT_DURATION_S },
|
||||
{"causal_reads", MXS_MODULE_PARAM_BOOL, "false" },
|
||||
{"causal_reads_timeout", MXS_MODULE_PARAM_DURATION, "10s", MXS_MODULE_OPT_DURATION_S },
|
||||
{"master_reconnection", MXS_MODULE_PARAM_BOOL, "false" },
|
||||
{"delayed_retry", MXS_MODULE_PARAM_BOOL, "false" },
|
||||
{"delayed_retry_timeout", MXS_MODULE_PARAM_DURATION, "10s", MXS_MODULE_OPT_DURATION_S },
|
||||
{"transaction_replay", MXS_MODULE_PARAM_BOOL, "false" },
|
||||
{"transaction_replay_max_size",MXS_MODULE_PARAM_SIZE, "1Mi" },
|
||||
{"optimistic_trx", MXS_MODULE_PARAM_BOOL, "false" },
|
||||
{"lazy_connect", MXS_MODULE_PARAM_BOOL, "false" },
|
||||
{"max_slave_replication_lag", MXS_MODULE_PARAM_DURATION, "0s", MXS_MODULE_OPT_DURATION_S},
|
||||
{"max_slave_connections", MXS_MODULE_PARAM_STRING, MAX_SLAVE_COUNT},
|
||||
{"retry_failed_reads", MXS_MODULE_PARAM_BOOL, "true"},
|
||||
{"prune_sescmd_history", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"disable_sescmd_history", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"max_sescmd_history", MXS_MODULE_PARAM_COUNT, "50"},
|
||||
{"strict_multi_stmt", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"strict_sp_calls", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"master_accept_reads", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"connection_keepalive", MXS_MODULE_PARAM_DURATION, "300s", MXS_MODULE_OPT_DURATION_S},
|
||||
{"causal_reads", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"causal_reads_timeout", MXS_MODULE_PARAM_DURATION, "10s", MXS_MODULE_OPT_DURATION_S},
|
||||
{"master_reconnection", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"delayed_retry", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"delayed_retry_timeout", MXS_MODULE_PARAM_DURATION, "10s", MXS_MODULE_OPT_DURATION_S},
|
||||
{"transaction_replay", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"transaction_replay_max_size",MXS_MODULE_PARAM_SIZE, "1Mi"},
|
||||
{"transaction_replay_attempts",MXS_MODULE_PARAM_COUNT, "5"},
|
||||
{"optimistic_trx", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"lazy_connect", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{MXS_END_MODULE_PARAMS}
|
||||
}
|
||||
};
|
||||
|
@ -160,6 +160,7 @@ struct Config
|
||||
, delayed_retry_timeout(params->get_duration<seconds>("delayed_retry_timeout").count())
|
||||
, transaction_replay(params->get_bool("transaction_replay"))
|
||||
, trx_max_size(params->get_size("transaction_replay_max_size"))
|
||||
, trx_max_attempts(params->get_integer("transaction_replay_attempts"))
|
||||
, optimistic_trx(params->get_bool("optimistic_trx"))
|
||||
, lazy_connect(params->get_bool("lazy_connect"))
|
||||
{
|
||||
@ -223,6 +224,7 @@ struct Config
|
||||
uint64_t delayed_retry_timeout; /**< How long to delay until an error is returned */
|
||||
bool transaction_replay; /**< Replay failed transactions */
|
||||
size_t trx_max_size; /**< Max transaction size for replaying */
|
||||
int64_t trx_max_attempts; /**< Maximum number of transaction replay attempts */
|
||||
bool optimistic_trx; /**< Enable optimistic transactions */
|
||||
bool lazy_connect; /**< Create connections only when needed */
|
||||
};
|
||||
|
@ -406,6 +406,7 @@ void RWSplitSession::trx_replay_next_stmt()
|
||||
// No more statements to execute
|
||||
m_is_replay_active = false;
|
||||
mxb::atomic::add(&m_router->stats().n_trx_replay, 1, mxb::atomic::RELAXED);
|
||||
m_num_trx_replays = 0;
|
||||
|
||||
if (!m_replayed_trx.empty())
|
||||
{
|
||||
@ -882,7 +883,7 @@ bool RWSplitSession::start_trx_replay()
|
||||
{
|
||||
bool rval = false;
|
||||
|
||||
if (m_config.transaction_replay && m_can_replay_trx)
|
||||
if (m_config.transaction_replay && m_can_replay_trx && m_num_trx_replays < m_config.trx_max_attempts)
|
||||
{
|
||||
if (!m_is_replay_active)
|
||||
{
|
||||
@ -950,6 +951,7 @@ bool RWSplitSession::start_trx_replay()
|
||||
"transaction had no statements and no query was interrupted");
|
||||
}
|
||||
|
||||
++m_num_trx_replays;
|
||||
rval = true;
|
||||
}
|
||||
|
||||
|
@ -343,6 +343,7 @@ private:
|
||||
mxs::Buffer m_interrupted_query; /**< Query that was interrupted mid-transaction. */
|
||||
Trx m_orig_trx; /**< The backup of the transaction we're replaying */
|
||||
mxs::Buffer m_orig_stmt; /**< The backup of the statement that was interrupted */
|
||||
int64_t m_num_trx_replays = 0; /**< How many times trx replay has been attempted */
|
||||
|
||||
otrx_state m_otrx_state = OTRX_INACTIVE; /**< Optimistic trx state*/
|
||||
|
||||
|
Reference in New Issue
Block a user