cherrypick to 3.1_opensource_release

This commit is contained in:
handora
2021-08-11 15:33:27 +08:00
committed by wangzelin.wzl
parent a6d5591ddf
commit 4a58022b57
18 changed files with 249 additions and 46 deletions

View File

@ -844,8 +844,15 @@ int ObMemtableCtx::trans_replay_end(const bool commit, const int64_t trans_versi
int ret = OB_SUCCESS;
int cs_ret = OB_SUCCESS;
if (commit && 0 != checksum && !ObServerConfig::get_instance().ignore_replay_checksum_error) {
const uint64_t checksum4 = calc_checksum4();
// We must calculate the checksum and generate the checksum_log_ts even when
// the checksum verification is unnecessary. This because the trans table
// merge may be triggered after clear state in which the callback has already
const uint64_t checksum4 = calc_checksum4();
if (commit
&& 0 != checksum
&& GET_MIN_CLUSTER_VERSION() >= CLUSTER_VERSION_3100
&& !ObServerConfig::get_instance().ignore_replay_checksum_error) {
if (checksum != checksum4) {
cs_ret = OB_CHECKSUM_ERROR;
TRANS_LOG(ERROR, "MT_CTX: replay checksum error", K(ret), K(*this), K(commit), K(checksum), K(checksum4));
@ -1572,6 +1579,49 @@ int ObMemtableCtx::remove_callback_for_uncommited_txn(ObMemtable* mt, int64_t& c
return ret;
}
// If leader switches to follower and marks some callbacks which have not been
// submmitted to sliding window. We need clean the dirty callbacks because
// follower can not submit the log and decrement the pending_cb_count_ and the
// mini merge will not be allowed before txn finishes. Which add the dependency
// from minor merge to txn termination.
//
// NB: Because we remove the trans node before txn termination, we should guarantee
// the txn should finally abort. Except the case 3.2(TODO: handora.qc): generalize
// the process of clean dirty callbacks
//
// We solve the problem in the following way:
// - When leader is revoking and no non-2pc logs of txn has already been
// submitted to sliding window and no on-the-fly log:
// - Case 1: We rollback the txn immediately, so no dirty callbacks need to be
// removed
// - When leader is revoking and some non-2pc logs of txn has already been
// submitted to sliding window:
// - Case 2.1: We only solve the case with no on-the-fly logs(because we have no idea
// whether the on-the-fly log is paxos-choosen or not)
// - If the state is not synced successfully(txn need abort), so we remove all
// marked trans node
// - When replaying start working:
// - Case 3.1: If txn has a on-the-fly log, it means some logs are not paxos-choosen
// successfully(txn need abort), so we remove all marked trans node
// - Case 3.2: If txn has no on-the-fly log and no trans state is synced by the leader
// transfer(txn may need abort, while we donot have the information whether the
// original leader successfully synced the log), and we also remove all marked trans node.
//
int ObMemtableCtx::clean_dirty_callbacks()
{
int ret = OB_SUCCESS;
ObByteLockGuard guard(lock_);
if (OB_FAIL(trans_mgr_.clean_dirty_callbacks())) {
TRANS_LOG(WARN, "fail to dirty callbacks", K(ret));
} else {
TRANS_LOG(INFO, "clean dirty callbacks successfully", K(*this));
}
return ret;
}
int ObMemtableCtx::mark_frozen_data(
const ObMemtable* const frozen_memtable, const ObMemtable* const active_memtable, bool& marked, int64_t& cb_cnt)
{