From aba3d456da43439c7a35bf4514a34268c98e4c9a Mon Sep 17 00:00:00 2001 From: obdev Date: Fri, 17 May 2024 13:16:16 +0000 Subject: [PATCH] [FEAT MERGE] remove retain_ctx use tx_data to hold mds op and optimise transfer move tx with check tx is needed --- deps/oblib/src/lib/lock/ob_spin_rwlock.h | 31 + mittest/CMakeLists.txt | 4 +- mittest/env/ob_simple_server_helper.cpp | 51 ++ mittest/env/ob_simple_server_helper.h | 2 + mittest/mtlenv/storage/test_memtable_v2.cpp | 5 +- .../test_multi_version_sstable_merge.cpp | 4 + mittest/mtlenv/test_tx_data_table.cpp | 28 +- mittest/simple_server/CMakeLists.txt | 2 + .../env/ob_simple_cluster_test_base.cpp | 6 +- .../rewrite_function_for_test_big_tx_data.cpp | 19 +- mittest/simple_server/test_big_tx_data.cpp | 2 +- mittest/simple_server/test_transfer_tx.cpp | 6 +- .../simple_server/test_tx_ctx_table_mit.cpp | 2 +- mittest/simple_server/test_tx_data.cpp | 373 +++++++++++ .../virtual_table/ob_all_virtual_tx_data.cpp | 6 +- .../virtual_table/ob_all_virtual_tx_data.h | 6 +- .../ob_shared_memory_allocator_mgr.h | 6 +- src/share/allocator/ob_tx_data_allocator.cpp | 58 +- src/share/allocator/ob_tx_data_allocator.h | 31 +- src/storage/CMakeLists.txt | 1 + .../high_availability/ob_transfer_handler.cpp | 84 ++- .../high_availability/ob_transfer_handler.h | 5 +- .../high_availability/ob_transfer_struct.cpp | 3 + .../high_availability/ob_transfer_struct.h | 3 +- src/storage/ls/ob_ls.h | 1 + src/storage/ls/ob_ls_tablet_service.cpp | 5 + src/storage/ls/ob_ls_tx_service.cpp | 30 +- src/storage/ls/ob_ls_tx_service.h | 14 +- src/storage/multi_data_source/buffer_ctx.cpp | 21 +- src/storage/multi_data_source/buffer_ctx.h | 6 +- .../runtime_utility/mds_factory.cpp | 34 +- .../runtime_utility/mds_factory.h | 4 +- .../tablelock/ob_mem_ctx_table_lock.cpp | 14 + src/storage/tablelock/ob_mem_ctx_table_lock.h | 2 + .../ob_tablet_start_transfer_mds_helper.cpp | 50 +- .../tablet/ob_tablet_transfer_tx_ctx.cpp | 99 +-- .../tablet/ob_tablet_transfer_tx_ctx.h 
| 34 +- src/storage/tx/ob_ctx_tx_data.cpp | 128 +--- src/storage/tx/ob_ctx_tx_data.h | 9 +- src/storage/tx/ob_multi_data_source.cpp | 139 +++++ src/storage/tx/ob_multi_data_source.h | 30 +- src/storage/tx/ob_trans_ctx_mgr_v4.cpp | 122 ++-- src/storage/tx/ob_trans_ctx_mgr_v4.h | 15 +- src/storage/tx/ob_trans_functor.h | 78 ++- src/storage/tx/ob_trans_part_ctx.cpp | 578 +++++++++++++----- src/storage/tx/ob_trans_part_ctx.h | 24 +- src/storage/tx/ob_trans_submit_log_cb.cpp | 14 + src/storage/tx/ob_trans_submit_log_cb.h | 12 +- src/storage/tx/ob_tx_data_define.cpp | 278 +++++++-- src/storage/tx/ob_tx_data_define.h | 66 +- src/storage/tx/ob_tx_data_functor.cpp | 33 +- src/storage/tx/ob_tx_data_functor.h | 9 + src/storage/tx/ob_tx_data_op.cpp | 333 ++++++++++ src/storage/tx/ob_tx_data_op.h | 213 +++++++ src/storage/tx/ob_tx_log.h | 3 +- src/storage/tx/ob_tx_replay_executor.cpp | 2 +- src/storage/tx_table/ob_tx_data_cache.cpp | 28 +- src/storage/tx_table/ob_tx_data_cache.h | 2 +- src/storage/tx_table/ob_tx_data_hash_map.cpp | 3 +- src/storage/tx_table/ob_tx_data_memtable.cpp | 25 +- src/storage/tx_table/ob_tx_data_memtable.h | 7 +- src/storage/tx_table/ob_tx_data_table.cpp | 97 +-- src/storage/tx_table/ob_tx_data_table.h | 4 +- src/storage/tx_table/ob_tx_table.cpp | 16 +- src/storage/tx_table/ob_tx_table.h | 20 +- .../tx_table/ob_tx_table_interface.cpp | 11 + src/storage/tx_table/ob_tx_table_interface.h | 2 + src/storage/tx_table/ob_tx_table_iterator.cpp | 4 + unittest/storage/tx/it/test_register_mds.cpp | 2 + .../storage/tx/mock_utils/basic_fake_define.h | 9 +- .../storage/tx_table/test_tx_ctx_table.cpp | 2 + 71 files changed, 2682 insertions(+), 688 deletions(-) create mode 100644 mittest/simple_server/test_tx_data.cpp create mode 100644 src/storage/tx/ob_tx_data_op.cpp create mode 100644 src/storage/tx/ob_tx_data_op.h diff --git a/deps/oblib/src/lib/lock/ob_spin_rwlock.h b/deps/oblib/src/lib/lock/ob_spin_rwlock.h index 34962a22f..22847cb53 100644 --- 
a/deps/oblib/src/lib/lock/ob_spin_rwlock.h +++ b/deps/oblib/src/lib/lock/ob_spin_rwlock.h @@ -84,6 +84,37 @@ private: DISALLOW_COPY_AND_ASSIGN(SpinRLockGuard); }; +class SpinRLockManualGuard +{ +public: + explicit SpinRLockManualGuard() + : lock_(nullptr), ret_(OB_SUCCESS) + { + } + ~SpinRLockManualGuard() + { + if (OB_LIKELY(OB_SUCCESS == ret_) && OB_NOT_NULL(lock_)) { + if (OB_UNLIKELY(OB_SUCCESS != (ret_ = lock_->unlock()))) { + COMMON_LOG_RET(WARN, ret_, "Fail to unlock, ", K_(ret)); + } else { + lock_ = nullptr; + } + } + } + void lock(SpinRWLock &lock) { + lock_ = &lock; + if (OB_UNLIKELY(OB_SUCCESS != (ret_ = lock_->rdlock()))) { + COMMON_LOG_RET(WARN, ret_, "Fail to read lock, ", K_(ret)); + } + } + inline int get_ret() const { return ret_; } +private: + SpinRWLock *lock_; + int ret_; +private: + DISALLOW_COPY_AND_ASSIGN(SpinRLockManualGuard); +}; + class SpinWLockGuard { public: diff --git a/mittest/CMakeLists.txt b/mittest/CMakeLists.txt index eb18cc658..2e98a7460 100644 --- a/mittest/CMakeLists.txt +++ b/mittest/CMakeLists.txt @@ -1,4 +1,6 @@ -add_library(mit_env env/ob_simple_server_helper.cpp) +set(MIT_SRCS env/ob_simple_server_helper.cpp) + +add_library(mit_env ${MIT_SRCS}) target_include_directories(mit_env PUBLIC ${CMAKE_SOURCE_DIR}/unittest ${CMAKE_SOURCE_DIR}/mittest) diff --git a/mittest/env/ob_simple_server_helper.cpp b/mittest/env/ob_simple_server_helper.cpp index ae32bc370..843142c24 100644 --- a/mittest/env/ob_simple_server_helper.cpp +++ b/mittest/env/ob_simple_server_helper.cpp @@ -311,6 +311,57 @@ int SimpleServerHelper::freeze(uint64_t tenant_id, ObLSID ls_id, ObTabletID tabl return ret; } +int SimpleServerHelper::freeze_tx_data(uint64_t tenant_id, ObLSID ls_id) +{ + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + } else { + storage::checkpoint::ObCheckpointExecutor *checkpoint_executor = 
ls_handle.get_ls()->get_checkpoint_executor(); + ObTxDataMemtableMgr *tx_data_memtable_mgr + = dynamic_cast<ObTxDataMemtableMgr *>( + dynamic_cast<ObLSTxService *>( + checkpoint_executor->handlers_[logservice::TRANS_SERVICE_LOG_BASE_TYPE]) + ->common_checkpoints_[storage::checkpoint::ObCommonCheckpointType::TX_DATA_MEMTABLE_TYPE]); + if (OB_ISNULL(tx_data_memtable_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("checkpoint obj is null", KR(ret)); + } else if (OB_FAIL(tx_data_memtable_mgr->flush(share::SCN::max_scn(), + checkpoint::INVALID_TRACE_ID))) { + } else { + usleep(10 * 1000 * 1000); + } + } + } + return ret; +} + +int SimpleServerHelper::freeze_tx_ctx(uint64_t tenant_id, ObLSID ls_id) +{ + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + } else { + storage::checkpoint::ObCheckpointExecutor *checkpoint_executor = ls_handle.get_ls()->get_checkpoint_executor(); + ObTxCtxMemtable *tx_ctx_memtable + = dynamic_cast<ObTxCtxMemtable *>( + dynamic_cast<ObLSTxService *>( + checkpoint_executor->handlers_[logservice::TRANS_SERVICE_LOG_BASE_TYPE]) + ->common_checkpoints_[storage::checkpoint::ObCommonCheckpointType::TX_CTX_MEMTABLE_TYPE]); + if (OB_ISNULL(tx_ctx_memtable)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("checkpoint obj is null", KR(ret)); + } else if (OB_FAIL(tx_ctx_memtable->flush(share::SCN::max_scn(), 0))) { + } else { + usleep(10 * 1000 * 1000); + } + } + } + return ret; +} + int SimpleServerHelper::wait_flush_finish(uint64_t tenant_id, ObLSID ls_id, ObTabletID tablet_id) { int ret = OB_SUCCESS; diff --git a/mittest/env/ob_simple_server_helper.h b/mittest/env/ob_simple_server_helper.h index 41d136355..6bf4bcbf8 100644 --- a/mittest/env/ob_simple_server_helper.h +++ b/mittest/env/ob_simple_server_helper.h @@ -70,6 +70,8 @@ public: static int get_ls_end_scn(uint64_t tenant_id, ObLSID ls_id, SCN &end_scn); static int wait_replay_advance(uint64_t tenant_id, ObLSID ls_id, SCN end_scn); static int wait_checkpoint_newest(uint64_t
tenant_id, ObLSID ls_id); + static int freeze_tx_ctx(uint64_t tenant_id, ObLSID ls_id); + static int freeze_tx_data(uint64_t tenant_id, ObLSID ls_id); static int wait_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObTxState tx_state); static int wait_tx_exit(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id); static int wait_flush_finish(uint64_t tenant_id, ObLSID ls_id, ObTabletID tablet_id); diff --git a/mittest/mtlenv/storage/test_memtable_v2.cpp b/mittest/mtlenv/storage/test_memtable_v2.cpp index 54a214046..b6b243658 100644 --- a/mittest/mtlenv/storage/test_memtable_v2.cpp +++ b/mittest/mtlenv/storage/test_memtable_v2.cpp @@ -257,8 +257,9 @@ public: { ObUndoAction undo(from, to); ObPartTransCtx *tx_ctx = store_ctx->mvcc_acc_ctx_.tx_ctx_; - EXPECT_EQ(OB_SUCCESS, - tx_ctx->ctx_tx_data_.add_undo_action(undo)); + ObTxDataGuard tx_data_guard; + EXPECT_EQ(OB_SUCCESS, tx_ctx->ls_tx_ctx_mgr_->get_tx_table()->alloc_tx_data(tx_data_guard)); + EXPECT_EQ(OB_SUCCESS, tx_ctx->insert_undo_action_to_tx_table_(undo, tx_data_guard, SCN::min_scn())); ObMemtableCtx *mt_ctx = store_ctx->mvcc_acc_ctx_.mem_ctx_; ObTxCallbackList &cb_list = mt_ctx->trans_mgr_.callback_list_; for (ObMvccRowCallback *iter = (ObMvccRowCallback *)(cb_list.get_guard()->get_next()); diff --git a/mittest/mtlenv/storage/test_multi_version_sstable_merge.cpp b/mittest/mtlenv/storage/test_multi_version_sstable_merge.cpp index ceed7ab1b..70c8793a5 100644 --- a/mittest/mtlenv/storage/test_multi_version_sstable_merge.cpp +++ b/mittest/mtlenv/storage/test_multi_version_sstable_merge.cpp @@ -1113,6 +1113,7 @@ TEST_F(TestMultiVersionMerge, test_merge_with_multi_trans) for (int64_t i = 1; i <= 4; i++) { ObTxData *tx_data = new ObTxData(); + ASSERT_EQ(OB_SUCCESS, tx_data->init_tx_op()); transaction::ObTransID tx_id = i; // fill in data @@ -1276,6 +1277,7 @@ TEST_F(TestMultiVersionMerge, test_merge_with_multi_trans_can_compact) for (int64_t i = 1; i <= 5; i++) { ObTxData *tx_data = new ObTxData(); + 
ASSERT_EQ(OB_SUCCESS, tx_data->init_tx_op()); transaction::ObTransID tx_id = i; // fill in data @@ -1439,6 +1441,7 @@ TEST_F(TestMultiVersionMerge, test_merge_with_multi_trans_can_not_compact) for (int64_t i = 1; i <= 5; i++) { ObTxData *tx_data = new ObTxData(); + ASSERT_EQ(OB_SUCCESS, tx_data->init_tx_op()); transaction::ObTransID tx_id = i; // fill in data @@ -3338,6 +3341,7 @@ TEST_F(TestMultiVersionMerge, test_running_trans_cross_macro_with_abort_sql_seq) ASSERT_NE(nullptr, tx_table = tx_table_guard.get_tx_table()); ObTxData *tx_data = new ObTxData(); + ASSERT_EQ(OB_SUCCESS, tx_data->init_tx_op()); transaction::ObTransID tx_id = 1; // fill in data diff --git a/mittest/mtlenv/test_tx_data_table.cpp b/mittest/mtlenv/test_tx_data_table.cpp index aafd7770c..18fe919bd 100644 --- a/mittest/mtlenv/test_tx_data_table.cpp +++ b/mittest/mtlenv/test_tx_data_table.cpp @@ -250,6 +250,7 @@ void TestTxDataTable::insert_tx_data_() tx_data_guard.reset(); ASSERT_EQ(OB_SUCCESS, tx_data_table_.alloc_tx_data(tx_data_guard, false)); ASSERT_NE(nullptr, tx_data = tx_data_guard.tx_data()); + ASSERT_EQ(OB_SUCCESS, tx_data->init_tx_op()); // fill in data tx_data->tx_id_ = tx_id; @@ -287,6 +288,7 @@ void TestTxDataTable::insert_rollback_tx_data_() ObTxData *tx_data = nullptr; ASSERT_EQ(OB_SUCCESS, tx_data_table_.alloc_tx_data(tx_data_guard, false)); ASSERT_NE(nullptr, tx_data = tx_data_guard.tx_data()); + ASSERT_EQ(OB_SUCCESS, tx_data->init_tx_op()); // fill in data tx_data->tx_id_ = tx_id; @@ -464,23 +466,24 @@ void TestTxDataTable::do_undo_status_test() ObTxDataGuard tx_data_guard; ASSERT_EQ(OB_SUCCESS, tx_data_table_.alloc_tx_data(tx_data_guard, false)); ASSERT_NE(nullptr, tx_data = tx_data_guard.tx_data()); + ASSERT_EQ(OB_SUCCESS, tx_data->init_tx_op()); tx_data->tx_id_ = rand(); for (int i = 1; i <= 1001; i++) { transaction::ObUndoAction undo_action(ObTxSEQ(10 * (i + 1), 0), ObTxSEQ(10 * i, 0)); ASSERT_EQ(OB_SUCCESS, tx_data->add_undo_action(&tx_table_, undo_action)); } - 
ASSERT_EQ(1000 / TX_DATA_UNDO_ACT_MAX_NUM_PER_NODE + 1, tx_data->undo_status_list_.undo_node_cnt_); + ASSERT_EQ(1000 / TX_DATA_UNDO_ACT_MAX_NUM_PER_NODE + 1, tx_data->op_guard_->get_undo_status_list().undo_node_cnt_); { transaction::ObUndoAction undo_action(ObTxSEQ(10000000, 0), ObTxSEQ(10,0)); ASSERT_EQ(OB_SUCCESS, tx_data->add_undo_action(&tx_table_, undo_action)); } - STORAGETEST_LOG(INFO, "", K(tx_data->undo_status_list_)); - ASSERT_EQ(1, tx_data->undo_status_list_.head_->size_); - ASSERT_EQ(nullptr, tx_data->undo_status_list_.head_->next_); - ASSERT_EQ(1, tx_data->undo_status_list_.undo_node_cnt_); + STORAGETEST_LOG(INFO, "", K(tx_data->op_guard_->get_undo_status_list())); + ASSERT_EQ(1, tx_data->op_guard_->get_undo_status_list().head_->size_); + ASSERT_EQ(nullptr, tx_data->op_guard_->get_undo_status_list().head_->next_); + ASSERT_EQ(1, tx_data->op_guard_->get_undo_status_list().undo_node_cnt_); } { @@ -490,23 +493,24 @@ void TestTxDataTable::do_undo_status_test() ObTxDataGuard tx_data_guard; ASSERT_EQ(OB_SUCCESS, tx_data_table_.alloc_tx_data(tx_data_guard, false)); ASSERT_NE(nullptr, tx_data = tx_data_guard.tx_data()); + ASSERT_EQ(OB_SUCCESS, tx_data->init_tx_op()); tx_data->tx_id_ = rand(); for (int i = 1; i <= 14; i++) { transaction::ObUndoAction undo_action(ObTxSEQ(i + 1,0), ObTxSEQ(i,0)); ASSERT_EQ(OB_SUCCESS, tx_data->add_undo_action(&tx_table_, undo_action)); } - ASSERT_EQ(2, tx_data->undo_status_list_.undo_node_cnt_); + ASSERT_EQ(2, tx_data->op_guard_->get_undo_status_list().undo_node_cnt_); { transaction::ObUndoAction undo_action(ObTxSEQ(15, 0), ObTxSEQ(7,0)); ASSERT_EQ(OB_SUCCESS, tx_data->add_undo_action(&tx_table_, undo_action)); } - STORAGETEST_LOG(INFO, "", K(tx_data->undo_status_list_)); - ASSERT_EQ(7, tx_data->undo_status_list_.head_->size_); - ASSERT_EQ(nullptr, tx_data->undo_status_list_.head_->next_); - ASSERT_EQ(1, tx_data->undo_status_list_.undo_node_cnt_); + STORAGETEST_LOG(INFO, "", K(tx_data->op_guard_->get_undo_status_list())); + 
ASSERT_EQ(7, tx_data->op_guard_->get_undo_status_list().head_->size_); + ASSERT_EQ(nullptr, tx_data->op_guard_->get_undo_status_list().head_->next_); + ASSERT_EQ(1, tx_data->op_guard_->get_undo_status_list().undo_node_cnt_); } } @@ -516,6 +520,7 @@ void TestTxDataTable::test_serialize_with_action_cnt_(int cnt) ObTxDataGuard tx_data_guard; ASSERT_EQ(OB_SUCCESS, tx_data_table_.alloc_tx_data(tx_data_guard, false)); ASSERT_NE(nullptr, tx_data = tx_data_guard.tx_data()); + ASSERT_EQ(OB_SUCCESS, tx_data->init_tx_op()); tx_data->tx_id_ = transaction::ObTransID(269381); tx_data->commit_version_.convert_for_logservice(ObTimeUtil::current_time_ns()); tx_data->end_scn_.convert_for_logservice(ObTimeUtil::current_time_ns()); @@ -532,7 +537,7 @@ void TestTxDataTable::test_serialize_with_action_cnt_(int cnt) } else { node_cnt = cnt / 7 + 1; } - ASSERT_EQ(node_cnt, tx_data->undo_status_list_.undo_node_cnt_); + ASSERT_EQ(node_cnt, tx_data->op_guard_->get_undo_status_list().undo_node_cnt_); char *buf = nullptr; ObArenaAllocator allocator; @@ -652,6 +657,7 @@ void TestTxDataTable::do_repeat_insert_test() { ObTxDataGuard tx_data_guard; ASSERT_EQ(OB_SUCCESS, tx_data_table_.alloc_tx_data(tx_data_guard, false)); ASSERT_NE(nullptr, tx_data = tx_data_guard.tx_data()); + ASSERT_EQ(OB_SUCCESS, tx_data->init_tx_op()); // fill in data tx_data->tx_id_ = tx_id; diff --git a/mittest/simple_server/CMakeLists.txt b/mittest/simple_server/CMakeLists.txt index 93b7cb193..91e9357f9 100644 --- a/mittest/simple_server/CMakeLists.txt +++ b/mittest/simple_server/CMakeLists.txt @@ -35,6 +35,7 @@ function(ob_offline_observer case case_file) EXCLUDE_FROM_ALL ${case_file} ${OBSERVER_TEST_SRCS} + ../${MIT_SRCS} ) target_include_directories(${case} PUBLIC ${CMAKE_SOURCE_DIR}/unittest ${CMAKE_SOURCE_DIR}/mittest) @@ -53,6 +54,7 @@ endfunction() ob_offline_observer(test_simple_ob test_ob_simple_cluster.cpp) ob_offline_observer(test_transfer_tx test_transfer_tx.cpp) +ob_offline_observer(test_tx_data 
test_tx_data.cpp) ob_unittest_observer(test_transfer_no_kill_tx test_transfer_tx.cpp) ob_unittest_observer(test_standby_balance test_standby_balance_ls_group.cpp) diff --git a/mittest/simple_server/env/ob_simple_cluster_test_base.cpp b/mittest/simple_server/env/ob_simple_cluster_test_base.cpp index a873972c0..748543f58 100644 --- a/mittest/simple_server/env/ob_simple_cluster_test_base.cpp +++ b/mittest/simple_server/env/ob_simple_cluster_test_base.cpp @@ -106,7 +106,8 @@ ObSimpleClusterTestBase::~ObSimpleClusterTestBase() void ObSimpleClusterTestBase::SetUp() { - SERVER_LOG(INFO, "SetUp"); + auto case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); + SERVER_LOG(INFO, "SetUp>>>>>>>>>>>>>>", K(case_name)); int ret = OB_SUCCESS; if (!is_started_) { if (OB_FAIL(start())) { @@ -126,7 +127,8 @@ void ObSimpleClusterTestBase::SetUp() void ObSimpleClusterTestBase::TearDown() { - + auto case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); + SERVER_LOG(INFO, "TearDown>>>>>>>>>>>>>>", K(case_name)); } void ObSimpleClusterTestBase::TearDownTestCase() diff --git a/mittest/simple_server/rewrite_function_for_test_big_tx_data.cpp b/mittest/simple_server/rewrite_function_for_test_big_tx_data.cpp index 084594d05..1e5be9d36 100644 --- a/mittest/simple_server/rewrite_function_for_test_big_tx_data.cpp +++ b/mittest/simple_server/rewrite_function_for_test_big_tx_data.cpp @@ -84,9 +84,10 @@ int ObTxData::add_undo_action(ObTxTable *tx_table, // STORAGE_LOG(DEBUG, "do add_undo_action"); UNUSED(undo_node); int ret = OB_SUCCESS; - SpinWLockGuard guard(undo_status_list_.lock_); + init_tx_op(); + SpinWLockGuard guard(op_guard_->get_undo_status_list().lock_); ObTxDataTable *tx_data_table = nullptr; - ObUndoStatusNode *node = undo_status_list_.head_; + ObUndoStatusNode *node = op_guard_->get_undo_status_list().head_; if (OB_ISNULL(tx_table)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "tx table is nullptr.", KR(ret)); @@ -104,9 +105,9 @@ int 
ObTxData::add_undo_action(ObTxTable *tx_table, STORAGE_LOG(WARN, "alloc_undo_status_node() fail", KR(ret)); } else { new_node->next_ = node; - undo_status_list_.head_ = new_node; + op_guard_->get_undo_status_list().head_ = new_node; node = new_node; - undo_status_list_.undo_node_cnt_++; + op_guard_->get_undo_status_list().undo_node_cnt_++; } for (int64_t idx = 0; idx < TX_DATA_UNDO_ACT_MAX_NUM_PER_NODE; ++idx) { node->undo_actions_[node->size_++] = new_undo_action; @@ -140,8 +141,8 @@ int ObTxDataMemtableScanIterator // not exactly accurate, but enough for unittest ATOMIC_STORE(&BIGGEST_TX_DATA_SIZE, buffer_len_); } - if (tx_data_->undo_status_list_.undo_node_cnt_ > 0) { - std::cout << "tx_id:" << tx_data_->tx_id_.get_id() << ", undo cnt:" << tx_data_->undo_status_list_.undo_node_cnt_ << ", generate size:" << generate_size_ << std::endl; + if (tx_data_->op_guard_->get_undo_status_list().undo_node_cnt_ > 0) { + std::cout << "tx_id:" << tx_data_->tx_id_.get_id() << ", undo cnt:" << tx_data_->op_guard_->get_undo_status_list().undo_node_cnt_ << ", generate size:" << generate_size_ << std::endl; } ATOMIC_STORE(&DUMP_BIG_TX_DATA, true); /**************************************************************************************************/ @@ -225,7 +226,11 @@ int ObTxDataSingleRowGetter::deserialize_tx_data_from_store_buffers_(ObTxData &t tx_data.tx_id_.get_id(), tx_data_buffers_.count()); ATOMIC_STORE(&LOAD_BIG_TX_DATA, true); - std::cout << "read big tx id from sstable, tx_id:" << ATOMIC_LOAD(&TEST_TX_ID) << ", undo cnt:" << tx_data.undo_status_list_.undo_node_cnt_ << ", buffer cnt:" << tx_data_buffers_.count() << std::endl; + int64_t undo_cnt = 0; + if (tx_data.op_guard_.is_valid()) { + undo_cnt = tx_data.op_guard_->get_undo_status_list().undo_node_cnt_; + } + std::cout << "read big tx id from sstable, tx_id:" << ATOMIC_LOAD(&TEST_TX_ID) << ", undo cnt:" << undo_cnt << ", buffer cnt:" << tx_data_buffers_.count() << std::endl; } } 
/**************************************************************************************************/ diff --git a/mittest/simple_server/test_big_tx_data.cpp b/mittest/simple_server/test_big_tx_data.cpp index 996d07c04..94529d132 100644 --- a/mittest/simple_server/test_big_tx_data.cpp +++ b/mittest/simple_server/test_big_tx_data.cpp @@ -64,7 +64,7 @@ class DoNothingOP : public ObITxDataCheckFunctor { virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) { UNUSED(tx_cc_ctx); - cout << "read tx data:" << tx_data.tx_id_.get_id() << ", undo cnt:" << tx_data.undo_status_list_.undo_node_cnt_ << endl; + cout << "read tx data:" << tx_data.tx_id_.get_id() << ", undo cnt:" << tx_data.op_guard_->get_undo_status_list().undo_node_cnt_ << endl; STORAGE_LOG_RET(INFO, 0, "read tx data", K(tx_data.tx_id_), K(lbt())); return OB_SUCCESS; } diff --git a/mittest/simple_server/test_transfer_tx.cpp b/mittest/simple_server/test_transfer_tx.cpp index 830653709..8d9fa0a27 100644 --- a/mittest/simple_server/test_transfer_tx.cpp +++ b/mittest/simple_server/test_transfer_tx.cpp @@ -352,6 +352,7 @@ TEST_F(ObSimpleClusterExampleTest, tx_exit) EQ(OB_TRANS_CTX_NOT_EXIST, SSH::wait_tx_exit(R.tenant_id_, loc2, tx_id)); } +/* TEST_F(ObSimpleClusterExampleTest, large_query) { TRANSFER_CASE_PREPARE; @@ -435,7 +436,7 @@ TEST_F(ObSimpleClusterExampleTest, large_query) LOGI("large_query: row_count:%ld", row_count); //get_curr_simple_server().get_sql_proxy().write("alter system set syslog_level='INFO'", affected_rows); } - +*/ TEST_F(ObSimpleClusterExampleTest, epoch_recover_from_active_info) { @@ -1215,7 +1216,9 @@ TEST_F(ObSimpleClusterExampleTest, transfer_tx_ctx_merge) TEST_F(ObSimpleClusterExampleTest, transfer_batch) { TRANSFER_CASE_PREPARE; + sql_proxy.write("alter system set _transfer_start_trans_timeout='5s'",affected_rows); + sql_proxy.write("alter system set _transfer_start_trans_timeout = '10s'", affected_rows); std::set jobs; for (int i =0 ;i< 5000;i++) { 
sqlclient::ObISQLConnection *conn = NULL; @@ -1243,6 +1246,7 @@ TEST_F(ObSimpleClusterExampleTest, transfer_batch) int64_t sum = 0; EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu2", sum)); EQ(100 * 5000, sum); + sql_proxy.write("alter system set _transfer_start_trans_timeout='1s'",affected_rows); } TEST_F(ObSimpleClusterExampleTest, transfer_retain_ctx) diff --git a/mittest/simple_server/test_tx_ctx_table_mit.cpp b/mittest/simple_server/test_tx_ctx_table_mit.cpp index 072b96a2d..3f4ea07de 100644 --- a/mittest/simple_server/test_tx_ctx_table_mit.cpp +++ b/mittest/simple_server/test_tx_ctx_table_mit.cpp @@ -85,7 +85,7 @@ int ObTxCtxMemtableScanIterator::serialize_next_tx_ctx_(ObTxLocalBuffer &buffer, if (OB_FAIL(ret)) { STORAGE_LOG(INFO, "get next tx ctx table info failed", KR(ret), KPC(tx_ctx)); } else if (SLEEP_BEFORE_DUMP_TX_CTX) { - fprintf(stdout, "ready to dump tx ctx, undo status node ptr : %p\n", tx_ctx->ctx_tx_data_.tx_data_guard_.tx_data()->undo_status_list_.head_); + fprintf(stdout, "ready to dump tx ctx, undo status node ptr : %p\n", tx_ctx->ctx_tx_data_.tx_data_guard_.tx_data()->op_guard_->get_undo_status_list().head_); fprintf(stdout, "sleep 20 seconds before dump\n"); HAS_GOT_TX_CTX = true; SLEEP_BEFORE_DUMP_TX_CTX = false; diff --git a/mittest/simple_server/test_tx_data.cpp b/mittest/simple_server/test_tx_data.cpp new file mode 100644 index 000000000..39961fe0f --- /dev/null +++ b/mittest/simple_server/test_tx_data.cpp @@ -0,0 +1,373 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PubL v2 for more details. + */ + +#include <gtest/gtest.h> +#define USING_LOG_PREFIX SERVER +#define protected public +#define private public + +#include "env/ob_simple_cluster_test_base.h" +#include "mittest/env/ob_simple_server_helper.h" +#include "storage/tx_storage/ob_ls_service.h" + +namespace oceanbase +{ +namespace unittest +{ + +using namespace oceanbase::transaction; +using namespace oceanbase::storage; + + +#define EQ(x, y) GTEST_ASSERT_EQ(x, y); +#define NEQ(x, y) GTEST_ASSERT_NE(x, y); +#define LE(x, y) GTEST_ASSERT_LE(x, y); +#define GE(x, y) GTEST_ASSERT_GE(x, y); + +class TestRunCtx +{ +public: + uint64_t tenant_id_ = 0; + int64_t time_sec_ = 0; +}; + +TestRunCtx R; + +class ObTxDataTest : public ObSimpleClusterTestBase +{ +public: + // 指定case运行目录前缀 test_ob_simple_cluster_ + ObTxDataTest() : ObSimpleClusterTestBase("test_tx_data_", "50G", "50G") {} +}; + +TEST_F(ObTxDataTest, observer_start) +{ + SERVER_LOG(INFO, "observer_start succ"); +} + +// 创建租户并不轻量,看场景必要性使用 +TEST_F(ObTxDataTest, add_tenant) +{ + // 创建普通租户tt1 + ASSERT_EQ(OB_SUCCESS, create_tenant("tt1", "40G", "40G", false, 10)); + // 获取租户tt1的tenant_id + ASSERT_EQ(OB_SUCCESS, get_tenant_id(R.tenant_id_)); + ASSERT_NE(0, R.tenant_id_); + // 初始化普通租户tt1的sql proxy + ASSERT_EQ(OB_SUCCESS, get_curr_simple_server().init_sql_proxy2()); +} + +TEST_F(ObTxDataTest, create_new_ls) +{ + // 在单节点ObServer下创建新的日志流, 注意避免被RS任务GC掉 + EQ(0, SSH::create_ls(R.tenant_id_, get_curr_observer().self_addr_)); + int64_t ls_count = 0; + EQ(0, SSH::g_select_int64(R.tenant_id_, "select count(ls_id) as val from __all_ls where ls_id!=1", ls_count)); + EQ(2, ls_count); +} + +TEST_F(ObTxDataTest, rollback_to) +{ + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy2(); + int64_t affected_rows; + EQ(0, sql_proxy.write("drop table if exists stu1", affected_rows)); + EQ(0, sql_proxy.write("create table stu1(col1 int)", affected_rows)); + sqlclient::ObISQLConnection *conn1 = NULL; + EQ(0,
sql_proxy.acquire(conn1)); + EQ(0, SSH::write(conn1, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn1, "insert into stu1 values(100)", affected_rows)); + ObTransID tx_id; + EQ(0, SSH::find_tx(conn1, tx_id)); + LOGI("find_tx:%ld", tx_id.get_id()); + EQ(0, SSH::write(conn1, "savepoint sp1")); + EQ(0, SSH::write(conn1, "insert into stu1 values(200)", affected_rows)); + EQ(0, SSH::write(conn1, "rollback to sp1")); + int64_t val = 0; + EQ(0, SSH::select_int64(conn1, "select sum(col1) val from stu1", val)); + EQ(100, val); + EQ(0, SSH::write(conn1, "commit")); +} + +TEST_F(ObTxDataTest, rollback_to_with_redo) +{ + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy2(); + int64_t affected_rows; + EQ(0, sql_proxy.write("drop table if exists stu1", affected_rows)); + EQ(0, sql_proxy.write("create table stu1(col1 int)", affected_rows)); + + sqlclient::ObISQLConnection *conn1 = NULL; + EQ(0, sql_proxy.acquire(conn1)); + EQ(0, SSH::write(conn1, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn1, "insert into stu1 values(100)", affected_rows)); + ObTransID tx_id; + EQ(0, SSH::find_tx(conn1, tx_id)); + LOGI("find_tx:%ld", tx_id.get_id()); + + EQ(0, SSH::write(conn1, "savepoint sp1")); + EQ(0, SSH::write(conn1, "insert into stu1 values(200)", affected_rows)); + ObLSID loc1; + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + // when tx has redo, rollback to need write log + EQ(0, SSH::submit_redo(R.tenant_id_, loc1)); + + EQ(0, SSH::write(conn1, "rollback to sp1")); + int64_t val = 0; + EQ(0, SSH::select_int64(conn1, "select sum(col1) val from stu1", val)); + EQ(100, val); + EQ(0, SSH::write(conn1, "commit")); +} + +TEST_F(ObTxDataTest, rollback_to_with_read_sstable_uncommit) +{ + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy2(); + int64_t affected_rows; + EQ(0, sql_proxy.write("drop table if exists stu1", affected_rows)); + EQ(0, sql_proxy.write("create table stu1(col1 int)", affected_rows)); + + 
sqlclient::ObISQLConnection *conn1 = NULL; + EQ(0, sql_proxy.acquire(conn1)); + EQ(0, SSH::write(conn1, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn1, "insert into stu1 values(100)", affected_rows)); + ObTransID tx_id; + EQ(0, SSH::find_tx(conn1, tx_id)); + LOGI("find_tx:%ld", tx_id.get_id()); + + EQ(0, SSH::write(conn1, "savepoint sp1")); + EQ(0, SSH::write(conn1, "insert into stu1 values(200)", affected_rows)); + ObLSID loc1; + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + // when tx has redo, rollback to need write log + EQ(0, SSH::submit_redo(R.tenant_id_, loc1)); + + EQ(0, SSH::write(conn1, "rollback to sp1")); + int64_t val = 0; + EQ(0, SSH::select_int64(conn1, "select sum(col1) val from stu1", val)); + EQ(100, val); + + EQ(0, sql_proxy.write("alter system minor freeze", affected_rows)); + EQ(0, SSH::wait_checkpoint_newest(R.tenant_id_, loc1)); + // read from sstable uncommit row + EQ(0, SSH::select_int64(conn1, "select sum(col1) val from stu1", val)); + EQ(100, val); + + EQ(0, SSH::write(conn1, "commit")); + EQ(0, SSH::select_int64(sql_proxy, "select sum(col1) as val from stu1",val)); + EQ(100, val); +} + +TEST_F(ObTxDataTest, rollback_to_with_ls_replay) +{ + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy2(); + int64_t affected_rows; + EQ(0, sql_proxy.write("drop table if exists stu1", affected_rows)); + EQ(0, sql_proxy.write("create table stu1(col1 int)", affected_rows)); + + sqlclient::ObISQLConnection *conn1 = NULL; + EQ(0, sql_proxy.acquire(conn1)); + EQ(0, SSH::write(conn1, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn1, "insert into stu1 values(100)", affected_rows)); + ObTransID tx_id; + EQ(0, SSH::find_tx(conn1, tx_id)); + LOGI("find_tx:%ld", tx_id.get_id()); + + EQ(0, SSH::write(conn1, "savepoint sp1")); + EQ(0, SSH::write(conn1, "insert into stu1 values(200)", affected_rows)); + ObLSID loc1; + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + // when tx has redo, 
rollback to need write log + EQ(0, SSH::submit_redo(R.tenant_id_, loc1)); + + EQ(0, SSH::write(conn1, "rollback to sp1")); + int64_t val = 0; + EQ(0, SSH::select_int64(conn1, "select sum(col1) val from stu1", val)); + EQ(100, val); + + EQ(0, sql_proxy.write("alter system minor freeze", affected_rows)); + EQ(0, SSH::wait_checkpoint_newest(R.tenant_id_, loc1)); + // read from sstable uncommit row + EQ(0, SSH::select_int64(conn1, "select sum(col1) val from stu1", val)); + EQ(100, val); + + LOGI("ls_reboot:%ld", loc1.id()); + // tx has not commit, tx ctx recover from tx_sstable + EQ(0, SSH::ls_reboot(R.tenant_id_, loc1)); + + EQ(0, SSH::write(conn1, "commit")); + EQ(0, SSH::select_int64(sql_proxy, "select sum(col1) as val from stu1",val)); + EQ(100, val); +} + +TEST_F(ObTxDataTest, rollback_to_with_ls_replay_from_middle) +{ + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy2(); + int64_t affected_rows; + EQ(0, sql_proxy.write("drop table if exists stu1", affected_rows)); + EQ(0, sql_proxy.write("create table stu1(col1 int)", affected_rows)); + + sqlclient::ObISQLConnection *conn1 = NULL; + EQ(0, sql_proxy.acquire(conn1)); + EQ(0, SSH::write(conn1, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn1, "insert into stu1 values(100)", affected_rows)); + ObTransID tx_id; + EQ(0, SSH::find_tx(conn1, tx_id)); + LOGI("find_tx:%ld", tx_id.get_id()); + + EQ(0, SSH::write(conn1, "savepoint sp1")); + EQ(0, SSH::write(conn1, "insert into stu1 values(200)", affected_rows)); + ObLSID loc1; + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + // when tx has redo, rollback to need write log + EQ(0, SSH::submit_redo(R.tenant_id_, loc1)); + + EQ(0, sql_proxy.write("alter system minor freeze", affected_rows)); + EQ(0, SSH::wait_checkpoint_newest(R.tenant_id_, loc1)); + + EQ(0, SSH::write(conn1, "rollback to sp1")); + int64_t val = 0; + EQ(0, SSH::select_int64(conn1, "select sum(col1) val from stu1", val)); + EQ(100, val); + + EQ(0, 
SSH::write(conn1, "commit")); + // make tx_ctx checkpoint + EQ(0, SSH::freeze_tx_ctx(R.tenant_id_, loc1)); + + LOGI("ls_reboot:%ld", loc1.id()); + EQ(0, SSH::ls_reboot(R.tenant_id_, loc1)); + + EQ(0, SSH::select_int64(sql_proxy, "select sum(col1) as val from stu1",val)); + EQ(100, val); +} + +TEST_F(ObTxDataTest, retain_ctx) +{ + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy2(); + int64_t affected_rows = 0; + ObMySQLTransaction trans; + EQ(0, trans.start(GCTX.sql_proxy_, R.tenant_id_)); + observer::ObInnerSQLConnection *conn = static_cast<observer::ObInnerSQLConnection *>(trans.get_connection()); + char buf[10]; + ObRegisterMdsFlag flag; + ObLSID ls_id1(1001); + ObLSID ls_id2(1002); + EQ(0, conn->register_multi_data_source(R.tenant_id_, + ls_id1, + ObTxDataSourceType::TEST3, + buf, + 10, + flag)); + + EQ(0, SSH::submit_redo(R.tenant_id_, ls_id1)); + EQ(0, sql_proxy.write("alter system minor freeze", affected_rows)); + EQ(0, SSH::wait_checkpoint_newest(R.tenant_id_, ls_id1)); + + EQ(0, conn->register_multi_data_source(R.tenant_id_, + ls_id2, + ObTxDataSourceType::TEST3, + buf, + 10, + flag)); + ObTransID tx_id; + EQ(0, SSH::g_select_int64(R.tenant_id_, "select trans_id as val from __all_virtual_trans_stat where is_exiting=0 and session_id<=1 limit 1", tx_id.tx_id_)); + LOGI("find active_tx tx_id:%ld", tx_id.get_id()); + + EQ(0, trans.end(true)); + // make tx_ctx checkpoint + EQ(0, SSH::freeze_tx_ctx(R.tenant_id_, ls_id1)); + LOGI("ls_reboot:%ld", ls_id1.id()); + EQ(0, SSH::ls_reboot(R.tenant_id_, ls_id1)); + + EQ(0, SSH::freeze_tx_ctx(R.tenant_id_, ls_id2)); + LOGI("ls_reboot:%ld", ls_id2.id()); + EQ(0, SSH::ls_reboot(R.tenant_id_, ls_id2)); +} + +TEST_F(ObTxDataTest, retain_ctx2) +{ + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy2(); + int64_t affected_rows = 0; + EQ(0, sql_proxy.write("drop table if exists stu1", affected_rows)); + EQ(0, sql_proxy.write("create table stu1(col1 int)", affected_rows)); + ObMySQLTransaction trans; + EQ(0,
trans.start(GCTX.sql_proxy_, R.tenant_id_)); + observer::ObInnerSQLConnection *conn = static_cast(trans.get_connection()); + char buf[10]; + ObRegisterMdsFlag flag; + ObLSID ls_id1(1001); + EQ(0, conn->register_multi_data_source(R.tenant_id_, + ls_id1, + ObTxDataSourceType::TEST3, + buf, + 10, + flag)); + + EQ(0, SSH::submit_redo(R.tenant_id_, ls_id1)); + EQ(0, sql_proxy.write("alter system minor freeze", affected_rows)); + EQ(0, SSH::wait_checkpoint_newest(R.tenant_id_, ls_id1)); + + EQ(0, sql_proxy.write("insert into stu1 values(100)", affected_rows)); + + EQ(0, conn->register_multi_data_source(R.tenant_id_, + ls_id1, + ObTxDataSourceType::TEST3, + buf, + 10, + flag)); + ObTransID tx_id; + EQ(0, SSH::g_select_int64(R.tenant_id_, "select trans_id as val from __all_virtual_trans_stat where is_exiting=0 and session_id<=1 limit 1", tx_id.tx_id_)); + LOGI("find active_tx tx_id:%ld", tx_id.get_id()); + + EQ(0, trans.end(true)); + // make tx_ctx checkpoint + EQ(0, SSH::freeze_tx_ctx(R.tenant_id_, ls_id1)); + // make tx_data checkpoint + EQ(0, SSH::freeze_tx_data(R.tenant_id_, ls_id1)); + LOGI("ls_reboot:%ld", ls_id1.id()); + EQ(0, SSH::ls_reboot(R.tenant_id_, ls_id1)); +} + +TEST_F(ObTxDataTest, end) +{ + if (R.time_sec_ > 0) { + ::sleep(R.time_sec_); + } +} + +} // end unittest +} // end oceanbase + + +int main(int argc, char **argv) +{ + int64_t c = 0; + int64_t time_sec = 0; + char *log_level = (char*)"INFO"; + while(EOF != (c = getopt(argc,argv,"t:l:"))) { + switch(c) { + case 't': + time_sec = atoi(optarg); + break; + case 'l': + log_level = optarg; + oceanbase::unittest::ObSimpleClusterTestBase::enable_env_warn_log_ = false; + break; + default: + break; + } + } + oceanbase::unittest::init_log_and_gtest(argc, argv); + OB_LOGGER.set_log_level(log_level); + + LOG_INFO("main>>>"); + oceanbase::unittest::R.time_sec_ = time_sec; + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/observer/virtual_table/ob_all_virtual_tx_data.cpp 
b/src/observer/virtual_table/ob_all_virtual_tx_data.cpp index 27e7650d1..bf8e29845 100644 --- a/src/observer/virtual_table/ob_all_virtual_tx_data.cpp +++ b/src/observer/virtual_table/ob_all_virtual_tx_data.cpp @@ -101,6 +101,10 @@ int ObAllVirtualTxData::fill_in_row_(const VirtualTxDataRow &row_data, common::O cur_row_.cells_[i].set_varchar(row_data.undo_status_list_str_); cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); break; + case TX_OP_COL: + cur_row_.cells_[i].set_varchar(row_data.tx_op_str_); + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; default: ret = OB_ERR_UNEXPECTED; break; @@ -207,4 +211,4 @@ int ObAllVirtualTxData::generate_virtual_tx_data_row_(VirtualTxDataRow &tx_data_ } } // namespace observer -} // namespace oceanbase \ No newline at end of file +} // namespace oceanbase diff --git a/src/observer/virtual_table/ob_all_virtual_tx_data.h b/src/observer/virtual_table/ob_all_virtual_tx_data.h index f02bb67c6..15566a58e 100644 --- a/src/observer/virtual_table/ob_all_virtual_tx_data.h +++ b/src/observer/virtual_table/ob_all_virtual_tx_data.h @@ -35,10 +35,11 @@ struct VirtualTxDataRow { share::SCN end_scn_; share::SCN commit_version_; char undo_status_list_str_[common::MAX_UNDO_LIST_CHAR_LENGTH]; + char tx_op_str_[common::MAX_TX_OP_CHAR_LENGTH]; VirtualTxDataRow() : state_(0), start_scn_(), end_scn_(), commit_version_() {} - TO_STRING_KV(K(state_), K(start_scn_), K(end_scn_), K(commit_version_), K(undo_status_list_str_)); + TO_STRING_KV(K(state_), K(start_scn_), K(end_scn_), K(commit_version_), K(undo_status_list_str_), K(tx_op_str_)); }; class ObAllVirtualTxData : public common::ObVirtualTableScannerIterator { @@ -53,7 +54,8 @@ private: START_SCN_COL, END_SCN_COL, COMMIT_VERSION_COL, - UNDO_STATUS_COL + UNDO_STATUS_COL, + TX_OP_COL }; diff --git a/src/share/allocator/ob_shared_memory_allocator_mgr.h 
b/src/share/allocator/ob_shared_memory_allocator_mgr.h index df57da87b..554ad3b76 100644 --- a/src/share/allocator/ob_shared_memory_allocator_mgr.h +++ b/src/share/allocator/ob_shared_memory_allocator_mgr.h @@ -44,6 +44,8 @@ public: SHARE_LOG(ERROR, "init memstore allocator failed", KR(ret)); } else if (OB_FAIL(mds_allocator_.init())) { SHARE_LOG(ERROR, "init mds allocator failed", KR(ret)); + } else if (OB_FAIL(tx_data_op_allocator_.init())) { + SHARE_LOG(ERROR, "init tx data op allocator failed", KR(ret)); } else if (OB_FAIL( share_resource_throttle_tool_.init(&memstore_allocator_, &tx_data_allocator_, &mds_allocator_))) { SHARE_LOG(ERROR, "init share resource throttle tool failed", KR(ret)); @@ -65,6 +67,7 @@ public: ObTenantTxDataAllocator &tx_data_allocator() { return tx_data_allocator_; } ObTenantMdsAllocator &mds_allocator() { return mds_allocator_; } TxShareThrottleTool &share_resource_throttle_tool() { return share_resource_throttle_tool_; } + ObTenantTxDataOpAllocator &tx_data_op_allocator() { return tx_data_op_allocator_; } private: void update_share_throttle_config_(const int64_t total_memory, omt::ObTenantConfigGuard &config); @@ -78,6 +81,7 @@ private: ObMemstoreAllocator memstore_allocator_; ObTenantTxDataAllocator tx_data_allocator_; ObTenantMdsAllocator mds_allocator_; + ObTenantTxDataOpAllocator tx_data_op_allocator_; }; class TxShareMemThrottleUtil @@ -156,4 +160,4 @@ public: } // namespace share } // namespace oceanbase -#endif \ No newline at end of file +#endif diff --git a/src/share/allocator/ob_tx_data_allocator.cpp b/src/share/allocator/ob_tx_data_allocator.cpp index a14c76f44..a1f10d051 100644 --- a/src/share/allocator/ob_tx_data_allocator.cpp +++ b/src/share/allocator/ob_tx_data_allocator.cpp @@ -23,6 +23,8 @@ namespace oceanbase { namespace share { +thread_local int64_t ObTenantTxDataOpAllocator::local_alloc_size_ = 0; + int64_t ObTenantTxDataAllocator::resource_unit_size() { static const int64_t TX_DATA_RESOURCE_UNIT_SIZE = 
OB_MALLOC_NORMAL_BLOCK_SIZE; /* 8KB */ @@ -149,5 +151,59 @@ ObTxDataThrottleGuard::~ObTxDataThrottleGuard() } } +int ObTenantTxDataOpAllocator::init() +{ + int ret = OB_SUCCESS; + ObMemAttr mem_attr; + mem_attr.tenant_id_ = MTL_ID(); + mem_attr.ctx_id_ = ObCtxIds::MDS_DATA_ID; + mem_attr.label_ = "TX_OP"; + ObSharedMemAllocMgr *share_mem_alloc_mgr = MTL(ObSharedMemAllocMgr *); + throttle_tool_ = &(share_mem_alloc_mgr->share_resource_throttle_tool()); + if (IS_INIT){ + ret = OB_INIT_TWICE; + SHARE_LOG(WARN, "init tenant mds allocator twice", KR(ret), KPC(this)); + } else if (OB_ISNULL(throttle_tool_)) { + ret = OB_ERR_UNEXPECTED; + SHARE_LOG(WARN, "throttle tool is unexpected null", KP(throttle_tool_), KP(share_mem_alloc_mgr)); + } else if (OB_FAIL(allocator_.init(OB_MALLOC_NORMAL_BLOCK_SIZE, block_alloc_, mem_attr))) { + MDS_LOG(WARN, "init vslice allocator failed", K(ret), K(OB_MALLOC_NORMAL_BLOCK_SIZE), KP(this), K(mem_attr)); + } else { + allocator_.set_nway(MDS_ALLOC_CONCURRENCY); + is_inited_ = true; + } + return ret; +} + +void *ObTenantTxDataOpAllocator::alloc(const int64_t size) +{ + int64_t abs_expire_time = THIS_WORKER.get_timeout_ts(); + void * buf = alloc(size, abs_expire_time); + if (OB_NOT_NULL(buf)) { + local_alloc_size_ += size; + } + return buf; +} + +void *ObTenantTxDataOpAllocator::alloc(const int64_t size, const ObMemAttr &attr) +{ + UNUSED(attr); + void *obj = alloc(size); + return obj; +} + +void *ObTenantTxDataOpAllocator::alloc(const int64_t size, const int64_t abs_expire_time) +{ + void *obj = allocator_.alloc(size); + return obj; +} + +void ObTenantTxDataOpAllocator::free(void *ptr) +{ + allocator_.free(ptr); +} + +void ObTenantTxDataOpAllocator::set_attr(const ObMemAttr &attr) { allocator_.set_attr(attr); } + } // namespace share -} // namespace oceanbase \ No newline at end of file +} // namespace oceanbase diff --git a/src/share/allocator/ob_tx_data_allocator.h b/src/share/allocator/ob_tx_data_allocator.h index ab146549f..56c3bf8cb 
100644 --- a/src/share/allocator/ob_tx_data_allocator.h +++ b/src/share/allocator/ob_tx_data_allocator.h @@ -16,6 +16,7 @@ #include "lib/allocator/ob_slice_alloc.h" #include "share/ob_delegate.h" #include "share/throttle/ob_share_throttle_define.h" +#include "lib/allocator/ob_vslice_alloc.h" namespace oceanbase { namespace share { @@ -75,7 +76,35 @@ private: share::TxShareThrottleTool *throttle_tool_; }; +class ObTenantTxDataOpAllocator : public ObIAllocator { +private: + static const int64_t MDS_ALLOC_CONCURRENCY = 32; +public: + DEFINE_CUSTOM_FUNC_FOR_THROTTLE(Mds); + +public: + ObTenantTxDataOpAllocator() : is_inited_(false), throttle_tool_(nullptr), block_alloc_(), allocator_() {} + + int init(); + void destroy() { is_inited_ = false; } + void *alloc(const int64_t size, const int64_t expire_ts); + virtual void *alloc(const int64_t size) override; + virtual void *alloc(const int64_t size, const ObMemAttr &attr) override; + virtual void free(void *ptr) override; + virtual void set_attr(const ObMemAttr &attr) override; + int64_t hold() { return allocator_.hold(); } + int64_t get_local_alloc_size() { return local_alloc_size_; } + void reset_local_alloc_size() { local_alloc_size_ = 0; } + TO_STRING_KV(K(is_inited_), KP(this), KP(throttle_tool_), KP(&block_alloc_), KP(&allocator_)); + +private: + bool is_inited_; + share::TxShareThrottleTool *throttle_tool_; + common::ObBlockAllocMgr block_alloc_; + common::ObVSliceAlloc allocator_; + static thread_local int64_t local_alloc_size_; +}; } // namespace share } // namespace oceanbase -#endif \ No newline at end of file +#endif diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 71de7d88b..07298df63 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -368,6 +368,7 @@ ob_set_subtarget(ob_storage tx tx/ob_tx_2pc_msg_handler.cpp tx/ob_tx_2pc_ctx_impl.cpp tx/ob_tx_data_define.cpp + tx/ob_tx_data_op.cpp tx/ob_tx_data_functor.cpp tx/ob_tx_serialization.cpp tx/ob_tx_log.cpp diff 
--git a/src/storage/high_availability/ob_transfer_handler.cpp b/src/storage/high_availability/ob_transfer_handler.cpp index 4eb0b4574..a164106f6 100644 --- a/src/storage/high_availability/ob_transfer_handler.cpp +++ b/src/storage/high_availability/ob_transfer_handler.cpp @@ -444,9 +444,7 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta int tmp_ret = OB_SUCCESS; const int64_t start_ts = ObTimeUtil::current_time(); omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID())); - // TODO lana compatible - bool new_transfer = true; - LOG_INFO("[TRANSFER] start do with start status", K(task_info), K(new_transfer)); + LOG_INFO("[TRANSFER] start do with start status", K(task_info)); ObTimeoutCtx timeout_ctx; ObMySQLTransaction trans; @@ -461,6 +459,7 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta process_perf_diagnose_info_(ObStorageHACostItemName::TRANSFER_START_BEGIN, ObStorageHADiagTaskType::TRANSFER_START, start_ts, round_, false/*is_report*/); bool commit_succ = false; + bool new_transfer = true; if (!is_inited_) { ret = OB_NOT_INIT; @@ -1053,7 +1052,8 @@ int ObTransferHandler::do_trans_transfer_start_( } else if (!task_info.is_valid() || !config_version.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("do trans transfer start get invalid argument", K(ret), K(task_info), K(config_version)); - } else if (OB_FAIL(do_tx_start_transfer_out_(task_info, trans, transaction::ObTxDataSourceType::START_TRANSFER_OUT))) { + } else if (OB_FAIL(do_tx_start_transfer_out_(task_info, trans, + transaction::ObTxDataSourceType::START_TRANSFER_OUT, SCN::min_scn(), nullptr))) { LOG_WARN("failed to do tx start transfer out", K(ret), K(task_info)); } else if (OB_FAIL(check_config_version_(config_version))) { LOG_WARN("failed to check config version", K(ret), K(task_info)); @@ -1192,10 +1192,13 @@ int ObTransferHandler::do_trans_transfer_start_v2_( src_info.cluster_id_ = GCONF.cluster_id; omt::ObTenantConfigGuard 
tenant_config(TENANT_CONF(MTL_ID())); SCN data_end_scn; + ObArray tablet_list; + ObArray move_tx_ids; int64_t move_tx_count = 0; int64_t start_time = ObTimeUtil::current_time(); int64_t transfer_out_prepare_cost = 0; int64_t wait_tablet_write_end_cost = 0; + int64_t filter_tx_cost = 0; int64_t transfer_out_cost = 0; int64_t wait_src_replay_cost = 0; int64_t get_transfer_out_scn_cost = 0; @@ -1218,9 +1221,17 @@ int ObTransferHandler::do_trans_transfer_start_v2_( } else if (OB_FAIL(get_ls_leader_(task_info.src_ls_id_, src_ls_leader))) { LOG_WARN("failed to get src ls leader", K(ret), K(task_info)); } else if (FALSE_IT(src_info.src_addr_ = src_ls_leader)) { + } else { + for (int64_t idx = 0; OB_SUCC(ret) && idx < task_info.tablet_list_.count(); idx++) { + if (OB_FAIL(tablet_list.push_back(task_info.tablet_list_.at(idx).tablet_id()))) { + LOG_WARN("push to array failed", KR(ret)); + } + } + } + if (OB_FAIL(ret)) { // MDS transaction operation for block tablet write } else if (OB_FAIL(do_tx_start_transfer_out_(task_info, trans, - transaction::ObTxDataSourceType::START_TRANSFER_OUT_PREPARE))) { + transaction::ObTxDataSourceType::START_TRANSFER_OUT_PREPARE, SCN::min_scn(), nullptr))) { LOG_WARN("failed to do tx start transfer prepare", K(ret), K(task_info)); } else if (STEP_COST_AND_CHECK_TIMEOUT(transfer_out_prepare_cost)) { // resubmit tx log promise transfer tablet redo complete @@ -1230,7 +1241,11 @@ int ObTransferHandler::do_trans_transfer_start_v2_( } else if (!data_end_scn.is_valid()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("transfer data_end_scn is invalid", K(ret), K(task_info), K(data_end_scn)); - } else if (OB_FAIL(do_tx_start_transfer_out_(task_info, trans, transaction::ObTxDataSourceType::START_TRANSFER_OUT_V2, data_end_scn))) { + } else if (OB_FAIL(ls_->filter_tx_need_transfer(tablet_list, data_end_scn, move_tx_ids))) { + LOG_WARN("filter tx need transfer", KR(ret), K(task_info)); + } else if (STEP_COST_AND_CHECK_TIMEOUT(filter_tx_cost)) { + } else if 
(OB_FAIL(do_tx_start_transfer_out_(task_info, trans, + transaction::ObTxDataSourceType::START_TRANSFER_OUT_V2, data_end_scn, &move_tx_ids))) { LOG_WARN("failed to do tx start transfer out", K(ret), K(task_info)); } else if (STEP_COST_AND_CHECK_TIMEOUT(transfer_out_cost)) { } else if (OB_FAIL(get_start_transfer_out_scn_(task_info, timeout_ctx, start_scn))) { @@ -1244,7 +1259,8 @@ int ObTransferHandler::do_trans_transfer_start_v2_( LOG_WARN("failed to get transfer tablets meta", K(ret), K(task_info)); } else if (STEP_COST_AND_CHECK_TIMEOUT(get_tablets_meta_cost)) { // move tx - } else if (OB_FAIL(do_move_tx_to_dest_ls_(task_info, timeout_ctx, trans, data_end_scn, start_scn, move_tx_count))) { + } else if (move_tx_ids.count() > 0 && OB_FAIL(do_move_tx_to_dest_ls_(task_info, timeout_ctx, trans, + data_end_scn, start_scn, tablet_list, move_tx_ids, move_tx_count))) { LOG_WARN("failed to do move tx to dest_ls", K(ret), K(task_info)); } else if (STEP_COST_AND_CHECK_TIMEOUT(move_tx_cost)) { // transfer in @@ -1255,16 +1271,18 @@ int ObTransferHandler::do_trans_transfer_start_v2_( LOG_WARN("failed to update transfer status", K(ret), K(task_info)); } - LOG_INFO("[TRANSFER] finish do trans transfer start", K(ret), K(task_info), "cost", ObTimeUtil::current_time() - start_time, - K(transfer_out_prepare_cost), - K(wait_tablet_write_end_cost), - K(transfer_out_cost), - K(get_transfer_out_scn_cost), - K(wait_src_replay_cost), - K(get_tablets_meta_cost), - K(move_tx_cost), - K(transfer_in_cost), - K(move_tx_count)); + LOG_INFO("[TRANSFER] finish transfer start", K(ret), K(task_info), "cost", ObTimeUtil::current_time() - start_time, + K(transfer_out_prepare_cost), + K(wait_tablet_write_end_cost), + K(filter_tx_cost), + K(transfer_out_cost), + K(get_transfer_out_scn_cost), + K(wait_src_replay_cost), + K(get_tablets_meta_cost), + K(move_tx_cost), + K(transfer_in_cost), + K(move_tx_count), + K(move_tx_ids)); return ret; } @@ -1359,7 +1377,8 @@ int 
ObTransferHandler::do_tx_start_transfer_out_( const share::ObTransferTaskInfo &task_info, common::ObMySQLTransaction &trans, const transaction::ObTxDataSourceType data_source_type, - SCN data_end_scn) + SCN data_end_scn, + ObIArray *move_tx_ids) { LOG_INFO("[TRANSFER] register start transfer out", K(task_info), K(data_source_type)); int ret = OB_SUCCESS; @@ -1368,7 +1387,7 @@ int ObTransferHandler::do_tx_start_transfer_out_( ObArenaAllocator allocator; SCN dest_base_scn; const int64_t start_ts = ObTimeUtil::current_time(); - + const int64_t ENABLE_FILTER_TX_LIST_LIMIT = 1000; if (!is_inited_) { ret = OB_NOT_INIT; LOG_WARN("transfer handler do not init", K(ret)); @@ -1388,9 +1407,17 @@ int ObTransferHandler::do_tx_start_transfer_out_( start_transfer_out_info.transfer_epoch_ = task_info.task_id_.id(); start_transfer_out_info.task_id_ = task_info.task_id_; start_transfer_out_info.data_version_ = DEFAULT_MIN_DATA_VERSION; + start_transfer_out_info.filter_tx_need_transfer_ = false; if (OB_FAIL(start_transfer_out_info.tablet_list_.assign(task_info.tablet_list_))) { LOG_WARN("failed to assign transfer tablet list", K(ret), K(task_info)); - } else { + } else if (OB_NOT_NULL(move_tx_ids) && move_tx_ids->count() <= ENABLE_FILTER_TX_LIST_LIMIT) { + // if has too many tx_ids, we just use data_end_scn to filter + start_transfer_out_info.filter_tx_need_transfer_ = true; + if (OB_FAIL(start_transfer_out_info.move_tx_ids_.assign(*move_tx_ids))) { + LOG_WARN("assign failed", KR(ret), K(move_tx_ids)); + } + } + if (OB_SUCC(ret)) { int64_t buf_len = start_transfer_out_info.get_serialize_size(); int64_t pos = 0; char *buf = (char*)allocator.alloc(buf_len); @@ -2645,6 +2672,8 @@ int ObTransferHandler::do_move_tx_to_dest_ls_(const share::ObTransferTaskInfo &t ObMySQLTransaction &trans, const SCN data_end_scn, const SCN transfer_scn, + ObIArray &tablet_list, + ObIArray &move_tx_ids, int64_t &move_tx_count) { LOG_INFO("[TRANSFER] do_move_tx_to_dest_ls_", K(task_info), K(data_end_scn)); @@ 
-2661,19 +2690,12 @@ int ObTransferHandler::do_move_tx_to_dest_ls_(const share::ObTransferTaskInfo &t int64_t tx_count = 0; int64_t buf_len = 0; int64_t collect_count = 0; - ObArray tablet_list; - for (int64_t idx = 0; OB_SUCC(ret) && idx < task_info.tablet_list_.count(); idx++) { - if (OB_FAIL(tablet_list.push_back(task_info.tablet_list_.at(idx).tablet_id()))) { - LOG_WARN("push to array failed", KR(ret)); - } - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(MTL(ObLSService*)->get_ls(task_info.src_ls_id_,src_ls_handle, ObLSGetMod::STORAGE_MOD))) { + if (OB_FAIL(MTL(ObLSService*)->get_ls(task_info.src_ls_id_,src_ls_handle, ObLSGetMod::STORAGE_MOD))) { LOG_WARN("get ls failed", KR(ret), K(task_info)); } else if (OB_FAIL(src_ls_handle.get_ls()->collect_tx_ctx(task_info.dest_ls_id_, data_end_scn, - const_cast&>(tablet_list), - tx_count, + tablet_list, + move_tx_ids, collect_count, collect_res.args_))) { LOG_WARN("collect tx ctx failed", KR(ret), K(task_info)); @@ -2723,7 +2745,7 @@ int ObTransferHandler::do_move_tx_to_dest_ls_(const share::ObTransferTaskInfo &t int64_t end_time = ObTimeUtility::current_time(); LOG_INFO("do_move_tx_to_dest_ls_", KR(ret), "cost", end_time-start_time, K(task_info), - "tx_count", collect_res.args_.count(), + "move_tx_count", collect_res.args_.count(), "buf_size", buf_len); return ret; } diff --git a/src/storage/high_availability/ob_transfer_handler.h b/src/storage/high_availability/ob_transfer_handler.h index 553eb34a2..d61f22d0d 100644 --- a/src/storage/high_availability/ob_transfer_handler.h +++ b/src/storage/high_availability/ob_transfer_handler.h @@ -193,6 +193,8 @@ private: ObMySQLTransaction &trans, const SCN data_end_scn, const SCN transfer_scn, + ObIArray &tablet_list, + ObIArray &move_tx_ids, int64_t &move_tx_count); int start_trans_( ObTimeoutCtx &timeout_ctx, @@ -205,7 +207,8 @@ private: const share::ObTransferTaskInfo &task_info, common::ObMySQLTransaction &trans, const transaction::ObTxDataSourceType data_source_type, - SCN 
data_end_scn = SCN::min_scn()); + SCN data_end_scn, + ObIArray *move_tx_ids); int lock_transfer_task_( const share::ObTransferTaskInfo &task_info, common::ObISQLClient &trans); diff --git a/src/storage/high_availability/ob_transfer_struct.cpp b/src/storage/high_availability/ob_transfer_struct.cpp index f84054d51..b956c20b8 100644 --- a/src/storage/high_availability/ob_transfer_struct.cpp +++ b/src/storage/high_availability/ob_transfer_struct.cpp @@ -69,6 +69,8 @@ int ObTXStartTransferOutInfo::assign(const ObTXStartTransferOutInfo &start_trans LOG_WARN("assign start transfer out info get invalid argument", K(ret), K(start_transfer_out_info)); } else if (OB_FAIL(tablet_list_.assign(start_transfer_out_info.tablet_list_))) { LOG_WARN("failed to assign start transfer out info", K(ret), K(start_transfer_out_info)); + } else if (OB_FAIL(move_tx_ids_.assign(start_transfer_out_info.move_tx_ids_))) { + LOG_WARN("failed to assign move_tx_ids", K(ret), K(start_transfer_out_info)); } else { src_ls_id_ = start_transfer_out_info.src_ls_id_; dest_ls_id_ = start_transfer_out_info.dest_ls_id_; @@ -76,6 +78,7 @@ int ObTXStartTransferOutInfo::assign(const ObTXStartTransferOutInfo &start_trans data_end_scn_ = start_transfer_out_info.data_end_scn_; transfer_epoch_ = start_transfer_out_info.transfer_epoch_; data_version_ = start_transfer_out_info.data_version_; + filter_tx_need_transfer_ = start_transfer_out_info.filter_tx_need_transfer_; } return ret; } diff --git a/src/storage/high_availability/ob_transfer_struct.h b/src/storage/high_availability/ob_transfer_struct.h index 876743311..65eb9a04e 100644 --- a/src/storage/high_availability/ob_transfer_struct.h +++ b/src/storage/high_availability/ob_transfer_struct.h @@ -37,6 +37,7 @@ public: void reset(); bool is_valid() const; int assign(const ObTXStartTransferOutInfo &start_transfer_out_info); + bool empty_tx() { return filter_tx_need_transfer_ && move_tx_ids_.count() == 0; } TO_STRING_KV(K_(src_ls_id), K_(dest_ls_id), K_(tablet_list), 
K_(task_id), K_(data_end_scn), K_(transfer_epoch), K_(data_version), K_(filter_tx_need_transfer), K_(move_tx_ids)); @@ -47,7 +48,7 @@ public: share::ObTransferTaskID task_id_; share::SCN data_end_scn_; int64_t transfer_epoch_; - uint64_t data_version_; //transfer_dml_ctrl_42x # placeholde + uint64_t data_version_; //transfer_dml_ctrl_42x # placeholder bool filter_tx_need_transfer_; common::ObSEArray move_tx_ids_; DISALLOW_COPY_AND_ASSIGN(ObTXStartTransferOutInfo); diff --git a/src/storage/ls/ob_ls.h b/src/storage/ls/ob_ls.h index 49dd5dc7d..c941df2de 100644 --- a/src/storage/ls/ob_ls.h +++ b/src/storage/ls/ob_ls.h @@ -839,6 +839,7 @@ public: CONST_DELEGATE_WITH_RET(dup_table_ls_handler_, get_dup_table_ls_meta, int); DELEGATE_WITH_RET(dup_table_ls_handler_, set_dup_table_ls_meta, int); + DELEGATE_WITH_RET(ls_tx_svr_, filter_tx_need_transfer, int); // for transfer to modify active tx ctx state DELEGATE_WITH_RET(ls_tx_svr_, transfer_out_tx_op, int); diff --git a/src/storage/ls/ob_ls_tablet_service.cpp b/src/storage/ls/ob_ls_tablet_service.cpp index 758da9c96..96b9ca96a 100644 --- a/src/storage/ls/ob_ls_tablet_service.cpp +++ b/src/storage/ls/ob_ls_tablet_service.cpp @@ -5892,6 +5892,11 @@ int ObLSTabletService::get_ls_min_end_scn( } } } + // now tx_data contains mds tx_op to remove retain_ctx + // so we need wait ls_checkpoint advance to recycle tx_data + if (ls_checkpoint < min_end_scn_from_latest_tablets) { + min_end_scn_from_latest_tablets = ls_checkpoint; + } LOG_INFO("get ls min end scn finish", K(ls_checkpoint)); } return ret; diff --git a/src/storage/ls/ob_ls_tx_service.cpp b/src/storage/ls/ob_ls_tx_service.cpp index fda723038..9484ae552 100644 --- a/src/storage/ls/ob_ls_tx_service.cpp +++ b/src/storage/ls/ob_ls_tx_service.cpp @@ -936,25 +936,28 @@ int ObLSTxService::check_tx_blocked(bool &tx_blocked) const } return ret; } +int ObLSTxService::filter_tx_need_transfer(ObIArray &tablet_list, + const share::SCN data_end_scn, + ObIArray &move_tx_ids) +{ + int ret = 
OB_SUCCESS; + if (OB_FAIL(mgr_->filter_tx_need_transfer(tablet_list, data_end_scn, move_tx_ids))) { + TRANS_LOG(WARN, "for each tx ctx error", KR(ret)); + } + return ret; +} -int ObLSTxService::transfer_out_tx_op(int64_t except_tx_id, - const share::SCN data_end_scn, - const share::SCN op_scn, - transaction::NotifyType op_type, - bool is_replay, - share::ObLSID dest_ls_id, - int64_t transfer_epoch, +int ObLSTxService::transfer_out_tx_op(const ObTransferOutTxParam ¶m, int64_t &active_tx_count, int64_t &op_tx_count) { int ret = OB_SUCCESS; int64_t start_time = ObTimeUtility::current_time(); - if (OB_FAIL(mgr_->transfer_out_tx_op(except_tx_id, data_end_scn, op_scn, op_type, is_replay, - dest_ls_id, transfer_epoch, active_tx_count, op_tx_count))) { + if (OB_FAIL(mgr_->transfer_out_tx_op(param, active_tx_count, op_tx_count))) { TRANS_LOG(WARN, "for each tx ctx error", KR(ret)); } int64_t end_time = ObTimeUtility::current_time(); - LOG_INFO("transfer_out_tx_op", KR(ret), K(op_type), "cost", end_time - start_time, K(active_tx_count), K(op_tx_count)); + LOG_INFO("transfer_out_tx_op", KR(ret), "cost", end_time - start_time, K(active_tx_count), K(op_tx_count)); return ret; } @@ -973,18 +976,17 @@ int ObLSTxService::wait_tx_write_end(ObTimeoutCtx &timeout_ctx) int ObLSTxService::collect_tx_ctx(const ObLSID dest_ls_id, const SCN log_scn, const ObIArray &tablet_list, - int64_t &tx_count, + const ObIArray &move_tx_ids, int64_t &collect_count, ObIArray &res) { int ret = OB_SUCCESS; int64_t start_time = ObTimeUtility::current_time(); - if (OB_FAIL(mgr_->collect_tx_ctx(dest_ls_id, log_scn, tablet_list, tx_count, collect_count, res))) { + if (OB_FAIL(mgr_->collect_tx_ctx(dest_ls_id, log_scn, tablet_list, move_tx_ids, collect_count, res))) { TRANS_LOG(WARN, "for each tx ctx error", KR(ret)); } int64_t end_time = ObTimeUtility::current_time(); - LOG_INFO("collect_tx_ctx", KR(ret), K(ls_id_), "cost_us", end_time - start_time, - K(tx_count), K(collect_count)); + 
LOG_INFO("collect_tx_ctx", KR(ret), K(ls_id_), "cost_us", end_time - start_time, K(collect_count)); return ret; } diff --git a/src/storage/ls/ob_ls_tx_service.h b/src/storage/ls/ob_ls_tx_service.h index e3fc68017..af1111399 100644 --- a/src/storage/ls/ob_ls_tx_service.h +++ b/src/storage/ls/ob_ls_tx_service.h @@ -34,6 +34,7 @@ namespace storage class ObLS; struct ObTxCtxMoveArg; struct ObTransferMoveTxParam; +struct ObTransferOutTxParam; } namespace transaction @@ -176,20 +177,17 @@ public: int get_common_checkpoint_info( ObIArray &common_checkpoint_array); - int transfer_out_tx_op(int64_t except_tx_id, - const share::SCN data_end_scn, - const share::SCN op_scn, - transaction::NotifyType op_type, - bool is_replay, - share::ObLSID dest_ls_id, - int64_t transfer_epoch, + int filter_tx_need_transfer(ObIArray &tablet_list, + const share::SCN data_end_scn, + ObIArray &move_tx_ids); + int transfer_out_tx_op(const ObTransferOutTxParam ¶m, int64_t &active_tx_count, int64_t &op_tx_count); int wait_tx_write_end(ObTimeoutCtx &timeout_ctx); int collect_tx_ctx(const share::ObLSID dest_ls_id, const share::SCN log_scn, const ObIArray &tablet_list, - int64_t &tx_count, + const ObIArray &move_tx_ids, int64_t &collect_count, ObIArray &args); int move_tx_op(const ObTransferMoveTxParam &move_tx_param, diff --git a/src/storage/multi_data_source/buffer_ctx.cpp b/src/storage/multi_data_source/buffer_ctx.cpp index 6b481d786..4cf4d4ce4 100644 --- a/src/storage/multi_data_source/buffer_ctx.cpp +++ b/src/storage/multi_data_source/buffer_ctx.cpp @@ -69,22 +69,22 @@ int BufferCtxNode::serialize(char *buf, const int64_t buf_len, int64_t &pos) con } template -int deserialize_(BufferCtx *&ctx_, int64_t type_idx, const char *buf, const int64_t buf_len, int64_t &pos) { +int deserialize_(BufferCtx *&ctx_, int64_t type_idx, const char *buf, const int64_t buf_len, int64_t &pos, ObIAllocator &allocator) { int ret = OB_SUCCESS; MDS_TG(10_ms); if (IDX == type_idx) { using ImplType = 
GET_CTX_TYPE_BY_TUPLE_IDX(IDX); ImplType *p_impl = nullptr; set_mds_mem_check_thread_local_info(MdsWriter(WriterType::UNKNOWN_WRITER, 0), typeid(ImplType).name()); - if (OB_ISNULL(p_impl = (ImplType *)MTL(ObTenantMdsService*)->get_buffer_ctx_allocator().alloc(sizeof(ImplType), - ObMemAttr(MTL_ID(), - "MDS_CTX_DESE", - ObCtxIds::MDS_CTX_ID)))) { + if (OB_ISNULL(p_impl = (ImplType *)allocator.alloc(sizeof(ImplType), + ObMemAttr(MTL_ID(), + "MDS_CTX_DESE", + ObCtxIds::MDS_CTX_ID)))) { ret = OB_ALLOCATE_MEMORY_FAILED; MDS_LOG(ERROR, "fail to alloc buffer ctx memory", KR(ret), K(type_idx), K(IDX)); } else if (FALSE_IT(new (p_impl) ImplType())) { } else if (MDS_FAIL(p_impl->deserialize(buf, buf_len, pos))) { - MTL(mds::ObTenantMdsService*)->get_buffer_ctx_allocator().free(p_impl); + allocator.free(p_impl); p_impl = nullptr; MDS_LOG(ERROR, "deserialzed from buffer failed", KR(ret), K(type_idx), K(IDX)); } else { @@ -98,7 +98,7 @@ int deserialize_(BufferCtx *&ctx_, int64_t type_idx, const char *buf, const int6 MDS_LOG(INFO, "deserialize ctx success", KR(ret), K(*p_impl), K(type_idx), K(IDX), K(buf_len), K(pos), K(lbt())); } reset_mds_mem_check_thread_local_info(); - } else if (MDS_FAIL(deserialize_(ctx_, type_idx, buf, buf_len, pos))) { + } else if (MDS_FAIL(deserialize_(ctx_, type_idx, buf, buf_len, pos, allocator))) { MDS_LOG(ERROR, "deserialzed from buffer failed", KR(ret), K(type_idx), K(IDX)); } return ret; @@ -109,14 +109,15 @@ int deserialize_(BufferCtx *&ctx_, int64_t type_idx, const char *buf, const int64_t buf_len, - int64_t &pos) + int64_t &pos, + ObIAllocator &allocator) { int ret = OB_ERR_UNEXPECTED; MDS_LOG(ERROR, "type idx out of tuple range", KR(ret), K(type_idx), K(BufferCtxTupleHelper::get_element_size())); return ret; } -int BufferCtxNode::deserialize(const char *buf, const int64_t buf_len, int64_t &pos) +int BufferCtxNode::deserialize(const char *buf, const int64_t buf_len, int64_t &pos, ObIAllocator &allocator) { int ret = OB_SUCCESS; MDS_TG(10_ms); @@ 
-125,7 +126,7 @@ int BufferCtxNode::deserialize(const char *buf, const int64_t buf_len, int64_t & MDS_LOG(ERROR, "fail to deserialize buffer ctx id", KR(ret), K(type_idx)); } else if (INVALID_VALUE == type_idx) { MDS_LOG(DEBUG, "deserialized INVALD buffer ctx", KR(ret), K(type_idx), K(buf_len), K(pos)); - } else if (MDS_FAIL(deserialize_<0>(ctx_, type_idx, buf, buf_len, pos))) { + } else if (MDS_FAIL(deserialize_<0>(ctx_, type_idx, buf, buf_len, pos, allocator))) { MDS_LOG(WARN, "deserialized buffer ctx failed", KR(ret), K(type_idx)); } return ret; diff --git a/src/storage/multi_data_source/buffer_ctx.h b/src/storage/multi_data_source/buffer_ctx.h index d78ff1a3c..86e12d8be 100644 --- a/src/storage/multi_data_source/buffer_ctx.h +++ b/src/storage/multi_data_source/buffer_ctx.h @@ -16,6 +16,7 @@ #include "lib/oblog/ob_log_module.h" #include "runtime_utility/common_define.h" #include "mds_writer.h" +#include "runtime_utility/mds_tenant_service.h" namespace oceanbase { @@ -82,7 +83,10 @@ public: } // 同事务状态一起持久化以及恢复 int serialize(char*, const int64_t, int64_t&) const;// 要把实际的ctx类型编码进二进制中 - int deserialize(const char*, const int64_t, int64_t&);// 要根据实际的ctx的类型,在编译期反射子类类型 + int deserialize(const char*, + const int64_t, + int64_t&, + ObIAllocator &allocator = MTL(ObTenantMdsService*)->get_buffer_ctx_allocator());// 要根据实际的ctx的类型,在编译期反射子类类型 int64_t get_serialize_size(void) const; TO_STRING_KV(KP(this), KP_(ctx), KPC_(ctx)); private: diff --git a/src/storage/multi_data_source/runtime_utility/mds_factory.cpp b/src/storage/multi_data_source/runtime_utility/mds_factory.cpp index 9b2aa363a..77ab75eb3 100644 --- a/src/storage/multi_data_source/runtime_utility/mds_factory.cpp +++ b/src/storage/multi_data_source/runtime_utility/mds_factory.cpp @@ -58,6 +58,7 @@ template int deepcopy(const transaction::ObTransID &trans_id, const BufferCtx &old_ctx, BufferCtx *&new_ctx, + ObIAllocator &allocator, const char *alloc_file, const char *alloc_func, const int64_t line) { @@ -74,11 +75,17 @@ 
int deepcopy(const transaction::ObTransID &trans_id, MDS_ASSERT(OB_NOT_NULL(p_old_impl_ctx)); const ImplType &old_impl_ctx = *p_old_impl_ctx; set_mds_mem_check_thread_local_info(MdsWriter(trans_id), typeid(ImplType).name(), alloc_file, alloc_func, line); - if (CLICK() && - OB_ISNULL(p_impl = (ImplType *)MTL(ObTenantMdsService*)->get_buffer_ctx_allocator().alloc(sizeof(ImplType), - ObMemAttr(MTL_ID(), - "MDS_CTX_COPY", - ObCtxIds::MDS_CTX_ID)))) { + // if pre_alloc buffer_ctx use it + if (OB_NOT_NULL(new_ctx)) { + ImplType *new_ctx_impl = dynamic_cast(new_ctx); + if (MDS_FAIL(common::meta::copy_or_assign(old_impl_ctx, *new_ctx_impl))) { + MDS_LOG(WARN, "fail to assign old ctx to new", KR(ret), K(IDX)); + } + } else if (CLICK() && + OB_ISNULL(p_impl = (ImplType *)allocator.alloc(sizeof(ImplType), + ObMemAttr(MTL_ID(), + "MDS_CTX_COPY", + ObCtxIds::MDS_CTX_ID)))) { ret = OB_ALLOCATE_MEMORY_FAILED; MDS_LOG(WARN, "alloc memory failed", KR(ret), K(IDX)); } else { @@ -86,7 +93,7 @@ int deepcopy(const transaction::ObTransID &trans_id, new (p_impl)ImplType(); if (MDS_FAIL(common::meta::copy_or_assign(old_impl_ctx, *p_impl))) { p_impl->~ImplType(); - MTL(mds::ObTenantMdsService*)->get_buffer_ctx_allocator().free(p_impl); + allocator.free(p_impl); MDS_LOG(WARN, "fail to assign old ctx to new", KR(ret), K(IDX)); } else { new_ctx = p_impl; @@ -95,7 +102,7 @@ int deepcopy(const transaction::ObTransID &trans_id, } reset_mds_mem_check_thread_local_info(); } else { - ret = deepcopy(trans_id, old_ctx, new_ctx, alloc_file, alloc_func, line); + ret = deepcopy(trans_id, old_ctx, new_ctx, allocator, alloc_file, alloc_func, line); } return ret; } @@ -104,6 +111,7 @@ template <> int deepcopy(const transaction::ObTransID &trans_id, const BufferCtx &old_ctx, BufferCtx *&new_ctx, + ObIAllocator &allocator, const char *alloc_file, const char *alloc_func, const int64_t line) @@ -116,6 +124,7 @@ int deepcopy(const transaction::ObTran int MdsFactory::deep_copy_buffer_ctx(const 
transaction::ObTransID &trans_id, const BufferCtx &old_ctx, BufferCtx *&new_ctx, + ObIAllocator &allocator, const char *alloc_file, const char *alloc_func, const int64_t line) @@ -126,7 +135,7 @@ int MdsFactory::deep_copy_buffer_ctx(const transaction::ObTransID &trans_id, ret = OB_INVALID_ARGUMENT; new_ctx = nullptr;// won't copy MDS_LOG(WARN, "invalid old_ctx", K(old_ctx.get_binding_type_id())); - } else if (MDS_FAIL(deepcopy<0>(trans_id, old_ctx, new_ctx, alloc_file, alloc_func, line))) { + } else if (MDS_FAIL(deepcopy<0>(trans_id, old_ctx, new_ctx, allocator, alloc_file, alloc_func, line))) { MDS_LOG(WARN, "fail to deep copy buffer ctx", K(old_ctx.get_binding_type_id())); } return ret; @@ -149,6 +158,7 @@ void try_set_writer(T &ctx, const transaction::ObTransID &trans_id) { int MdsFactory::create_buffer_ctx(const transaction::ObTxDataSourceType &data_source_type, const transaction::ObTransID &trans_id, BufferCtx *&buffer_ctx, + ObIAllocator &allocator, const char *alloc_file, const char *alloc_func, const int64_t line) { @@ -161,10 +171,10 @@ int MdsFactory::create_buffer_ctx(const transaction::ObTxDataSourceType &data_so set_mds_mem_check_thread_local_info(MdsWriter(trans_id), typeid(BUFFER_CTX_TYPE).name(), alloc_file, alloc_func, line);\ int64_t type_id = TupleTypeIdx::value;\ BUFFER_CTX_TYPE *ctx_impl = (BUFFER_CTX_TYPE *)\ - MTL(ObTenantMdsService*)->get_buffer_ctx_allocator().alloc(sizeof(BUFFER_CTX_TYPE),\ - ObMemAttr(MTL_ID(),\ - "MDS_CTX_CREATE",\ - ObCtxIds::MDS_CTX_ID));\ + allocator.alloc(sizeof(BUFFER_CTX_TYPE),\ + ObMemAttr(MTL_ID(),\ + "MDS_CTX_CREATE",\ + ObCtxIds::MDS_CTX_ID));\ if (OB_ISNULL(ctx_impl)) {\ ret = OB_ALLOCATE_MEMORY_FAILED;\ MDS_LOG(WARN, "alloc memory failed", KR(ret));\ diff --git a/src/storage/multi_data_source/runtime_utility/mds_factory.h b/src/storage/multi_data_source/runtime_utility/mds_factory.h index 94247fbd5..ee364af10 100644 --- a/src/storage/multi_data_source/runtime_utility/mds_factory.h +++ 
b/src/storage/multi_data_source/runtime_utility/mds_factory.h @@ -91,12 +91,14 @@ struct MdsFactory static int deep_copy_buffer_ctx(const transaction::ObTransID &trans_id, const BufferCtx &old_ctx, BufferCtx *&new_ctx, + ObIAllocator &allocator = MTL(ObTenantMdsService*)->get_buffer_ctx_allocator(), const char *alloc_file = __builtin_FILE(), const char *alloc_func = __builtin_FUNCTION(), const int64_t line = __builtin_LINE()); static int create_buffer_ctx(const transaction::ObTxDataSourceType &data_source_type, const transaction::ObTransID &trans_id, BufferCtx *&buffer_ctx, + ObIAllocator &allocator = MTL(ObTenantMdsService*)->get_buffer_ctx_allocator(), const char *alloc_file = __builtin_FILE(), const char *alloc_func = __builtin_FUNCTION(), const int64_t line = __builtin_LINE()); @@ -144,4 +146,4 @@ private: } } } -#endif \ No newline at end of file +#endif diff --git a/src/storage/tablelock/ob_mem_ctx_table_lock.cpp b/src/storage/tablelock/ob_mem_ctx_table_lock.cpp index b3dc9de0c..bb9ac74e1 100644 --- a/src/storage/tablelock/ob_mem_ctx_table_lock.cpp +++ b/src/storage/tablelock/ob_mem_ctx_table_lock.cpp @@ -377,6 +377,20 @@ int ObLockMemCtx::check_lock_exist( //TODO(lihongqin):check it return ret; } +int ObLockMemCtx::check_contain_tablet(ObTabletID tablet_id, bool &contain) +{ + int ret = OB_SUCCESS; + contain = false; + RDLockGuard guard(list_rwlock_); + DLIST_FOREACH(curr, lock_list_) { + if (curr->lock_op_.is_tablet_lock(tablet_id)) { + contain = true; + break; + } + } + return ret; +} + int ObLockMemCtx::check_modify_schema_elapsed( const ObLockID &lock_id, const int64_t schema_version) diff --git a/src/storage/tablelock/ob_mem_ctx_table_lock.h b/src/storage/tablelock/ob_mem_ctx_table_lock.h index 86baaf568..63a9d90d3 100644 --- a/src/storage/tablelock/ob_mem_ctx_table_lock.h +++ b/src/storage/tablelock/ob_mem_ctx_table_lock.h @@ -91,6 +91,8 @@ public: const ObTableLockOpType op_type, bool &is_exist, uint64_t lock_mode_cnt_in_same_trans[]) const; + int64_t 
get_lock_op_count() { return lock_list_.get_size(); } + int check_contain_tablet(ObTabletID tablet_id, bool &contain); // wait all the trans that modify with a smaller schema_version finished. int check_modify_schema_elapsed( const ObLockID &lock_id, diff --git a/src/storage/tablet/ob_tablet_start_transfer_mds_helper.cpp b/src/storage/tablet/ob_tablet_start_transfer_mds_helper.cpp index 03f082d4f..cae26ee85 100644 --- a/src/storage/tablet/ob_tablet_start_transfer_mds_helper.cpp +++ b/src/storage/tablet/ob_tablet_start_transfer_mds_helper.cpp @@ -810,6 +810,7 @@ int ObTabletStartTransferOutV2Helper::on_register( ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT_V2; ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type); bool start_modify = false; + ObTransferOutTxParam param; if (OB_ISNULL(buf) || len < 0) { ret = OB_INVALID_ARGUMENT; @@ -824,11 +825,27 @@ int ObTabletStartTransferOutV2Helper::on_register( } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ls should not be NULL", KR(ret), K(info), KP(ls)); - } else if (OB_FAIL(transfer_tx_ctx.record_transfer_block_op(info.src_ls_id_, info.dest_ls_id_, info.data_end_scn_, info.transfer_epoch_, false))) { + } else if (OB_FAIL(transfer_tx_ctx.record_transfer_block_op(info.src_ls_id_, + info.dest_ls_id_, + info.data_end_scn_, + info.transfer_epoch_, + false, + info.filter_tx_need_transfer_, + info.move_tx_ids_))) { LOG_WARN("record transfer block op failed", KR(ret), K(info)); + } else { + param.except_tx_id_ = user_ctx.get_writer().writer_id_; + param.data_end_scn_ = info.data_end_scn_; + param.op_scn_ = op_scn; + param.op_type_ = NotifyType::REGISTER_SUCC; + param.is_replay_ = false; + param.dest_ls_id_ = info.dest_ls_id_; + param.transfer_epoch_ = info.transfer_epoch_; + param.move_tx_ids_ = &info.move_tx_ids_; + } + if (OB_FAIL(ret)) { } else if (FALSE_IT(start_modify = true)) { - } else if 
(OB_FAIL(ls->transfer_out_tx_op(user_ctx.get_writer().writer_id_, info.data_end_scn_, op_scn, - NotifyType::REGISTER_SUCC, false, info.dest_ls_id_, info.transfer_epoch_, active_tx_count, block_tx_count))) { + } else if (!info.empty_tx() && OB_FAIL(ls->transfer_out_tx_op(param, active_tx_count, block_tx_count))) { LOG_WARN("transfer block tx failed", KR(ret), K(info)); } else if (OB_FAIL(transfer_out_helper.update_tablets_transfer_out_(info, ls, ctx))) { LOG_WARN("update tablets transfer out failed", KR(ret), K(info), KP(ls)); @@ -840,8 +857,8 @@ int ObTabletStartTransferOutV2Helper::on_register( if (OB_FAIL(ret)) { // to clean int tmp_ret = OB_SUCCESS; - if (start_modify && OB_TMP_FAIL(ls->transfer_out_tx_op(user_ctx.get_writer().writer_id_, info.data_end_scn_, op_scn, - NotifyType::ON_ABORT, false, info.dest_ls_id_, info.transfer_epoch_, active_tx_count, block_tx_count))) { + param.op_type_ = NotifyType::ON_ABORT; + if (start_modify && !info.empty_tx() && OB_TMP_FAIL(ls->transfer_out_tx_op(param, active_tx_count, block_tx_count))) { LOG_ERROR("transfer out clean failed", K(tmp_ret), K(info), K(user_ctx.get_writer().writer_id_)); } } @@ -866,6 +883,7 @@ int ObTabletStartTransferOutV2Helper::on_replay(const char *buf, ObTransferOutTxCtx &transfer_tx_ctx = static_cast(ctx); ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT_V2; ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type); + ObTransferOutTxParam param; if (OB_ISNULL(buf) || len < 0) { ret = OB_INVALID_ARGUMENT; @@ -880,10 +898,26 @@ int ObTabletStartTransferOutV2Helper::on_replay(const char *buf, } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ls should not be NULL", KR(ret), K(info), KP(ls)); - } else if (OB_FAIL(transfer_tx_ctx.record_transfer_block_op(info.src_ls_id_, info.dest_ls_id_, info.data_end_scn_, info.transfer_epoch_, true))) { + } else if (OB_FAIL(transfer_tx_ctx.record_transfer_block_op(info.src_ls_id_, + 
info.dest_ls_id_, + info.data_end_scn_, + info.transfer_epoch_, + true, + info.filter_tx_need_transfer_, + info.move_tx_ids_))) { LOG_WARN("record transfer block op failed", KR(ret), K(info)); - } else if (OB_FAIL(ls->transfer_out_tx_op(user_ctx.get_writer().writer_id_, info.data_end_scn_, scn, - NotifyType::ON_REDO, true, info.dest_ls_id_, info.transfer_epoch_, active_tx_count, block_tx_count))) { + } else { + param.except_tx_id_ = user_ctx.get_writer().writer_id_; + param.data_end_scn_ = info.data_end_scn_; + param.op_scn_ = scn; + param.op_type_ = NotifyType::ON_REDO; // replay keeps the notify type the replaced call passed + param.is_replay_ = true; // on_replay is the replay path, as in the replaced call + param.dest_ls_id_ = info.dest_ls_id_; + param.transfer_epoch_ = info.transfer_epoch_; + param.move_tx_ids_ = &info.move_tx_ids_; + } + if (OB_FAIL(ret)) { + } else if (!info.empty_tx() && OB_FAIL(ls->transfer_out_tx_op(param, active_tx_count, block_tx_count))) { LOG_WARN("transfer block tx failed", KR(ret), K(info)); } else if (OB_FAIL(transfer_out_helper.on_replay_success_(scn, info, ctx))) { LOG_WARN("start transfer out on replay failed", KR(ret), K(info), KP(ls)); diff --git a/src/storage/tablet/ob_tablet_transfer_tx_ctx.cpp b/src/storage/tablet/ob_tablet_transfer_tx_ctx.cpp index 1a06578da..f8c3cc488 100644 --- a/src/storage/tablet/ob_tablet_transfer_tx_ctx.cpp +++ b/src/storage/tablet/ob_tablet_transfer_tx_ctx.cpp @@ -53,13 +53,17 @@ void ObTransferMoveTxParam::reset() is_incomplete_replay_ = false; } -ObTransferOutTxCtx::ObTransferOutTxCtx() - : do_transfer_block_(false), - src_ls_id_(), - dest_ls_id_(), - data_end_scn_(), - transfer_scn_(), - transfer_epoch_(0) {} +void ObTransferOutTxParam::reset() +{ + except_tx_id_ = 0; + data_end_scn_.reset(); + op_scn_.reset(); + op_type_ = NotifyType::UNKNOWN; + is_replay_ = false; + dest_ls_id_.reset(); + transfer_epoch_ = 0; + move_tx_ids_ = nullptr; +} void ObTransferOutTxCtx::reset() { @@ -69,6 +73,8 @@ void ObTransferOutTxCtx::reset() data_end_scn_.reset(); transfer_scn_.reset(); transfer_epoch_ = 0; + 
filter_tx_need_transfer_ = false; + move_tx_ids_.reset(); } bool ObTransferOutTxCtx::is_valid() @@ -87,6 +93,8 @@ int ObTransferOutTxCtx::assign(const ObTransferOutTxCtx &other) const mds::MdsCtx &mds_ctx = static_cast(other); if (OB_FAIL(MdsCtx::assign(mds_ctx))) { LOG_WARN("transfer out tx ctx assign failed", KR(ret), K(other)); + } else if (OB_FAIL(move_tx_ids_.assign(other.move_tx_ids_))) { + LOG_WARN("assign array failed", KR(ret)); } else { do_transfer_block_ = other.do_transfer_block_; src_ls_id_ = other.src_ls_id_; @@ -94,6 +102,7 @@ int ObTransferOutTxCtx::assign(const ObTransferOutTxCtx &other) data_end_scn_ = other.data_end_scn_; transfer_scn_ = other.transfer_scn_; transfer_epoch_ = other.transfer_epoch_; + filter_tx_need_transfer_ = other.filter_tx_need_transfer_; } return ret; } @@ -102,18 +111,23 @@ int ObTransferOutTxCtx::record_transfer_block_op(const share::ObLSID src_ls_id, const share::ObLSID dest_ls_id, const share::SCN data_end_scn, int64_t transfer_epoch, - bool is_replay) + bool is_replay, + bool filter_tx_need_transfer, + ObIArray &move_tx_ids) { int ret = OB_SUCCESS; if (!is_replay && do_transfer_block_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ctx do_transfer_block unexpectd", KR(ret), KP(this)); + } else if (OB_FAIL(move_tx_ids_.assign(move_tx_ids))) { + LOG_WARN("assgin array failed", KR(ret)); } else { src_ls_id_ = src_ls_id; dest_ls_id_ = dest_ls_id; data_end_scn_ = data_end_scn; transfer_epoch_ = transfer_epoch; do_transfer_block_ = true; + filter_tx_need_transfer_ = filter_tx_need_transfer; } return ret; } @@ -128,6 +142,19 @@ void ObTransferOutTxCtx::on_redo(const share::SCN &redo_scn) ObLS *ls = nullptr; int64_t active_tx_count = 0; int64_t block_tx_count = 0; + ObTransferOutTxParam param; + param.except_tx_id_ = get_writer().writer_id_; + param.data_end_scn_ = data_end_scn_; + param.op_scn_ = redo_scn; + param.op_type_ = transaction::NotifyType::ON_REDO; + param.is_replay_ = false; + param.dest_ls_id_ = dest_ls_id_; + 
param.transfer_epoch_ = transfer_epoch_; + if (filter_tx_need_transfer_) { + param.move_tx_ids_ = &move_tx_ids_; + } else { + param.move_tx_ids_ = nullptr; + } while (true) { int ret = OB_SUCCESS; @@ -139,15 +166,7 @@ void ObTransferOutTxCtx::on_redo(const share::SCN &redo_scn) } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ls should not be NULL", KR(ret), KP(this), KP(ls)); - } else if (OB_FAIL(ls->transfer_out_tx_op(get_writer().writer_id_, - data_end_scn_, - redo_scn, - transaction::NotifyType::ON_REDO, - false, - dest_ls_id_, - transfer_epoch_, - active_tx_count, - block_tx_count))) { + } else if (!empty_tx() && OB_FAIL(ls->transfer_out_tx_op(param, active_tx_count, block_tx_count))) { LOG_WARN("transfer out tx failed", KR(ret), K(tx_id), KP(this)); } if (OB_FAIL(ret)) { @@ -164,6 +183,19 @@ void ObTransferOutTxCtx::on_commit(const share::SCN &commit_version, const share LOG_INFO("transfer_out_tx on_commit", K(commit_version), K(commit_scn), K(tx_id), KP(this), KPC(this)); int ret = OB_SUCCESS; mds::MdsCtx::on_commit(commit_version, commit_scn); + ObTransferOutTxParam param; + param.except_tx_id_ = get_writer().writer_id_; + param.data_end_scn_ = data_end_scn_; + param.op_scn_ = commit_scn; + param.op_type_ = transaction::NotifyType::ON_COMMIT; + param.is_replay_ = false; + param.dest_ls_id_ = dest_ls_id_; + param.transfer_epoch_ = transfer_epoch_; + if (filter_tx_need_transfer_) { + param.move_tx_ids_ = &move_tx_ids_; + } else { + param.move_tx_ids_ = nullptr; + } while (true) { int ret = OB_SUCCESS; ObLSHandle ls_handle; @@ -180,15 +212,7 @@ void ObTransferOutTxCtx::on_commit(const share::SCN &commit_version, const share } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ls should not be NULL", KR(ret), KP(this)); - } else if (OB_FAIL(ls->transfer_out_tx_op(get_writer().writer_id_, - data_end_scn_, - commit_scn, - transaction::NotifyType::ON_COMMIT, - false, - 
dest_ls_id_, - transfer_epoch_, - active_tx_count, - op_tx_count))) { + } else if (!empty_tx() && OB_FAIL(ls->transfer_out_tx_op(param, active_tx_count, op_tx_count))) { LOG_WARN("transfer out tx op failed", KR(ret), K(tx_id), KP(this)); } else { int64_t end_time = ObTimeUtility::current_time(); @@ -211,6 +235,19 @@ void ObTransferOutTxCtx::on_abort(const share::SCN &abort_scn) transaction::ObTransID tx_id = writer_.writer_id_; LOG_INFO("transfer_out_tx on_abort", K(abort_scn), K(tx_id), KP(this), KPC(this)); mds::MdsCtx::on_abort(abort_scn); + ObTransferOutTxParam param; + param.except_tx_id_ = get_writer().writer_id_; + param.data_end_scn_ = data_end_scn_; + param.op_scn_ = abort_scn; + param.op_type_ = transaction::NotifyType::ON_ABORT; + param.is_replay_ = false; + param.dest_ls_id_ = dest_ls_id_; + param.transfer_epoch_ = transfer_epoch_; + if (filter_tx_need_transfer_) { + param.move_tx_ids_ = &move_tx_ids_; + } else { + param.move_tx_ids_ = nullptr; + } if (do_transfer_block_) { while (true) { int ret = OB_SUCCESS; @@ -227,15 +264,7 @@ void ObTransferOutTxCtx::on_abort(const share::SCN &abort_scn) } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ls should not be NULL", KR(ret), KP(this)); - } else if (OB_FAIL(ls->transfer_out_tx_op(get_writer().writer_id_, - data_end_scn_, - abort_scn, - transaction::NotifyType::ON_ABORT, - false, - dest_ls_id_, - transfer_epoch_, - active_tx_count, - op_tx_count))) { + } else if (!empty_tx() && OB_FAIL(ls->transfer_out_tx_op(param, active_tx_count, op_tx_count))) { LOG_WARN("transfer out tx op failed", KR(ret), K(tx_id), KP(this)); } if (OB_SUCC(ret)) { diff --git a/src/storage/tablet/ob_tablet_transfer_tx_ctx.h b/src/storage/tablet/ob_tablet_transfer_tx_ctx.h index a777f9be6..c1344b11d 100644 --- a/src/storage/tablet/ob_tablet_transfer_tx_ctx.h +++ b/src/storage/tablet/ob_tablet_transfer_tx_ctx.h @@ -80,6 +80,23 @@ public: bool is_incomplete_replay_; }; +struct 
ObTransferOutTxParam +{ + ObTransferOutTxParam() { reset(); } + ~ObTransferOutTxParam() { reset(); } + void reset(); + TO_STRING_KV(K_(except_tx_id), K_(data_end_scn), K_(op_scn), K_(op_type), + K_(is_replay), K_(dest_ls_id), K_(transfer_epoch), K_(move_tx_ids)); + int64_t except_tx_id_; + share::SCN data_end_scn_; + share::SCN op_scn_; + transaction::NotifyType op_type_; + bool is_replay_; + share::ObLSID dest_ls_id_; + int64_t transfer_epoch_; + ObIArray *move_tx_ids_; +}; + struct CollectTxCtxInfo final { OB_UNIS_VERSION(1); @@ -145,25 +162,30 @@ class ObTransferOutTxCtx : public mds::MdsCtx { OB_UNIS_VERSION(1); public: - ObTransferOutTxCtx(); + ObTransferOutTxCtx() { reset(); } ~ObTransferOutTxCtx() { reset(); } void reset(); int record_transfer_block_op(const share::ObLSID src_ls_id, const share::ObLSID dest_ls_id, const share::SCN data_end_scn, int64_t transfer_epoch, - bool is_replay); + bool is_replay, + bool filter_tx_need_transfer, + ObIArray &move_tx_ids); virtual void on_redo(const share::SCN &redo_scn) override; virtual void on_commit(const share::SCN &commit_version, const share::SCN &commit_scn) override; virtual void on_abort(const share::SCN &abort_scn) override; bool is_valid(); int assign(const ObTransferOutTxCtx &other); + bool empty_tx() { return filter_tx_need_transfer_ && move_tx_ids_.count() == 0; } TO_STRING_KV(K_(do_transfer_block), K_(src_ls_id), K_(dest_ls_id), K_(data_end_scn), - K_(transfer_scn)); + K_(transfer_scn), + K_(filter_tx_need_transfer), + K_(move_tx_ids)); private: bool do_transfer_block_; share::ObLSID src_ls_id_; @@ -171,6 +193,8 @@ private: share::SCN data_end_scn_; share::SCN transfer_scn_; int64_t transfer_epoch_; + bool filter_tx_need_transfer_; + ObSEArray move_tx_ids_; }; OB_SERIALIZE_MEMBER_TEMP(inline, ObTransferOutTxCtx, @@ -180,7 +204,9 @@ OB_SERIALIZE_MEMBER_TEMP(inline, ObTransferOutTxCtx, dest_ls_id_, data_end_scn_, transfer_scn_, - transfer_epoch_) + transfer_epoch_, + filter_tx_need_transfer_, + 
move_tx_ids_) class ObTransferMoveTxCtx : public mds::BufferCtx { diff --git a/src/storage/tx/ob_ctx_tx_data.cpp b/src/storage/tx/ob_ctx_tx_data.cpp index 076b2aae3..758107c5c 100644 --- a/src/storage/tx/ob_ctx_tx_data.cpp +++ b/src/storage/tx/ob_ctx_tx_data.cpp @@ -132,28 +132,6 @@ int ObCtxTxData::recover_tx_data(ObTxData *tmp_tx_data) return ret; } -int ObCtxTxData::deep_copy_tx_data_out(ObTxDataGuard &tmp_tx_data_guard) -{ - int ret = OB_SUCCESS; - RLockGuard guard(lock_); - - if (OB_FAIL(check_tx_data_writable_())) { - TRANS_LOG(WARN, "tx data is not writeable", K(ret), K(*this)); - } else { - ObTxTable *tx_table = nullptr; - GET_TX_TABLE_(tx_table) - if (OB_FAIL(ret)) { - } else if (OB_FAIL(deep_copy_tx_data_(tx_table, tmp_tx_data_guard))) { - TRANS_LOG(WARN, "deep copy tx data failed", K(ret), K(tmp_tx_data_guard), K(*this)); - } else if (OB_ISNULL(tmp_tx_data_guard.tx_data())) { - ret = OB_ERR_UNEXPECTED; - TRANS_LOG(ERROR, "copied tmp tx data is null", KR(ret), K(*this)); - } - } - - return ret; -} - int ObCtxTxData::free_tmp_tx_data(ObTxData *&tmp_tx_data) { int ret = OB_SUCCESS; @@ -212,6 +190,38 @@ int ObCtxTxData::set_state(int32_t state) return ret; } +int ObCtxTxData::add_abort_op(SCN op_scn) +{ + int ret = OB_SUCCESS; + RLockGuard guard(lock_); + + ObTxOp abort_op; + if (OB_FAIL(check_tx_data_writable_())) { + TRANS_LOG(WARN, "tx data is not writeable", KR(ret), K(*this)); + } else if (OB_FAIL(tx_data_guard_.tx_data()->init_tx_op())) { + TRANS_LOG(WARN, "init_tx_op failed", KR(ret)); + } else if (OB_FAIL(abort_op.init(ObTxOpCode::ABORT_OP, op_scn, &DEFAULT_TX_DUMMY_OP, 0))) { + TRANS_LOG(WARN, "init_tx_op failed", KR(ret)); + } else if (OB_FAIL(tx_data_guard_.tx_data()->op_guard_->add_tx_op(abort_op))) { + TRANS_LOG(WARN, "add_tx_op failed", KR(ret)); + } + return ret; +} + +int ObCtxTxData::reserve_tx_op_space(int64_t count) +{ + int ret = OB_SUCCESS; + RLockGuard guard(lock_); + if (OB_FAIL(check_tx_data_writable_())) { + TRANS_LOG(WARN, "tx data 
is not writeable", KR(ret), K(*this)); + } else if (OB_FAIL(tx_data_guard_.tx_data()->init_tx_op())) { + TRANS_LOG(WARN, "init_tx_op failed", KR(ret)); + } else if (OB_FAIL(tx_data_guard_.tx_data()->op_guard_->reserve_tx_op_space(count))) { + TRANS_LOG(WARN, "reserve tx_op space failed", KR(ret)); + } + return ret; +} + int ObCtxTxData::set_commit_version(const SCN &commit_version) { int ret = OB_SUCCESS; @@ -341,80 +351,6 @@ int ObCtxTxData::get_tx_data_ptr(storage::ObTxData *&tx_data_ptr) return ret; } -int ObCtxTxData::prepare_add_undo_action(ObUndoAction &undo_action, - storage::ObTxDataGuard &tmp_tx_data_guard, - storage::ObUndoStatusNode *&tmp_undo_status) -{ - int ret = OB_SUCCESS; - RLockGuard guard(lock_); - /* - * alloc undo_status_node used on commit stage - * alloc tx_data and add undo_action to it, which will be inserted - * into tx_data_table after RollbackSavepoint log sync success - */ - if (OB_FAIL(check_tx_data_writable_())) { - TRANS_LOG(WARN, "tx data is not writeable", K(ret), K(*this)); - } else { - ObTxTable *tx_table = nullptr; - GET_TX_TABLE_(tx_table); - if (OB_FAIL(ret)) { - } else if (OB_FAIL(tx_table->get_tx_data_table()->alloc_undo_status_node(tmp_undo_status))) { - TRANS_LOG(WARN, "alloc undo status fail", K(ret), KPC(this)); - } else if (OB_ISNULL(tmp_undo_status)) { - ret = OB_ERR_UNEXPECTED; - TRANS_LOG(ERROR, "undo status is null", KR(ret), KPC(this)); - } else if (OB_FAIL(tx_table->deep_copy_tx_data(tx_data_guard_, tmp_tx_data_guard))) { - TRANS_LOG(WARN, "copy tx data fail", K(ret), KPC(this)); - } else if (OB_ISNULL(tmp_tx_data_guard.tx_data())) { - ret = OB_ERR_UNEXPECTED; - TRANS_LOG(ERROR, "copied tx_data is null", KR(ret), KPC(this)); - } else if (OB_FAIL(tmp_tx_data_guard.tx_data()->add_undo_action(tx_table, undo_action))) { - TRANS_LOG(WARN, "add undo action fail", K(ret), KPC(this)); - } - - if (OB_FAIL(ret) && OB_NOT_NULL(tmp_undo_status)) { - tx_table->get_tx_data_table()->free_undo_status_node(tmp_undo_status); - } - 
} - return ret; -} - -int ObCtxTxData::cancel_add_undo_action(storage::ObUndoStatusNode *tmp_undo_status) -{ - int ret = OB_SUCCESS; - ObTxTable *tx_table = nullptr; - GET_TX_TABLE_(tx_table); - if (OB_SUCC(ret)) { - ret = tx_table->get_tx_data_table()->free_undo_status_node(tmp_undo_status); - } - return ret; -} - -int ObCtxTxData::commit_add_undo_action(ObUndoAction &undo_action, storage::ObUndoStatusNode *tmp_undo_status) -{ - return add_undo_action(undo_action, tmp_undo_status); -} - -int ObCtxTxData::add_undo_action(ObUndoAction &undo_action, storage::ObUndoStatusNode *tmp_undo_status) -{ - int ret = OB_SUCCESS; - RLockGuard guard(lock_); - - if (OB_FAIL(check_tx_data_writable_())) { - TRANS_LOG(WARN, "tx data is not writeable", K(ret), K(*this)); - } else { - ObTxTable *tx_table = nullptr; - GET_TX_TABLE_(tx_table); - if (OB_FAIL(ret)) { - // do nothing - } else if (OB_FAIL(tx_data_guard_.tx_data()->add_undo_action(tx_table, undo_action, tmp_undo_status))) { - TRANS_LOG(WARN, "add undo action failed", K(ret), K(undo_action), KP(tmp_undo_status), K(*this)); - }; - } - - return ret; -} - int ObCtxTxData::check_tx_data_writable_() { int ret = OB_SUCCESS; diff --git a/src/storage/tx/ob_ctx_tx_data.h b/src/storage/tx/ob_ctx_tx_data.h index 6e158b523..3df982fa7 100644 --- a/src/storage/tx/ob_ctx_tx_data.h +++ b/src/storage/tx/ob_ctx_tx_data.h @@ -51,9 +51,11 @@ public: void get_tx_table(storage::ObTxTable *&tx_table); int set_state(int32_t state); + int add_abort_op(share::SCN op_scn); int set_commit_version(const share::SCN &commit_version); int set_start_log_ts(const share::SCN &start_ts); int set_end_log_ts(const share::SCN &end_ts); + int reserve_tx_op_space(int64_t count); int32_t get_state() const; const share::SCN get_commit_version() const; @@ -62,13 +64,6 @@ public: ObTransID get_tx_id() const; - int prepare_add_undo_action(ObUndoAction &undo_action, - storage::ObTxDataGuard &tmp_tx_data_guard, - storage::ObUndoStatusNode *&tmp_undo_status); - int 
cancel_add_undo_action(storage::ObUndoStatusNode *tmp_undo_status); - int commit_add_undo_action(ObUndoAction &undo_action, storage::ObUndoStatusNode *tmp_undo_status); - int add_undo_action(ObUndoAction &undo_action, storage::ObUndoStatusNode *tmp_undo_status = NULL); - int get_tx_data(storage::ObTxDataGuard &tx_data_guard); // ATTENTION : use get_tx_data_ptr only if you can make sure the life cycle of ctx_tx_data is longer than your usage diff --git a/src/storage/tx/ob_multi_data_source.cpp b/src/storage/tx/ob_multi_data_source.cpp index 5465be1aa..59462a499 100644 --- a/src/storage/tx/ob_multi_data_source.cpp +++ b/src/storage/tx/ob_multi_data_source.cpp @@ -26,6 +26,7 @@ #include "storage/multi_data_source/compile_utility/mds_register.h" #undef NEED_MDS_REGISTER_DEFINE #include "share/ob_standby_upgrade.h" // ObStandbyUpgrade +#include "share/allocator/ob_shared_memory_allocator_mgr.h" namespace oceanbase { @@ -105,6 +106,144 @@ bool ObTxBufferNode::operator==(const ObTxBufferNode &buffer_node) const return is_same; } +ObTxBufferNodeWrapper::~ObTxBufferNodeWrapper() +{ + ObIAllocator &allocator = MTL(share::ObSharedMemAllocMgr*)->tx_data_op_allocator(); + if (OB_NOT_NULL(node_.get_ptr())) { + allocator.free(node_.get_ptr()); + } + storage::mds::BufferCtx *buffer_ctx = const_cast(node_.get_buffer_ctx_node().get_ctx()); + if (OB_NOT_NULL(buffer_ctx)) { + // TODO destructor without allocator is safe? 
+ buffer_ctx->~BufferCtx(); + allocator.free(buffer_ctx); + } +} + +OB_DEF_SERIALIZE_SIZE(ObTxBufferNodeWrapper) +{ + int64_t len = 0; + len += serialization::encoded_length_vi64(tx_id_); + len += node_.get_serialize_size(); + len += node_.get_buffer_ctx_node().get_serialize_size(); + return len; +} + +OB_DEF_SERIALIZE(ObTxBufferNodeWrapper) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(serialization::encode_vi64(buf, buf_len, pos, tx_id_))) { + TRANS_LOG(WARN, "serialize node wrapper fail", KR(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(node_.serialize(buf, buf_len, pos))) { + TRANS_LOG(WARN, "serialize node wrapper fail", KR(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(node_.get_buffer_ctx_node().serialize(buf, buf_len, pos))) { + TRANS_LOG(WARN, "serialize node wrapper fail", KR(ret), K(buf_len), K(pos)); + } + return ret; +} + +OB_DEF_DESERIALIZE(ObTxBufferNodeWrapper) +{ + int ret = OB_SUCCESS; + ObIAllocator &allocator = MTL(share::ObSharedMemAllocMgr*)->tx_data_op_allocator(); + char *node_buf = NULL; + if (OB_FAIL(serialization::decode_vi64(buf, data_len, pos, &tx_id_))) { + TRANS_LOG(WARN, "deserialize node wrapper fail", KR(ret), K(data_len), K(pos)); + } else if (OB_FAIL(node_.deserialize(buf, data_len, pos))) { + TRANS_LOG(WARN, "deserialize node wrapper fail", KR(ret), K(data_len), K(pos)); + } else if (OB_ISNULL(node_buf = (char*)allocator.alloc(node_.get_data_size()))) { + node_.get_data().reset(); ret = OB_ALLOCATE_MEMORY_FAILED; // payload still points into buf here; clear it so the destructor cannot free memory this wrapper does not own + TRANS_LOG(WARN, "deserialize node wrapper fail", KR(ret), K(data_len), K(pos)); + } else if (FALSE_IT(MEMCPY(node_buf, node_.get_ptr(), node_.get_data_size()))) { + } else if (FALSE_IT((node_.get_data().assign_ptr(node_buf, node_.get_data_size())))) { + } else if (OB_FAIL(node_.get_buffer_ctx_node().deserialize(buf, data_len, pos, allocator))) { + TRANS_LOG(WARN, "deserialize node wrapper fail", KR(ret), K(data_len), K(pos)); + } + return ret; +} + +int ObTxBufferNodeWrapper::assign(ObIAllocator &allocator, const ObTxBufferNodeWrapper &wrapper) 
+{ + return assign(wrapper.get_tx_id(), wrapper.get_node(), allocator, false); +} + +int ObTxBufferNodeWrapper::pre_alloc(int64_t tx_id, const ObTxBufferNode &node, ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + int64_t buf_len = node.get_data_size(); + char *ptr = NULL; + node_.register_no_ = node.register_no_; + node_.type_ = node.type_; + + if (OB_ISNULL(ptr = (char*)allocator.alloc(buf_len))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + TRANS_LOG(WARN, "alloc mem fail", K(ret)); + } else { + node_.get_data().assign_ptr(ptr, buf_len); + } + + if (OB_SUCC(ret)) { + mds::BufferCtx *new_ctx = nullptr; + if (OB_ISNULL(node.get_buffer_ctx_node().get_ctx())) { + // do nothing + } else if (OB_FAIL(mds::MdsFactory::create_buffer_ctx(node.type_, + ObTransID(tx_id), + new_ctx, + allocator))) { + TRANS_LOG(WARN, "create buffer_ctx failed", KR(ret)); + } else { + node_.get_buffer_ctx_node().set_ctx(new_ctx); + } + } + return ret; +} + +int ObTxBufferNodeWrapper::assign(int64_t tx_id, + const ObTxBufferNode &node, + ObIAllocator &allocator, + bool has_pre_alloc) +{ + int ret = OB_SUCCESS; + int64_t buf_len = node.get_data_size(); + char *ptr = NULL; + tx_id_ = tx_id; + node_.register_no_ = node.register_no_; + node_.has_submitted_ = node.has_submitted_; + node_.has_synced_ = node.has_synced_; + node_.mds_base_scn_ = node.mds_base_scn_; + node_.type_ = node.type_; + + if (has_pre_alloc) { + MEMCPY(node_.get_ptr(), const_cast(node).get_ptr(), buf_len); + } else if (OB_ISNULL(ptr = (char*)allocator.alloc(buf_len))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + TRANS_LOG(WARN, "alloc mem fail", K(ret)); + } else { + MEMCPY(ptr, const_cast(node).get_ptr(), buf_len); + node_.get_data().assign_ptr(ptr, buf_len); + } + + if (OB_SUCC(ret)) { + mds::BufferCtx *new_ctx = nullptr; + if (OB_ISNULL(node.get_buffer_ctx_node().get_ctx())) { + // do nothing + } else if (has_pre_alloc && FALSE_IT(new_ctx = const_cast(node_.get_buffer_ctx_node().get_ctx()))) { + } else if (has_pre_alloc && 
OB_ISNULL(new_ctx)) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "has_pre_alloc but new_ctx is null", KR(ret), K(node_)); + } else if (OB_FAIL(mds::MdsFactory::deep_copy_buffer_ctx(ObTransID(tx_id_), + *(node.get_buffer_ctx_node().get_ctx()), + new_ctx, + allocator))) { + TRANS_LOG(WARN, "copy buffer_ctx failed", KR(ret)); + } else if (!has_pre_alloc) { + node_.get_buffer_ctx_node().set_ctx(new_ctx); + } + } + return ret; +} + + //##################################################### // ObMulSourceTxDataNotifier //##################################################### diff --git a/src/storage/tx/ob_multi_data_source.h b/src/storage/tx/ob_multi_data_source.h index 5fc535d1d..2d7e7f027 100644 --- a/src/storage/tx/ob_multi_data_source.h +++ b/src/storage/tx/ob_multi_data_source.h @@ -84,6 +84,7 @@ class ObTxBufferNode friend class ObTxExecInfo; friend class ObMulSourceTxDataNotifier; friend class ObTxMDSCache; + friend class ObTxBufferNodeWrapper; OB_UNIS_VERSION(1); public: @@ -116,10 +117,11 @@ public: // only for some mds types of CDC // can not be used by observer functions - bool allow_to_use_mds_big_segment() { return type_ == ObTxDataSourceType::DDL_TRANS; } + bool allow_to_use_mds_big_segment() const { return type_ == ObTxDataSourceType::DDL_TRANS; } void replace_data(const common::ObString &data); + ObString &get_data() { return data_; } int64_t get_data_size() const { return data_.length(); } ObTxDataSourceType get_data_source_type() const { return type_; } const ObString &get_data_buf() const { return data_; } @@ -141,6 +143,7 @@ public: has_synced_ = false; } storage::mds::BufferCtxNode &get_buffer_ctx_node() const { return buffer_ctx_node_; } + TO_STRING_KV(K(register_no_), K(has_submitted_), K(has_synced_), K_(type), K(data_.length())); private: @@ -156,6 +159,29 @@ private: typedef common::ObSEArray ObTxBufferNodeArray; typedef common::ObSEArray ObTxBufferCtxArray; +// manage mds_op contain (buffer_node, buffer, buffer_ctx) +class ObTxBufferNodeWrapper 
+{ + OB_UNIS_VERSION(1); +public: + ObTxBufferNodeWrapper() : tx_id_(0), node_() + {} + ObTxBufferNodeWrapper(const ObTxBufferNodeWrapper &) = delete; + ObTxBufferNodeWrapper &operator=(const ObTxBufferNodeWrapper &) = delete; + ~ObTxBufferNodeWrapper(); + const ObTxBufferNode &get_node() const { return node_; } + int64_t get_tx_id() const { return tx_id_; } + int pre_alloc(int64_t tx_id, const ObTxBufferNode &node, ObIAllocator &allocator); + // deep_copy by node + int assign(int64_t tx_id, const ObTxBufferNode &node, ObIAllocator &allocator, bool has_pre_alloc); + int assign(ObIAllocator &allocator, const ObTxBufferNodeWrapper &node_wrapper); + + TO_STRING_KV(K_(tx_id), K_(node)); +private: + int64_t tx_id_; + ObTxBufferNode node_; +}; + class ObMulSourceTxDataNotifier { public: @@ -190,7 +216,7 @@ private: class ObMulSourceTxDataDump { -public: +public: static const char* dump_buf(ObTxDataSourceType source_type, const char * buf,const int64_t len); private: diff --git a/src/storage/tx/ob_trans_ctx_mgr_v4.cpp b/src/storage/tx/ob_trans_ctx_mgr_v4.cpp index 454ebc6cb..af6e25535 100644 --- a/src/storage/tx/ob_trans_ctx_mgr_v4.cpp +++ b/src/storage/tx/ob_trans_ctx_mgr_v4.cpp @@ -2541,34 +2541,65 @@ OB_NOINLINE int ObLSTxCtxMgr::errsim_apply_start_working_log() return ret; } -int ObLSTxCtxMgr::transfer_out_tx_op(int64_t except_tx_id, - const SCN data_end_scn, - const SCN op_scn, - NotifyType op_type, - bool is_replay, - ObLSID dest_ls_id, - int64_t transfer_epoch, +int ObLSTxCtxMgr::filter_tx_need_transfer(ObIArray &tablet_list, + const share::SCN data_end_scn, + ObIArray &move_tx_ids) +{ + int ret = OB_SUCCESS; + FilterTransferTxFunctor fn(tablet_list, data_end_scn, move_tx_ids); + if (OB_FAIL(ls_tx_ctx_map_.for_each(fn))) { + ret = fn.get_ret(); + TRANS_LOG(WARN, "for each tx ctx error", KR(ret), "manager", *this); + } + return ret; +} + +int ObLSTxCtxMgr::transfer_out_tx_op(const ObTransferOutTxParam ¶m, int64_t& active_tx_count, int64_t &op_tx_count) { int ret = 
OB_SUCCESS; - const int64_t abs_expired_time = INT64_MAX; - TransferOutTxOpFunctor fn(abs_expired_time, except_tx_id, - data_end_scn, - op_scn, - op_type, - is_replay, - dest_ls_id, - transfer_epoch); - if (OB_FAIL(ls_tx_ctx_map_.for_each(fn))) { - TRANS_LOG(WARN, "for each tx ctx error", KR(ret), "manager", *this); - ret = fn.get_ret(); + if (OB_ISNULL(param.move_tx_ids_)) { + TransferOutTxOpFunctor fn(param); + if (OB_FAIL(ls_tx_ctx_map_.for_each(fn))) { + ret = fn.get_ret(); + TRANS_LOG(WARN, "for each tx ctx error", KR(ret), "manager", *this); + } else { + active_tx_count = fn.get_count(); + op_tx_count = fn.get_op_tx_count(); + } } else { - active_tx_count = fn.get_count(); - op_tx_count = fn.get_op_tx_count(); + active_tx_count = ls_tx_ctx_map_.count(); + for (int64_t idx = 0; OB_SUCC(ret) && idx < param.move_tx_ids_->count(); idx++) { + if (param.move_tx_ids_->at(idx).get_id() == param.except_tx_id_) { + continue; + } + ObPartTransCtx *ctx = nullptr; + ObTransCtx *tmp_ctx = nullptr; + bool is_operated = false; + if (OB_FAIL(ls_tx_ctx_map_.get(param.move_tx_ids_->at(idx), tmp_ctx))) { + if (OB_ENTRY_NOT_EXIST != ret) { + TRANS_LOG(WARN, "get tx ctx failed", KR(ret), K(param.move_tx_ids_->at(idx))); + } else { + ret = OB_SUCCESS; + } + } else if (FALSE_IT(ctx = static_cast(tmp_ctx))) { + } else if (OB_FAIL(ctx->do_transfer_out_tx_op(param.data_end_scn_, + param.op_scn_, + param.op_type_, + param.is_replay_, + param.dest_ls_id_, + param.transfer_epoch_, + is_operated))) { + TRANS_LOG(WARN, "transfer out tx failed", KR(ret), K(param)); + } else if (is_operated) { + op_tx_count++; + } + if (OB_NOT_NULL(ctx)) { + revert_tx_ctx(ctx); + } + } } - TRANS_LOG(INFO, "[TRANSFER] transfer_out_tx_op", KR(ret), K(data_end_scn), K(op_scn), K(op_type), K(is_replay), K(dest_ls_id), - K(transfer_epoch), K(active_tx_count), K(op_tx_count), K(ls_tx_ctx_map_.count()), K(tenant_id_), K(ls_id_)); return ret; } @@ -2594,23 +2625,43 @@ int ObLSTxCtxMgr::wait_tx_write_end(ObTimeoutCtx 
&timeout_ctx) int ObLSTxCtxMgr::collect_tx_ctx(const ObLSID dest_ls_id, const SCN log_scn, const ObIArray &tablet_list, - int64_t &tx_count, + const ObIArray &move_tx_ids, int64_t &collect_count, ObIArray &res) { int ret = OB_SUCCESS; - - const int64_t abs_expired_time = INT64_MAX; - CollectTxCtxFunctor fn(abs_expired_time, dest_ls_id, log_scn, tablet_list, tx_count, collect_count, res); - if (OB_FAIL(ls_tx_ctx_map_.for_each(fn))) { - TRANS_LOG(WARN, "for each tx ctx error", KR(ret), "manager", *this); - ret = fn.get_ret(); - } else { - tx_count = fn.get_tx_count(); - collect_count = fn.get_collect_count(); + ObSEArray final_move_tx_ids; + for (int64_t idx = 0; OB_SUCC(ret) && idx < move_tx_ids.count(); idx++) { + ObPartTransCtx *ctx = nullptr; + ObTransCtx *tmp_ctx = nullptr; + bool is_collected = false; + ObTxCtxMoveArg arg; + if (OB_FAIL(ls_tx_ctx_map_.get(move_tx_ids.at(idx), tmp_ctx))) { + if (OB_ENTRY_NOT_EXIST != ret) { + TRANS_LOG(WARN, "get tx ctx failed", KR(ret), K(move_tx_ids.at(idx))); + } else { + ret = OB_SUCCESS; + } + } else if (FALSE_IT(ctx = static_cast(tmp_ctx))) { + } else if (OB_FAIL(ctx->collect_tx_ctx(dest_ls_id, + log_scn, + tablet_list, + arg, + is_collected))) { + TRANS_LOG(WARN, "collect tx ctx failed", KR(ret), K(move_tx_ids.at(idx))); + } else if (!is_collected) { + } else if (OB_FAIL(final_move_tx_ids.push_back(move_tx_ids.at(idx)))) { + TRANS_LOG(WARN, "collect tx ctx failed", KR(ret), K(move_tx_ids.at(idx))); + } else if (OB_FAIL(res.push_back(arg))) { + TRANS_LOG(WARN, "push to array failed", KR(ret), K(move_tx_ids.at(idx))); + } else { + collect_count++; + } + if (OB_NOT_NULL(ctx)) { + revert_tx_ctx(ctx); + } } - - TRANS_LOG(INFO, "collect_tx_ctx", KR(ret), K(tx_count), K(collect_count), K(tenant_id_), K(ls_id_)); + TRANS_LOG(INFO, "collect_tx_ctx", KR(ret), K(final_move_tx_ids), K(collect_count), K(tenant_id_), K(ls_id_)); return ret; } @@ -2720,11 +2771,10 @@ int ObLSTxCtxMgr::move_tx_op(const ObTransferMoveTxParam 
&move_tx_param, if (OB_NOT_NULL(ctx)) { revert_tx_ctx(ctx); } - TRANS_LOG(INFO, "move_tx_op", KR(ret), K(arg.tx_id_), K(ls_id_), K(is_replay), K(is_created)); + TRANS_LOG(INFO, "move_tx_op", KR(ret), K(arg.tx_id_), K(ls_id_), K(is_replay), K(is_created), K(move_tx_param.op_type_)); } return ret; } - } } diff --git a/src/storage/tx/ob_trans_ctx_mgr_v4.h b/src/storage/tx/ob_trans_ctx_mgr_v4.h index 83d747f44..282ef9e68 100644 --- a/src/storage/tx/ob_trans_ctx_mgr_v4.h +++ b/src/storage/tx/ob_trans_ctx_mgr_v4.h @@ -41,6 +41,7 @@ class ObTransSubmitLogFunctor; class ObTxCtxTable; struct ObTxCtxMoveArg; struct ObTransferMoveTxParam; +struct ObTransferOutTxParam; } namespace memtable @@ -203,20 +204,18 @@ public: // Offline the in-memory state of the ObLSTxCtxMgr int offline(); - int transfer_out_tx_op(int64_t except_tx_id, - const SCN data_end_scn, - const SCN op_scn, - NotifyType op_type, - bool is_replay, - ObLSID dest_ls_id, - int64_t transfer_epoch, + int filter_tx_need_transfer(ObIArray &tablet_list, + const share::SCN data_end_scn, + ObIArray &move_tx_ids); + + int transfer_out_tx_op(const ObTransferOutTxParam ¶m, int64_t& active_tx_count, int64_t &op_tx_count); int wait_tx_write_end(ObTimeoutCtx &timeout_ctx); int collect_tx_ctx(const share::ObLSID dest_ls_id, const SCN log_scn, const ObIArray &tablet_list, - int64_t &tx_count, + const ObIArray &move_tx_ids, int64_t &colllect_count, ObIArray &res); int move_tx_op(const ObTransferMoveTxParam &move_tx_param, diff --git a/src/storage/tx/ob_trans_functor.h b/src/storage/tx/ob_trans_functor.h index f2fd80760..212c3d193 100644 --- a/src/storage/tx/ob_trans_functor.h +++ b/src/storage/tx/ob_trans_functor.h @@ -357,17 +357,53 @@ private: ObTxCommitCallback *&cb_list_; }; +class FilterTransferTxFunctor +{ +public: + FilterTransferTxFunctor(ObIArray &tablet_list, const SCN data_end_scn, ObIArray &move_tx_ids) : + tablet_list_(tablet_list), data_end_scn_(data_end_scn), + move_tx_ids_(move_tx_ids), count_(0), 
ret_(OB_SUCCESS) + {} + ~FilterTransferTxFunctor() { PRINT_FUNC_STAT; } + OPERATOR_V4(FilterTransferTxFunctor) + { + bool bool_ret = false; + int ret = OB_SUCCESS; + if (!tx_id.is_valid() || OB_ISNULL(tx_ctx)) { + ret_ = ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "invalid argument", K(tx_id), "ctx", OB_P(tx_ctx)); + } else { + ++count_; + } + if (OB_SUCC(ret)) { + bool need_transfer = false; + if (OB_FAIL(tx_ctx->check_need_transfer(data_end_scn_, tablet_list_, need_transfer))) { + TRANS_LOG(WARN, "check need transfer failed", KR(ret), K(*tx_ctx)); + ret_ = ret; + } else if (need_transfer && OB_FAIL(move_tx_ids_.push_back(tx_id))) { + ret_ = ret; + } else { + bool_ret = true; + } + } + return bool_ret; + } + int get_ret() const { return ret_; } + int64_t get_count() const { return count_; } +private: + ObIArray &tablet_list_; + const SCN data_end_scn_; + ObIArray &move_tx_ids_; + int64_t count_; + int ret_; +}; + class TransferOutTxOpFunctor { public: - TransferOutTxOpFunctor(const int64_t abs_expired_time, int64_t except_tx_id, const SCN data_end_scn, - const SCN op_scn, NotifyType op_type, bool is_replay, ObLSID dest_ls_id, int64_t transfer_epoch) - : abs_expired_time_(abs_expired_time), except_tx_id_(except_tx_id), data_end_scn_(data_end_scn), - op_scn_(op_scn), op_type_(op_type), is_replay_(is_replay), dest_ls_id_(dest_ls_id), - transfer_epoch_(transfer_epoch), count_(0), op_tx_count_(0), ret_(OB_SUCCESS) + TransferOutTxOpFunctor(const ObTransferOutTxParam ¶m) + : param_(param), count_(0), op_tx_count_(0), ret_(OB_SUCCESS) { - - SET_EXPIRED_LIMIT(100 * 1000 /*100ms*/, 3 * 1000 * 1000 /*3s*/); } ~TransferOutTxOpFunctor() { PRINT_FUNC_STAT; } OPERATOR_V4(TransferOutTxOpFunctor) @@ -379,21 +415,19 @@ public: TRANS_LOG(WARN, "invalid argument", K(tx_id), "ctx", OB_P(tx_ctx)); } else { ++count_; - if ((count_ % BATCH_CHECK_COUNT) == 0) { - const int64_t now = ObTimeUtility::current_time(); - if (now >= abs_expired_time_) { - ret_ = ret = OB_TIMEOUT; - 
TRANS_LOG(WARN, "transfer block tx timeout", K(count_)); - } - } } if (OB_FAIL(ret)) { - } else if (tx_id.get_id() == except_tx_id_) { + } else if (tx_id.get_id() == param_.except_tx_id_) { bool_ret = true; } else { bool is_operated = false; - if (OB_FAIL(tx_ctx->do_transfer_out_tx_op(data_end_scn_, op_scn_, op_type_, is_replay_, - dest_ls_id_, transfer_epoch_, is_operated))) { + if (OB_FAIL(tx_ctx->do_transfer_out_tx_op(param_.data_end_scn_, + param_.op_scn_, + param_.op_type_, + param_.is_replay_, + param_.dest_ls_id_, + param_.transfer_epoch_, + is_operated))) { TRANS_LOG(WARN, "do_transfer_out_tx_op failed", KR(ret), K(*tx_ctx)); ret_ = ret; } else { @@ -409,15 +443,7 @@ public: int64_t get_count() const { return count_; } int64_t get_op_tx_count() const { return op_tx_count_; } private: - static const int64_t BATCH_CHECK_COUNT = 100; - int64_t abs_expired_time_; - int64_t except_tx_id_; - const SCN data_end_scn_; - const SCN op_scn_; - NotifyType op_type_; - bool is_replay_; - ObLSID dest_ls_id_; - int64_t transfer_epoch_; + const ObTransferOutTxParam ¶m_; int64_t count_; int64_t op_tx_count_; int ret_; diff --git a/src/storage/tx/ob_trans_part_ctx.cpp b/src/storage/tx/ob_trans_part_ctx.cpp index ccb080a0f..918219068 100644 --- a/src/storage/tx/ob_trans_part_ctx.cpp +++ b/src/storage/tx/ob_trans_part_ctx.cpp @@ -43,6 +43,7 @@ #undef NEED_MDS_REGISTER_DEFINE #include "storage/tablet/ob_tablet_transfer_tx_ctx.h" #include "storage/tx/ob_ctx_tx_data.h" +#include "share/allocator/ob_shared_memory_allocator_mgr.h" #include "logservice/ob_log_service.h" #include "storage/ddl/ob_ddl_inc_clog_callback.h" #include "storage/tx/ob_tx_log_operator.h" @@ -195,14 +196,18 @@ int ObPartTransCtx::init_for_transfer_move(const ObTxCtxMoveArg &arg) { int ret = OB_SUCCESS; CtxLockGuard guard(lock_); - exec_info_.is_sub2pc_ = arg.is_sub2pc_; - mt_ctx_.set_trans_version(arg.trans_version_); - exec_info_.trans_type_ = TransType::DIST_TRANS; - if (arg.tx_state_ >= ObTxState::PREPARE) { 
- exec_info_.prepare_version_ = arg.prepare_version_; - ctx_tx_data_.set_commit_version(arg.commit_version_); + if (OB_FAIL(load_tx_op_if_exist_())) { + TRANS_LOG(WARN, "load_tx_op failed", KR(ret), KPC(this)); + } else { + exec_info_.is_sub2pc_ = arg.is_sub2pc_; + mt_ctx_.set_trans_version(arg.trans_version_); + exec_info_.trans_type_ = TransType::DIST_TRANS; + if (arg.tx_state_ >= ObTxState::PREPARE) { + exec_info_.prepare_version_ = arg.prepare_version_; + ctx_tx_data_.set_commit_version(arg.commit_version_); + } + set_durable_state_(arg.tx_state_); } - set_durable_state_(arg.tx_state_); return ret; } @@ -1786,6 +1791,10 @@ int ObPartTransCtx::serialize_tx_ctx_to_buffer(ObTxLocalBuffer &buffer, int64_t } else if (OB_FAIL(refresh_rec_log_ts_())) { TRANS_LOG(WARN, "refresh rec log ts failed", K(ret), K(*this)); } else { + SpinRLockManualGuard tx_op_guard; + if (ctx_info.tx_data_guard_.tx_data()->op_guard_.is_valid()) { + tx_op_guard.lock(ctx_info.tx_data_guard_.tx_data()->op_guard_->get_lock()); + } // 6. Do serialize int64_t pos = 0; serialize_size = ctx_info.get_serialize_size(); @@ -2168,6 +2177,9 @@ int ObPartTransCtx::tx_end_(const bool commit) // the user. 
} else if (OB_FAIL(ctx_tx_data_.set_state(state))) { TRANS_LOG(WARN, "set tx data state failed", K(ret), KPC(this)); + // We need put abort_op into tx_data before trans_end to promise ctx_tx_data is writeable + } else if (!commit && end_scn.is_valid() && OB_FAIL(ctx_tx_data_.add_abort_op(end_scn))) { + TRANS_LOG(WARN, "add tx data abort_op failed", K(ret), KPC(this)); // STEP5: We need invoke mt_ctx_.trans_end after the ctx_tx_data is decided // and filled in because we obey the rule that ObMvccRowCallback::trans_commit // is callbacked from front to back so that if the read or write is standing @@ -2324,6 +2336,174 @@ int ObPartTransCtx::on_success(ObTxLogCb *log_cb) return ret; } +int ObPartTransCtx::replay_mds_to_tx_table_(const ObTxBufferNodeArray &mds_node_array, + const share::SCN op_scn) +{ + int ret = OB_SUCCESS; + ObTxDataGuard tx_data_guard; + ObTxDataGuard new_tx_data_guard; + bool op_exist = false; + if (OB_FAIL(ctx_tx_data_.get_tx_data(tx_data_guard))) { + TRANS_LOG(WARN, "get tx data failed", KR(ret)); + } else if (OB_FAIL(tx_data_guard.tx_data()->check_tx_op_exist(op_scn, op_exist))) { + TRANS_LOG(WARN, "check_tx_op_exist failed", KR(ret)); + } else if (op_exist) { + // do nothing + } else if (OB_FAIL(tx_data_guard.tx_data()->init_tx_op())) { + TRANS_LOG(WARN, "init tx op failed", KR(ret)); + } else { + ObTxOpArray tx_op_batch; + if (OB_FAIL(prepare_mds_tx_op_(mds_node_array, + op_scn, + *tx_data_guard.tx_data()->op_allocator_, + tx_op_batch, + true))) { + TRANS_LOG(WARN, "preapre mds tx_op failed", KR(ret)); + } else if (OB_FAIL(tx_data_guard.tx_data()->op_guard_->add_tx_op_batch(trans_id_, + ls_id_, op_scn, tx_op_batch))) { + TRANS_LOG(WARN, "add_tx_op_batch failed", KR(ret)); + } + } + // tx_ctx and tx_data checkpoint independent + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ls_tx_ctx_mgr_->get_tx_table()->alloc_tx_data(new_tx_data_guard, true, INT64_MAX))){ + TRANS_LOG(WARN, "alloc tx data failed", KR(ret)); + } else { + *new_tx_data_guard.tx_data() 
= *tx_data_guard.tx_data(); + ObTxData *new_tx_data = new_tx_data_guard.tx_data(); + new_tx_data->end_scn_ = op_scn; + if (OB_FAIL(ls_tx_ctx_mgr_->get_tx_table()->insert(new_tx_data))) { + TRANS_LOG(WARN, "insert tx data failed", KR(ret)); + } + } + TRANS_LOG(INFO, "replay mds to tx_table", KR(ret), K(mds_node_array.count()), K(trans_id_), K(ls_id_), K(op_scn), K(op_exist)); + return ret; +} + +int ObPartTransCtx::insert_mds_to_tx_table_(ObTxLogCb &log_cb) +{ + int ret = OB_SUCCESS; + const ObTxBufferNodeArray &node_array = log_cb.get_mds_range().get_range_array(); + bool all_big_segment = true; + ObTxBufferNodeArray need_process_mds; + for (int64_t idx = 0; OB_SUCC(ret) && idx < node_array.count(); idx++) { + if (!node_array.at(idx).allow_to_use_mds_big_segment()) { + all_big_segment = false; + if (OB_FAIL(need_process_mds.push_back(node_array.at(idx)))) { + TRANS_LOG(WARN, "push to process_mds failed", KR(ret), K(log_cb)); + } + } + } + if (OB_FAIL(ret)) { + } else if (all_big_segment) { + TRANS_LOG(INFO, "MDS big_segment not support tx_op just skip", K(trans_id_), K(ls_id_), KP(this)); + // big segment not support tx_op + if (OB_NOT_NULL(log_cb.get_tx_op_array()) && log_cb.get_tx_op_array()->count() > 0) { + TRANS_LOG(WARN, "MDS big_segment log_cb pre_alloc is not null", KPC(this), K(log_cb)); + } + } else if (OB_ISNULL(log_cb.get_tx_op_array())) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "log_cb tx_op is null", KR(ret), KPC(this), K(log_cb)); + } else if (need_process_mds.count() != log_cb.get_tx_op_array()->count()) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "log_cb mds size is not match", KR(ret), KPC(this), K(log_cb), K(need_process_mds)); + } else { + SCN op_scn = log_cb.get_log_ts(); + ObTxOpArray &tx_op_array = *log_cb.get_tx_op_array(); + ObTxDataGuard tx_data_guard; + // assign mds for pre_alloc node + for (int64_t idx = 0; OB_SUCC(ret) && idx < tx_op_array.count(); idx++) { + tx_op_array.at(idx).set_op_scn(op_scn); + ObTxBufferNodeWrapper 
&wrapper = *(ObTxBufferNodeWrapper*)(tx_op_array.at(idx).get_op_val()); + if (wrapper.get_node().get_register_no() != need_process_mds.at(idx).get_register_no() || + wrapper.get_node().get_data_source_type() != need_process_mds.at(idx).get_data_source_type()) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "mds not match", KR(ret), KPC(this)); + } else if (OB_FAIL(wrapper.assign(trans_id_, need_process_mds.at(idx), MTL(ObSharedMemAllocMgr*)->tx_data_op_allocator(), true))) { + TRANS_LOG(WARN, "assign mds failed", KR(ret), KPC(this)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ctx_tx_data_.get_tx_data(tx_data_guard))) { + TRANS_LOG(WARN, "get tx data failed", KR(ret)); + } else if (OB_FAIL(tx_data_guard.tx_data()->init_tx_op())) { + TRANS_LOG(WARN, "init tx op failed", KR(ret)); + } else if (OB_FAIL(tx_data_guard.tx_data()->op_guard_->add_tx_op_batch(trans_id_, + ls_id_, op_scn, tx_op_array))) { + TRANS_LOG(WARN, "add_tx_op_batch failed", KR(ret)); + } else { + *log_cb.get_tx_data_guard().tx_data() = *tx_data_guard.tx_data(); + ObTxData *new_tx_data = log_cb.get_tx_data_guard().tx_data(); + new_tx_data->end_scn_ = op_scn; + if (OB_FAIL(ls_tx_ctx_mgr_->get_tx_table()->insert(new_tx_data))) { + TRANS_LOG(WARN, "insert tx data failed", KR(ret)); + } else { + tx_op_array.reset(); + } + } + } + TRANS_LOG(INFO, "insert mds to tx_table", KR(ret), K(trans_id_), K(ls_id_), K(exec_info_.multi_data_source_.count()), K(log_cb)); + return ret; +} + +int ObPartTransCtx::insert_undo_action_to_tx_table_(ObUndoAction &undo_action, + ObTxDataGuard &new_tx_data_guard, + const share::SCN op_scn) +{ + int ret = OB_SUCCESS; + // tx_data on part_ctx has modified + ObTxDataGuard tx_data_guard; + if (OB_FAIL(ctx_tx_data_.get_tx_data(tx_data_guard))) { + TRANS_LOG(WARN, "get tx data failed", KR(ret)); + } else if (OB_FAIL(tx_data_guard.tx_data()->init_tx_op())) { + TRANS_LOG(WARN, "init tx op failed", KR(ret)); + } else if 
(OB_FAIL(tx_data_guard.tx_data()->add_undo_action(ls_tx_ctx_mgr_->get_tx_table(), undo_action))) { + TRANS_LOG(WARN, "add undo action failed", KR(ret)); + } else { + *new_tx_data_guard.tx_data() = *tx_data_guard.tx_data(); + ObTxData *new_tx_data = new_tx_data_guard.tx_data(); + new_tx_data->end_scn_ = op_scn; + if (OB_FAIL(ls_tx_ctx_mgr_->get_tx_table()->insert(new_tx_data))) { + TRANS_LOG(WARN, "insert tx data failed", KR(ret)); + } + } + TRANS_LOG(INFO, "insert undo_action to tx_table", KR(ret), K(undo_action), K(trans_id_), K(ls_id_), K(op_scn)); + return ret; +} + +int ObPartTransCtx::replay_undo_action_to_tx_table_(ObUndoAction &undo_action, + const share::SCN op_scn) +{ + int ret = OB_SUCCESS; + ObTxDataGuard tx_data_guard; + ObTxDataGuard new_tx_data_guard; + ObTxDataOp *tx_data_op = nullptr; + int64_t tx_data_op_ref = 0; + if (OB_FAIL(ctx_tx_data_.get_tx_data(tx_data_guard))) { + TRANS_LOG(WARN, "get tx data failed", KR(ret)); + } else if (OB_FAIL(tx_data_guard.tx_data()->init_tx_op())) { + TRANS_LOG(WARN, "init tx op failed", KR(ret)); + } else if (OB_FAIL(ls_tx_ctx_mgr_->get_tx_table()->alloc_tx_data(new_tx_data_guard, true, INT64_MAX))){ + TRANS_LOG(WARN, "alloc tx data failed", KR(ret)); + } else { + *new_tx_data_guard.tx_data() = *tx_data_guard.tx_data(); + ObTxData *new_tx_data = new_tx_data_guard.tx_data(); + new_tx_data->end_scn_ = op_scn; + tx_data_op = new_tx_data->op_guard_.ptr(); + if (OB_NOT_NULL(tx_data_op)) { + tx_data_op_ref = tx_data_op->get_ref(); + } + if (OB_FAIL(new_tx_data->add_undo_action(ls_tx_ctx_mgr_->get_tx_table(), undo_action))) { + TRANS_LOG(WARN, "add undo action failed", KR(ret)); + } else if (OB_FAIL(ls_tx_ctx_mgr_->get_tx_table()->insert(new_tx_data))) { + TRANS_LOG(WARN, "insert tx data failed", KR(ret)); + } + } + TRANS_LOG(INFO, "replay undo_action to tx_table", KR(ret), K(undo_action), K(trans_id_), + K(ls_id_), K(op_scn), KP(tx_data_op), K(tx_data_op_ref)); + return ret; +} + int 
ObPartTransCtx::on_success_ops_(ObTxLogCb *log_cb) { int ret = OB_SUCCESS; @@ -2353,8 +2533,11 @@ int ObPartTransCtx::on_success_ops_(ObTxLogCb *log_cb) false, log_cb->get_mds_range().get_range_array()))) { TRANS_LOG(WARN, "notify data source for ON_REDO", K(ret)); + } else if (OB_FAIL(insert_mds_to_tx_table_(*log_cb))) { + TRANS_LOG(WARN, "inert into tx table failed", KR(ret)); } else { log_cb->get_mds_range().reset(); + log_cb->reset_tx_op_array(); } } else if (ObTxLogType::TX_DIRECT_LOAD_INC_LOG == log_type) { ObTxCtxLogOperator dli_log_op(this, log_cb); @@ -2408,20 +2591,10 @@ int ObPartTransCtx::on_success_ops_(ObTxLogCb *log_cb) TRANS_LOG(INFO, "apply commit info log", KR(ret), K(*this), K(two_phase_log_type)); } } else if (ObTxLogType::TX_ROLLBACK_TO_LOG == log_type) { - ObTxData *tx_data = log_cb->get_tx_data(); - if (OB_ISNULL(tx_data)) { - ret = OB_ERR_UNEXPECTED; - TRANS_LOG(ERROR, "unexpected null ptr", KR(ret), K(*this)); + if (OB_FAIL(insert_undo_action_to_tx_table_(log_cb->get_undo_action(), log_cb->get_tx_data_guard(), log_ts))) { + TRANS_LOG(WARN, "insert to tx table failed", KR(ret), K(*this)); } else { - // although logs may be callbacked out of order, - // insert into tx table all the way, tx table will - // filter out the obsolete one. 
- tx_data->end_scn_ = log_ts; - if (OB_FAIL(ctx_tx_data_.insert_tmp_tx_data(tx_data))) { - TRANS_LOG(WARN, "insert to tx table failed", KR(ret), K(*this)); - } else { - log_cb->set_tx_data(nullptr); - } + log_cb->set_tx_data(nullptr); } } else if (ObTxLogTypeChecker::is_state_log(log_type)) { sub_state_.clear_state_log_submitting(); @@ -3688,6 +3861,8 @@ int ObPartTransCtx::submit_abort_log_() } } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); + } else if (OB_FAIL(ctx_tx_data_.reserve_tx_op_space(1))) { + TRANS_LOG(WARN, "reserve tx_op space failed", KR(ret), KPC(this)); } else if (OB_FAIL(submit_log_block_out_(log_block, SCN::min_scn(), log_cb, replay_hint, barrier, 50 * 1000))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); @@ -4793,7 +4968,7 @@ int ObPartTransCtx::push_replaying_log_ts(const SCN log_ts_ns, const int64_t log } if (OB_UNLIKELY(replay_completeness_.is_unknown())) { const bool replay_continous = exec_info_.next_log_entry_no_ == log_entry_no; - set_replay_completeness_(replay_continous); + set_replay_completeness_(replay_continous, log_ts_ns); } } return ret; @@ -5299,7 +5474,7 @@ int ObPartTransCtx::replay_rollback_to(const ObTxRollbackToLog &log, // all previous log replayed // the txn must not replay from its first log, aka. 
incomplete-replay TRANS_LOG(INFO, "detect txn replayed from middle", K(ret), K(timestamp), K_(trans_id), K_(ls_id), K_(exec_info)); - set_replay_completeness_(false); + set_replay_completeness_(false, timestamp); ret = OB_SUCCESS; } else if (min_unreplayed_scn > timestamp) { ret = OB_ERR_UNEXPECTED; @@ -6042,6 +6217,8 @@ int ObPartTransCtx::replay_multi_data_source(const ObTxMultiDataSourceLog &log, true, increamental_array))) { TRANS_LOG(WARN, "notify data source for ON_REDO failed", K(ret)); + } else if (OB_FAIL(replay_mds_to_tx_table_(increamental_array, timestamp))) { + TRANS_LOG(WARN, "insert mds_op to tx_table failed", K(ret)); } if (OB_SUCC(ret) && OB_FAIL(check_and_merge_redo_lsns_(lsn))) { @@ -6950,7 +7127,6 @@ int ObPartTransCtx::deep_copy_mds_array_(const ObTxBufferNodeArray &mds_array, K(i), K(ctx_array_start_index), K(tmp_buf_arr[i].get_register_no()), K(exec_info_.multi_data_source_[ctx_array_start_index]), KPC(this)); } - } else { if (OB_FAIL(exec_info_.multi_data_source_.push_back(tmp_buf_arr[i]))) { TRANS_LOG(WARN, "push back exec_info_.multi_data_source_ failed", K(ret)); @@ -6964,6 +7140,44 @@ int ObPartTransCtx::deep_copy_mds_array_(const ObTxBufferNodeArray &mds_array, return ret; } +int ObPartTransCtx::prepare_mds_tx_op_(const ObTxBufferNodeArray &mds_array, + SCN op_scn, + ObTenantTxDataOpAllocator &tx_op_allocator, + ObTxOpArray &tx_op_array, + bool is_replay) +{ + int ret = OB_SUCCESS; + int64_t dest_max_register_no = 0; + + for (int64_t i = 0; OB_SUCC(ret) && i < mds_array.count(); i++) { + const ObTxBufferNode &node = mds_array.at(i); + ObTxBufferNodeWrapper *new_node_wrapper = nullptr; + mds::BufferCtx *new_ctx = nullptr; + ObTxOp tx_op; + tx_op_allocator.reset_local_alloc_size(); + if (node.allow_to_use_mds_big_segment()) { + // do nothing + } else if (OB_ISNULL(new_node_wrapper = (ObTxBufferNodeWrapper*)(tx_op_allocator.alloc(sizeof(ObTxBufferNodeWrapper))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + TRANS_LOG(WARN, "allocate memory 
failed", KR(ret)); + } else if (FALSE_IT(new(new_node_wrapper) ObTxBufferNodeWrapper())) { + } else if (!is_replay && OB_FAIL(new_node_wrapper->pre_alloc(trans_id_, node, tx_op_allocator))) { + TRANS_LOG(WARN, "pre_alloc failed", KR(ret), KPC(this)); + } else if (is_replay && OB_FAIL(new_node_wrapper->assign(trans_id_, node, tx_op_allocator, false))) { + TRANS_LOG(WARN, "assign failed", KR(ret), KPC(this)); + } else if (OB_FAIL(tx_op.init(ObTxOpCode::MDS_OP, op_scn, new_node_wrapper, tx_op_allocator.get_local_alloc_size()))) { + TRANS_LOG(WARN, "init tx_op fail", KR(ret)); + } else if (OB_FAIL(tx_op_array.push_back(tx_op))) { + TRANS_LOG(WARN, "push buffer_node to list fail", KR(ret)); + } + if (OB_FAIL(ret) && OB_NOT_NULL(new_node_wrapper)) { + tx_op_allocator.free(new_node_wrapper); + } + } + TRANS_LOG(INFO, "prepare_mds_tx_op", K(ret), K(trans_id_), K(ls_id_), K(mds_array), K(tx_op_array), K(op_scn)); + return ret; +} + int ObPartTransCtx::decide_state_log_barrier_type_( const ObTxLogType &state_log_type, logservice::ObReplayBarrierType &final_barrier_type) @@ -7068,6 +7282,7 @@ int ObPartTransCtx::submit_multi_data_source_(ObTxLogBlock &log_block) share::SCN mds_base_scn; const int64_t replay_hint = trans_id_.get_id(); ObTxLogCb *log_cb = nullptr; + void *tmp_buf = nullptr; if (mds_cache_.count() > 0) { ObTxMultiDataSourceLog log; ObTxMDSRange range; @@ -7118,7 +7333,20 @@ int ObPartTransCtx::submit_multi_data_source_(ObTxLogBlock &log_block) log_cb = nullptr; } else if ((mds_base_scn.is_valid() ? 
OB_FALSE_IT(mds_base_scn = share::SCN::scn_inc(mds_base_scn)) : OB_FALSE_IT(mds_base_scn.set_min()))) { - // do nothing + } else if (OB_FAIL(ctx_tx_data_.reserve_tx_op_space(log_cb->get_mds_range().count()))) { + TRANS_LOG(WARN, "reserve tx_op space failed", KR(ret), KPC(this)); + } else if (OB_FAIL(ls_tx_ctx_mgr_->get_tx_table()->alloc_tx_data(log_cb->get_tx_data_guard(), true, INT64_MAX))) { + TRANS_LOG(WARN, "alloc tx_data failed", KR(ret), KPC(this)); + } else if (OB_ISNULL(tmp_buf = mtl_malloc(sizeof(ObTxOpArray), "ObTxOpArray"))) { + TRANS_LOG(WARN, "alloc memory failed", KR(ret), KPC(this)); + } else if (FALSE_IT(new (tmp_buf) ObTxOpArray())) { + } else if (FALSE_IT(log_cb->get_tx_op_array() = (ObTxOpArray*)tmp_buf)) { + } else if (OB_FAIL(prepare_mds_tx_op_(log_cb->get_mds_range().get_range_array(), + SCN::min_scn(), + *log_cb->get_tx_data_guard().tx_data()->op_allocator_, + *log_cb->get_tx_op_array(), + false))) { + TRANS_LOG(WARN, "preapre tx_op failed", KR(ret), KPC(this)); } else if (OB_FAIL(submit_log_block_out_(log_block, mds_base_scn, log_cb, replay_hint, barrier_type))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); release_ctx_ref_(); @@ -8022,7 +8250,7 @@ int ObPartTransCtx::sub_end_tx(const int64_t &request_id, return ret; } -int ObPartTransCtx::supplement_undo_actions_if_exist_() +int ObPartTransCtx::supplement_tx_op_if_exist_(const bool for_replay, const SCN replay_scn) { int ret = OB_SUCCESS; @@ -8031,26 +8259,91 @@ int ObPartTransCtx::supplement_undo_actions_if_exist_() ObTxDataGuard tmp_tx_data_guard; tmp_tx_data_guard.reset(); ctx_tx_data_.get_tx_table(tx_table); - const ObTxData *tx_data = nullptr; - if (OB_FAIL(ctx_tx_data_.get_tx_data(guard))) { + if (for_replay && !replay_scn.is_valid()) { + ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "supplement tx_op", KR(ret), K(for_replay), K(replay_scn), KPC(this)); + } else if (OB_FAIL(ctx_tx_data_.get_tx_data(guard))) { TRANS_LOG(ERROR, "get tx data from ctx tx data 
failed", KR(ret)); - } else if (OB_NOT_NULL(guard.tx_data()->undo_status_list_.head_)) { - ret = OB_ERR_UNEXPECTED; - TRANS_LOG(ERROR, "invalid ctx tx data", KR(ret), KPC(guard.tx_data())); - } else if (OB_FAIL(ctx_tx_data_.deep_copy_tx_data_out(tmp_tx_data_guard))) { - TRANS_LOG(WARN, "deep copy tx data in ctx tx data failed.", KR(ret), - K(ctx_tx_data_), KPC(this)); - } else if (OB_FAIL(tx_table->supplement_undo_actions_if_exist( - tmp_tx_data_guard.tx_data()))) { - TRANS_LOG( - WARN, - "supplement undo actions to a tx data when replaying a transaction from the middle failed.", - KR(ret), K(ctx_tx_data_), KPC(this)); + } else if (OB_FAIL(tx_table->alloc_tx_data(tmp_tx_data_guard))) { + TRANS_LOG(WARN, "alloc tx_data failed", KR(ret), KPC(this)); + } else if (FALSE_IT(tmp_tx_data_guard.tx_data()->tx_id_ = trans_id_)) { + } else if (OB_FAIL(tx_table->supplement_tx_op_if_exist(tmp_tx_data_guard.tx_data()))) { + TRANS_LOG(WARN, "supplement tx_op ", KR(ret), K(ctx_tx_data_), KPC(this)); } else if (OB_FAIL(ctx_tx_data_.recover_tx_data(tmp_tx_data_guard.tx_data()))) { TRANS_LOG(WARN, "replace tx data in ctx tx data failed.", KR(ret), K(ctx_tx_data_), KPC(this)); + } else if (for_replay && tmp_tx_data_guard.tx_data()->op_guard_.is_valid() && + OB_FAIL(recover_tx_ctx_from_tx_op_(tmp_tx_data_guard.tx_data()->op_guard_->get_tx_op_list(), replay_scn))) { + TRANS_LOG(WARN, "recover tx_ctx from tx_op failed", KR(ret)); } + TRANS_LOG(INFO, "supplement_tx_op_if_exist_", KR(ret), K(trans_id_), K(ls_id_), K(ctx_tx_data_)); + return ret; +} +int ObPartTransCtx::recover_tx_ctx_from_tx_op_(ObTxOpVector &tx_op_list, const SCN replay_scn) +{ + TRANS_LOG(INFO, "recover_tx_ctx_from_tx_op_", K(tx_op_list.get_count()), K(replay_scn), KPC(this)); + int ret = OB_SUCCESS; + // filter tx_op for this tx_ctx life_cycle + ObTxOpArray ctx_tx_op; + for (int64_t idx = 0; OB_SUCC(ret) && idx < tx_op_list.get_count(); idx++) { + ObTxOp &tx_op = *tx_op_list.at(idx); + if (tx_op.get_op_scn() < 
replay_scn) { + if (tx_op.get_op_code() == ObTxOpCode::ABORT_OP) { + ctx_tx_op.reuse(); + } else if (OB_FAIL(ctx_tx_op.push_back(tx_op))) { + TRANS_LOG(WARN, "push tx_op to array fail", KR(ret), KPC(this)); + } + } else { + if (tx_op.get_op_code() == ObTxOpCode::ABORT_OP) { + break; + } else if (OB_FAIL(ctx_tx_op.push_back(tx_op))) { + TRANS_LOG(WARN, "push tx_op to array fail", KR(ret), KPC(this)); + } + } + } + // recover tx_op to tx_ctx + ObTxBufferNodeArray mds_array; + for (int64_t idx = 0; OB_SUCC(ret) && idx < ctx_tx_op.count(); idx++) { + ObTxOp &tx_op = ctx_tx_op.at(idx); + if (tx_op.get_op_code() == ObTxOpCode::MDS_OP) { + ObTxBufferNodeWrapper &node_wrapper = *tx_op.get(); + const ObTxBufferNode &node = node_wrapper.get_node(); + if (OB_FAIL(mds_array.push_back(node))) { + TRANS_LOG(WARN, "failed to push node to array", KR(ret), KPC(this)); + } else if (node.type_ == ObTxDataSourceType::TABLE_LOCK) { + // to recover table_lock + ObTableLockOp lock_op; + int64_t pos = 0; + const char *buf = node.data_.ptr(); + const int64_t len = node.data_.length(); + if (OB_FAIL(lock_op.deserialize(buf, node.get_data_size(), pos))) { + TRANS_LOG(WARN, "deserialize fail", KR(ret), KPC(this)); + } else if (OB_FAIL(mt_ctx_.replay_lock(lock_op, tx_op.get_op_scn()))) { + TRANS_LOG(WARN, "recover lock_op failed", KR(ret), KPC(this)); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "recover tx_op undefined", KR(ret), KPC(this)); + } + } + ObTxBufferNodeArray _unused_; + if (FAILEDx(deep_copy_mds_array_(mds_array, _unused_))) { + TRANS_LOG(WARN, "deep copy mds array failed", KR(ret), KPC(this)); + } + int64_t mds_max_register_no = 0; + if (mds_array.count() > 0) { + mds_max_register_no = mds_array.at(mds_array.count() - 1).get_register_no(); + } + int64_t ctx_max_register_no = 0; + if (exec_info_.multi_data_source_.count() > 0) { + ctx_max_register_no = exec_info_.multi_data_source_.at(exec_info_.multi_data_source_.count() - 1).get_register_no(); + } + 
TRANS_LOG(INFO, "recover tx_ctx from tx_op", KR(ret), K(tx_op_list.get_count()), K(ctx_tx_op.count()), + K(mds_array.count()), K(exec_info_.multi_data_source_.count()), + K(mds_max_register_no), K(ctx_max_register_no), + KPC(this)); return ret; } @@ -8359,83 +8652,25 @@ int ObPartTransCtx::rollback_to_savepoint_(const ObTxSEQ from_scn, // _NOTICE_ must load Undo(s) from TxDataTable before overwriten if (replay_completeness_.is_unknown() && !ctx_tx_data_.has_recovered_from_tx_table() && - OB_FAIL(supplement_undo_actions_if_exist_())) { + OB_FAIL(supplement_tx_op_if_exist_(true, replay_scn))) { TRANS_LOG(WARN, "load undos from tx table fail", K(ret), KPC(this)); - } else if (OB_FAIL(ctx_tx_data_.add_undo_action(undo_action))) { - TRANS_LOG(WARN, "recrod undo info fail", K(ret), K(from_scn), K(to_scn), KPC(this)); - } else if (OB_FAIL(ctx_tx_data_.deep_copy_tx_data_out(tmp_tx_data_guard))) { - TRANS_LOG(WARN, "deep copy tx data failed", KR(ret), K(*this)); - } - - // - // when multiple branch-level savepoints were replayed out of order, to ensure - // tx-data with larger end_scn include all undo-actions of others before - // - // we do deleting in frozen memtable and updating (which with largest end_scn) in active memtable - // because distinguish frozen/active memtable is not easy, just always do those two actions. 
- // - // following is an illusion of this strategy: - // - // assume rollback to logs with scn of: 80 90 110 - // - // and frozen scn is 100 - // - // case 1: replay order: 110, 90, 80 - // case 2: replay order: 110, 80, 90 - // case 3: replay order: 90, 110, 80 - // case 4: replay order: 90, 80, 110 - // - // the operations of each case: - // case 1: insert 110 -> [insert 90, update 110] -> [insert 80, delete 90, udpate 110] - // case 2: insert 110 -> [insert 80 update 110] -> [insert 90, delete 80, update 110] - // case 3: insert 90 -> insert 110 -> [insert 80, delete 90, update 110] - // case 4: insert 90 -> [insert 80, update 90] -> insert 100 - // - - if (OB_SUCC(ret)) { - need_update_tx_data = ctx_tx_data_.get_max_replayed_rollback_scn() > replay_scn; - if (need_update_tx_data && OB_FAIL(ctx_tx_data_.deep_copy_tx_data_out(update_tx_data_guard))) { - TRANS_LOG(WARN, "deep copy tx data failed", KR(ret), K(*this)); - } - } - // prepare end_scn for tx-data items - if (OB_SUCC(ret)) { - tmp_tx_data_guard.tx_data()->end_scn_ = replay_scn; - if (need_update_tx_data) { - // if the tx-data will be inserted into frozen tx-data-memtable, and it may be not the one with largest end_scn - // we must delete others in order to ensure ourself is the valid one with largest end_scn - tmp_tx_data_guard.tx_data()->exclusive_flag_ = ObTxData::ExclusiveType::EXCLUSIVE; - // for update tx-data, use the same end_scn_ - update_tx_data_guard.tx_data()->end_scn_ = ctx_tx_data_.get_max_replayed_rollback_scn(); - update_tx_data_guard.tx_data()->exclusive_flag_ = ObTxData::ExclusiveType::EXCLUSIVE; - } - } - // prepare done, do the final step to insert tx-data-table, this should not fail - if (OB_SUCC(ret)) { - if (OB_FAIL(ctx_tx_data_.insert_tmp_tx_data(tmp_tx_data_guard.tx_data()))) { - TRANS_LOG(WARN, "insert to tx table failed", KR(ret), K(*this)); - } else if (need_update_tx_data && OB_FAIL(ctx_tx_data_.insert_tmp_tx_data(update_tx_data_guard.tx_data()))) { - TRANS_LOG(WARN, "insert 
to tx table failed", KR(ret), K(*this)); - } - } - // if this is the largest scn replayed, remember it - if (OB_SUCC(ret) && !need_update_tx_data) { - ctx_tx_data_.set_max_replayed_rollback_scn(replay_scn); + } else if (OB_FAIL(replay_undo_action_to_tx_table_(undo_action, replay_scn))) { + TRANS_LOG(WARN, "insert to tx table failed", KR(ret), K(*this)); } } else if (OB_UNLIKELY(exec_info_.max_submitted_seq_no_ > to_scn)) { /* Leader */ - ObUndoAction undo_action(from_scn, to_scn); - ObUndoStatusNode *undo_status = NULL; - if (OB_FAIL(ctx_tx_data_.prepare_add_undo_action(undo_action, tmp_tx_data_guard, undo_status))) { - TRANS_LOG(WARN, "prepare add undo action fail", K(ret), KPC(this)); - } else if (OB_FAIL(submit_rollback_to_log_(from_scn, to_scn, tmp_tx_data_guard.tx_data()))) { + ObTxDataGuard tx_data_guard; + ObTxTable *tx_table = nullptr; + ctx_tx_data_.get_tx_table(tx_table); + ObUndoAction undo(from_scn, to_scn); + if (OB_FAIL(ctx_tx_data_.get_tx_data(tx_data_guard))) { + TRANS_LOG(WARN, "get tx data failed", KR(ret)); + } else if (OB_FAIL(tx_data_guard.tx_data()->init_tx_op())) { + TRANS_LOG(WARN, "init tx op failed", KR(ret)); + } else if (OB_FAIL(tx_data_guard.tx_data()->add_undo_action(ls_tx_ctx_mgr_->get_tx_table(), + undo))) { + TRANS_LOG(WARN, "add undo action failed", KR(ret)); + } else if (OB_FAIL(submit_rollback_to_log_(from_scn, to_scn))) { TRANS_LOG(WARN, "submit undo redolog fail", K(ret), K(from_scn), K(to_scn), KPC(this)); - int tmp_ret = OB_SUCCESS; - if (OB_TMP_FAIL(ctx_tx_data_.cancel_add_undo_action(undo_status))) { - TRANS_LOG(ERROR, "cancel add undo action failed", KR(tmp_ret), KPC(this)); - } - } else if (OB_FAIL(ctx_tx_data_.commit_add_undo_action(undo_action, undo_status))) { - TRANS_LOG(ERROR, "oops, commit add undo action fail", K(ret), KPC(this)); - ob_abort(); } } @@ -8452,8 +8687,7 @@ int ObPartTransCtx::rollback_to_savepoint_(const ObTxSEQ from_scn, } int ObPartTransCtx::submit_rollback_to_log_(const ObTxSEQ from_scn, - const 
ObTxSEQ to_scn, - ObTxData *tx_data) + const ObTxSEQ to_scn) { int ret = OB_SUCCESS; ObTxLogBlock log_block; @@ -8484,6 +8718,8 @@ int ObPartTransCtx::submit_rollback_to_log_(const ObTxSEQ from_scn, TRANS_LOG(ERROR, "cb arg array is empty", K(ret), K(log_block)); return_log_cb_(log_cb); log_cb = NULL; + } else if (OB_FAIL(ls_tx_ctx_mgr_->get_tx_table()->alloc_tx_data(log_cb->get_tx_data_guard(), true, INT64_MAX))) { + TRANS_LOG(WARN, "alloc_tx_data failed", KR(ret), KPC(this)); } else if (OB_FAIL(submit_log_block_out_(log_block, SCN::min_scn(), log_cb, replay_hint, barrier))) { TRANS_LOG(WARN, "submit log fail", K(ret), K(log_block), KPC(this)); return_log_cb_(log_cb); @@ -8492,7 +8728,7 @@ int ObPartTransCtx::submit_rollback_to_log_(const ObTxSEQ from_scn, TRANS_LOG(ERROR, "inc TxCtx ref fail", K(ret), KPC(this)); } else if (OB_FAIL(after_submit_log_(log_block, log_cb, NULL))) { } else { - log_cb->set_tx_data(tx_data); + log_cb->set_undo_action(ObUndoAction(from_scn, to_scn)); } REC_TRANS_TRACE_EXT(tlog_, submit_rollback_log, OB_ID(ret), ret, @@ -8589,7 +8825,9 @@ int ObPartTransCtx::try_alloc_retain_ctx_func_() { int ret = OB_SUCCESS; - if (OB_ISNULL(retain_ctx_func_ptr_)) { + if (is_support_tx_op_()) { + // do nothing + } else if (OB_ISNULL(retain_ctx_func_ptr_)) { if (OB_ISNULL(retain_ctx_func_ptr_ = static_cast( ObTxRetainCtxMgr::alloc_object(sizeof(ObMDSRetainCtxFunctor))))) { @@ -8630,9 +8868,15 @@ int ObPartTransCtx::insert_into_retain_ctx_mgr_(RetainCause cause, if (for_replay) { retain_lock_timeout = 10 * 1000; } + bool need_retain_ctx = !is_support_tx_op_(); + if (need_retain_ctx) { + TRANS_LOG(INFO, "insert into retain_ctx", KPC(this), K(for_replay), K(log_ts)); + } ObTxRetainCtxMgr &retain_ctx_mgr = ls_tx_ctx_mgr_->get_retain_ctx_mgr(); - if (OB_ISNULL(ls_tx_ctx_mgr_) || RetainCause::UNKOWN == cause) { + if (!need_retain_ctx) { + // do nothing + } else if (OB_ISNULL(ls_tx_ctx_mgr_) || RetainCause::UNKOWN == cause) { ret = OB_INVALID_ARGUMENT; 
TRANS_LOG(WARN, "invalid argument", K(ret), K(cause), KP(ls_tx_ctx_mgr_), KPC(this)); } else if (OB_ISNULL(retain_ctx_func_ptr_)) { @@ -9609,13 +9853,9 @@ int ObPartTransCtx::collect_tx_ctx(const ObLSID dest_ls_id, TRANS_LOG(INFO, "collect_tx_ctx tx skip ctx exiting", K(trans_id_), K(ls_id_)); } else if (FALSE_IT(start_scn = get_start_log_ts())) { } else if (!start_scn.is_valid() || start_scn > data_end_scn) { - // just for check - if (sub_state_.is_transfer_blocking()) { - ret = OB_ERR_UNEXPECTED; - TRANS_LOG(WARN, "tx has transfer_blocking state unexpected", KR(ret), KPC(this), K(start_scn), K(data_end_scn)); - } else { - TRANS_LOG(INFO, "collect_tx_ctx tx skip for start_scn", K(trans_id_), K(ls_id_), K(start_scn.is_valid()), K(start_scn > data_end_scn)); - } + TRANS_LOG(INFO, "collect_tx_ctx tx skip for start_scn", K(trans_id_), K(ls_id_), K(start_scn.is_valid()), K(start_scn > data_end_scn)); + } else if (exec_info_.state_ >= ObTxState::COMMIT) { + TRANS_LOG(INFO, "collect_tx_ctx tx skip ctx has commit", K(trans_id_), K(ls_id_), K(exec_info_.state_)); } else if (!sub_state_.is_transfer_blocking()) { // just for check if (!is_contain_mds_type_(ObTxDataSourceType::START_TRANSFER_OUT_V2)) { @@ -9625,9 +9865,6 @@ int ObPartTransCtx::collect_tx_ctx(const ObLSID dest_ls_id, TRANS_LOG(INFO, "collect_tx_ctx tx skip transfer self", K(trans_id_), K(ls_id_), K(start_scn), K(start_scn > data_end_scn), K(is_contain_mds_type_(ObTxDataSourceType::START_TRANSFER_OUT_V2))); } - } else if (exec_info_.state_ >= ObTxState::COMMIT) { - TRANS_LOG(INFO, "collect_tx_ctx tx skip ctx has commit", K(trans_id_), K(ls_id_), K(exec_info_.state_)); - // filter } else if (sub_state_.is_state_log_submitting()) { ret = OB_ERR_UNEXPECTED; TRANS_LOG(WARN, "tx is driving when transfer move", KR(ret), KPC(this), @@ -9725,6 +9962,28 @@ int ObPartTransCtx::check_is_aborted_in_tx_data_(const ObTransID tx_id, return ret; } +int ObPartTransCtx::load_tx_op_if_exist_() +{ + int ret = OB_SUCCESS; + 
ObTxData *tx_data = NULL; + ObTxTableGuard tx_table_guard; + if (OB_FAIL(ctx_tx_data_.get_tx_data_ptr(tx_data))) { + TRANS_LOG(WARN, "get_tx_data failed", KR(ret), KPC(this)); + } else if (OB_ISNULL(tx_data)) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "tx_data is null", KR(ret), KPC(this)); + } else if (OB_FAIL(ls_tx_ctx_mgr_->get_tx_table_guard(tx_table_guard))) { + TRANS_LOG(WARN, "get_tx_table failed", KR(ret), KPC(this)); + } else if (OB_FAIL(tx_table_guard.load_tx_op(trans_id_, *tx_data))) { + if (OB_TRANS_CTX_NOT_EXIST != ret) { + TRANS_LOG(WARN, "load_tx_op failed", KR(ret), KPC(this)); + } else { + ret = OB_SUCCESS; + } + } + return ret; +} + // NB: This function can report a retryable error because the outer while loop // will ignore the error and continuously retry until it succeeds within the // callback function. @@ -10001,20 +10260,21 @@ int ObPartTransCtx::update_tx_data_start_and_end_scn_(const SCN start_scn, TRANS_LOG(WARN, "tx table is null", KR(ret), KPC(this)); } else if (OB_FAIL(ctx_tx_data_.get_tx_data(tx_data_guard))) { TRANS_LOG(WARN, "get tx_data failed", KR(ret)); - } else if (OB_FAIL(tx_table->deep_copy_tx_data(tx_data_guard, tmp_tx_data_guard))) { + } else if (OB_FAIL(tx_table->alloc_tx_data(tmp_tx_data_guard))) { TRANS_LOG(WARN, "copy tx data failed", KR(ret), KPC(this)); } else { - ObTxData *tx_data = tmp_tx_data_guard.tx_data(); + ObTxData *new_tx_data = tmp_tx_data_guard.tx_data(); + *new_tx_data = *tx_data_guard.tx_data(); if (start_scn.is_valid()) { share::SCN current_start_scn = get_start_log_ts(); if (current_start_scn.is_valid()) { - tx_data->start_scn_.atomic_store(MIN(start_scn, current_start_scn)); + new_tx_data->start_scn_.atomic_store(MIN(start_scn, current_start_scn)); } else { - tx_data->start_scn_.atomic_store(start_scn); + new_tx_data->start_scn_.atomic_store(start_scn); } } - tx_data->end_scn_.atomic_store(end_scn); - if (OB_FAIL(tx_table->insert(tx_data))) { + new_tx_data->end_scn_.atomic_store(end_scn); + if 
(OB_FAIL(tx_table->insert(new_tx_data))) { TRANS_LOG(WARN, "insert tx data failed", KR(ret), KPC(this)); } } @@ -10153,17 +10413,17 @@ inline bool ObPartTransCtx::has_replay_serial_final_() const exec_info_.max_applied_log_ts_ >= exec_info_.serial_final_scn_; } -int ObPartTransCtx::set_replay_incomplete() { +int ObPartTransCtx::set_replay_incomplete(const share::SCN log_ts) { CtxLockGuard guard(lock_); - return set_replay_completeness_(false); + return set_replay_completeness_(false, log_ts); } -int ObPartTransCtx::set_replay_completeness_(const bool complete) +int ObPartTransCtx::set_replay_completeness_(const bool complete, const SCN replay_scn) { int ret = OB_SUCCESS; if (OB_UNLIKELY(replay_completeness_.is_unknown())) { if (!complete && !ctx_tx_data_.has_recovered_from_tx_table()) { - if (OB_FAIL(supplement_undo_actions_if_exist_())) { + if (OB_FAIL(supplement_tx_op_if_exist_(true, replay_scn))) { TRANS_LOG(WARN, "load Undo(s) from tx-table fail", K(ret), KPC(this)); } else { TRANS_LOG(INFO, "replay from middle, load Undo(s) from tx-table succuess", @@ -10186,6 +10446,11 @@ inline bool ObPartTransCtx::is_support_parallel_replay_() const return cluster_version_accurate_ && cluster_version_ >= CLUSTER_VERSION_4_3_0_0; } +inline bool ObPartTransCtx::is_support_tx_op_() const +{ + return cluster_version_accurate_ && cluster_version_ >= CLUSTER_VERSION_4_3_2_0; +} + inline int ObPartTransCtx::switch_to_parallel_logging_(const share::SCN serial_final_scn, const ObTxSEQ max_seq_no) { @@ -10256,5 +10521,44 @@ int ObPartTransCtx::get_stat_for_virtual_table(share::ObLSArray &participants, i return ret; } +int ObPartTransCtx::check_need_transfer( + const SCN data_end_scn, + ObIArray &tablet_list, + bool &need_transfer) +{ + int ret = OB_SUCCESS; + need_transfer = true; + SCN start_scn; + const int64_t LOCK_OP_CHECK_LIMIT = 100; + CtxLockGuard guard(lock_); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + TRANS_LOG(WARN, "ObPartTransCtx not inited", KR(ret)); + } else if 
(!data_end_scn.is_valid() || tablet_list.empty()) { + ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "invalid args", KR(ret), K(data_end_scn), K(tablet_list)); + } else if (FALSE_IT(start_scn = get_start_log_ts())) { + } else if (!start_scn.is_valid() || start_scn > data_end_scn) { + // filter + need_transfer = false; + } else if (exec_info_.state_ >= ObTxState::COMMIT) { + // filter + need_transfer = false; + } else if (mt_ctx_.get_lock_mem_ctx().get_lock_op_count() > LOCK_OP_CHECK_LIMIT) { + // too many lock_op just transfer + } else { + bool contain = false; + for (int64_t idx =0; OB_SUCC(ret) && !contain && idx < tablet_list.count(); idx++) { + if (OB_FAIL(mt_ctx_.get_lock_mem_ctx().check_contain_tablet(tablet_list.at(idx), contain))) { + TRANS_LOG(WARN, "check lock_ctx contain tablet fail", KR(ret), K(tablet_list.at(idx)), K(trans_id_), K(ls_id_)); + } + } + if (OB_SUCC(ret) && !contain) { + need_transfer = false; + } + } + return ret; +} + } // namespace transaction } // namespace oceanbase diff --git a/src/storage/tx/ob_trans_part_ctx.h b/src/storage/tx/ob_trans_part_ctx.h index 307d62e5a..ef2060957 100644 --- a/src/storage/tx/ob_trans_part_ctx.h +++ b/src/storage/tx/ob_trans_part_ctx.h @@ -458,7 +458,7 @@ public: void check_no_need_replay_checksum(const share::SCN &log_ts, const int index); bool is_replay_complete_unknown() const { return replay_completeness_.is_unknown(); } - int set_replay_incomplete(); + int set_replay_incomplete(const share::SCN log_ts); // return the min log ts of those logs which are submitted but // not callbacked yet, if there is no such log return INT64_MAX const share::SCN get_min_undecided_log_ts() const; @@ -478,7 +478,8 @@ public: int switch_to_leader(const share::SCN &start_working_ts); int switch_to_follower_gracefully(ObTxCommitCallback *&cb_list); int resume_leader(const share::SCN &start_working_ts); - int supplement_undo_actions_if_exist_(); + int supplement_tx_op_if_exist_(const bool for_replay, const share::SCN 
replay_scn); + int recover_tx_ctx_from_tx_op_(ObTxOpVector &tx_op_list, const share::SCN replay_scn); void set_role_state(const bool for_replay) { @@ -658,6 +659,15 @@ private: int deep_copy_mds_array_(const ObTxBufferNodeArray &mds_array, ObTxBufferNodeArray &incremental_array, bool need_replace = false); + int prepare_mds_tx_op_(const ObTxBufferNodeArray &mds_array, + share::SCN op_scn, + share::ObTenantTxDataOpAllocator &tx_op_allocator, + ObTxOpArray &tx_op_list, + bool is_replay); + int replay_mds_to_tx_table_(const ObTxBufferNodeArray &mds_node_array, const share::SCN op_scn); + int insert_mds_to_tx_table_(ObTxLogCb &log_cb); + int insert_undo_action_to_tx_table_(ObUndoAction &undo_action, ObTxDataGuard &new_tx_data_guard, const share::SCN op_scn); + int replay_undo_action_to_tx_table_(ObUndoAction &undo_action, const share::SCN op_scn); int decide_state_log_barrier_type_(const ObTxLogType &state_log_type, logservice::ObReplayBarrierType &final_barrier_type); bool is_contain_mds_type_(const ObTxDataSourceType target_type); @@ -684,8 +694,9 @@ private: const share::SCN ×tamp, const int64_t &part_log_no); bool is_support_parallel_replay_() const; - int set_replay_completeness_(const bool complete); + int set_replay_completeness_(const bool complete, const share::SCN replay_scn); int errsim_notify_mds_(); + bool is_support_tx_op_() const; protected: virtual int get_gts_(share::SCN >s); virtual int wait_gts_elapse_commit_version_(bool &need_wait); @@ -805,6 +816,9 @@ private: // ======================= for transfer =============================== public: + int check_need_transfer(const share::SCN data_end_scn, + ObIArray &tablet_list, + bool &need_transfer); int do_transfer_out_tx_op(const share::SCN data_end_scn, const share::SCN op_scn, const NotifyType op_type, @@ -825,6 +839,7 @@ public: bool is_exec_complete_without_lock(ObLSID ls_id, int64_t epoch, int64_t transfer_epoch); private: int transfer_op_log_cb_(share::SCN op_scn, NotifyType op_type); + int 
load_tx_op_if_exist_(); int update_tx_data_start_and_end_scn_(const share::SCN start_scn, const share::SCN end_scn, const share::SCN transfer_scn); @@ -918,8 +933,7 @@ private: const ObTxSEQ to_scn, const share::SCN replay_scn = share::SCN::invalid_scn()); int submit_rollback_to_log_(const ObTxSEQ from_scn, - const ObTxSEQ to_scn, - ObTxData *tx_data); + const ObTxSEQ to_scn); int set_state_info_array_(); int update_state_info_array_(const ObStateInfo& state_info); int update_state_info_array_with_transfer_parts_(const ObTxCommitParts &parts, const ObLSID &ls_id); diff --git a/src/storage/tx/ob_trans_submit_log_cb.cpp b/src/storage/tx/ob_trans_submit_log_cb.cpp index 44c63bee3..a79fd8b94 100644 --- a/src/storage/tx/ob_trans_submit_log_cb.cpp +++ b/src/storage/tx/ob_trans_submit_log_cb.cpp @@ -93,6 +93,18 @@ int ObTxLogCb::init(const ObLSID &key, return ret; } +void ObTxLogCb::reset_tx_op_array() +{ + if (OB_NOT_NULL(tx_op_array_)) { + for (int64_t idx = 0; idx < tx_op_array_->count(); idx++) { + tx_op_array_->at(idx).release(); + } + tx_op_array_->~ObTxOpArray(); + mtl_free(tx_op_array_); + tx_op_array_ = nullptr; + } +} + void ObTxLogCb::reset() { ObTxBaseLogCb::reset(); @@ -115,6 +127,7 @@ void ObTxLogCb::reset() // is_callbacking_ = false; first_part_scn_.invalid_scn(); + reset_tx_op_array(); } void ObTxLogCb::reuse() @@ -132,6 +145,7 @@ void ObTxLogCb::reuse() need_free_extra_cb_ = false; first_part_scn_.invalid_scn(); + reset_tx_op_array(); } ObTxLogType ObTxLogCb::get_last_log_type() const diff --git a/src/storage/tx/ob_trans_submit_log_cb.h b/src/storage/tx/ob_trans_submit_log_cb.h index ac3439918..9e3f957b4 100644 --- a/src/storage/tx/ob_trans_submit_log_cb.h +++ b/src/storage/tx/ob_trans_submit_log_cb.h @@ -28,6 +28,7 @@ #include "storage/tx/ob_tx_log.h" #include "storage/memtable/mvcc/ob_mvcc_trans_ctx.h" #include "storage/memtable/ob_redo_log_generator.h" +#include "storage/tx/ob_tx_data_op.h" namespace oceanbase { @@ -74,13 +75,14 @@ class ObTxLogCb : 
public ObTxBaseLogCb, public common::ObDLinkBase { public: - ObTxLogCb() : extra_cb_(nullptr), need_free_extra_cb_(false) { reset(); } + ObTxLogCb() : extra_cb_(nullptr), need_free_extra_cb_(false), tx_op_array_(nullptr) { reset(); } ~ObTxLogCb() { destroy(); } int init(const share::ObLSID &key, const ObTransID &trans_id, ObTransCtx *ctx, const bool is_dynamic); void reset(); + void reset_tx_op_array(); void reuse(); void destroy() { reset(); } ObTxLogType get_last_log_type() const; @@ -93,7 +95,13 @@ public: tx_data_guard_.init(tx_data); } } + void set_undo_action(const ObUndoAction &undo_action) { + undo_action_ = undo_action; + } + ObUndoAction &get_undo_action() { return undo_action_; } + ObTxOpArray *&get_tx_op_array() { return tx_op_array_; } ObTxData* get_tx_data() { return tx_data_guard_.tx_data(); } + ObTxDataGuard &get_tx_data_guard() { return tx_data_guard_; } int set_callbacks(const ObCallbackScopeArray &callbacks) { return callbacks_.assign(callbacks); } ObCallbackScopeArray& get_callbacks() { return callbacks_; } int reserve_callbacks(int cnt) { return callbacks_.reserve(cnt); } @@ -161,6 +169,8 @@ private: storage::ObDDLIncLogBasic dli_batch_key_; logservice::AppendCb * extra_cb_; bool need_free_extra_cb_; + ObUndoAction undo_action_; + storage::ObTxOpArray *tx_op_array_; //bool is_callbacking_; }; diff --git a/src/storage/tx/ob_tx_data_define.cpp b/src/storage/tx/ob_tx_data_define.cpp index a342edd8a..fc1b85f43 100644 --- a/src/storage/tx/ob_tx_data_define.cpp +++ b/src/storage/tx/ob_tx_data_define.cpp @@ -14,8 +14,11 @@ #include "lib/utility/ob_unify_serialize.h" #include "storage/tx_table/ob_tx_table.h" #include "share/rc/ob_tenant_base.h" +#include "share/allocator/ob_shared_memory_allocator_mgr.h" +#include "storage/tx/ob_tx_data_op.h" using namespace oceanbase::share; +using namespace oceanbase::transaction; namespace oceanbase { @@ -267,6 +270,7 @@ const char* ObTxCommitData::get_state_string(int32_t state) int ObTxData::serialize(char *buf, 
const int64_t buf_len, int64_t &pos) const { int ret = OB_SUCCESS; + int64_t pos_tmp = 0; const int64_t len = get_serialize_size_(); if (OB_UNLIKELY(OB_ISNULL(buf) || buf_len <= 0 || pos > buf_len)) { @@ -277,8 +281,9 @@ int ObTxData::serialize(char *buf, const int64_t buf_len, int64_t &pos) const K(pos)); } else if (OB_FAIL(serialization::encode_vi64(buf, buf_len, pos, len))) { STORAGE_LOG(WARN, "encode length of ObTxData failed.", KR(ret), KP(buf), K(buf_len), K(pos)); - } else if (OB_FAIL(serialize_(buf, buf_len, pos))) { - STORAGE_LOG(WARN, "serialize_ of ObTxData failed.", KR(ret), KP(buf), K(buf_len), K(pos)); + } else if (FALSE_IT(pos_tmp = pos)) { + } else if (OB_FAIL(serialize_(buf, pos + len, pos))) { + STORAGE_LOG(WARN, "serialize_ of ObTxData failed.", KR(ret), KP(buf), K(buf_len), K(pos), K(pos_tmp)); } return ret; } @@ -298,10 +303,28 @@ int ObTxData::serialize_(char *buf, const int64_t buf_len, int64_t &pos) const STORAGE_LOG(WARN, "serialize start_scn fail.", KR(ret), K(pos), K(buf_len)); } else if (OB_FAIL(end_scn_.serialize(buf, buf_len, pos))) { STORAGE_LOG(WARN, "serialize end_scn fail.", KR(ret), K(pos), K(buf_len)); - } else if (OB_FAIL(undo_status_list_.serialize(buf, buf_len, pos))) { - STORAGE_LOG(WARN, "serialize undo_status_list fail.", KR(ret), K(pos), K(buf_len)); } - + uint64_t data_version = 0; + if (FAILEDx(GET_MIN_DATA_VERSION(MTL_ID(), data_version))) { + STORAGE_LOG(WARN, "fail to get data version", KR(ret)); + } else if (data_version < DATA_VERSION_4_3_2_0) { + if (op_guard_.is_valid()) { + if (OB_FAIL(op_guard_->get_undo_status_list().serialize(buf, buf_len, pos))) { + STORAGE_LOG(WARN, "serialize undo_status_list fail.", KR(ret), K(pos), K(buf_len)); + } + } else { + ObUndoStatusList dummy_undo; + if (OB_FAIL(dummy_undo.serialize(buf, buf_len, pos))) { + STORAGE_LOG(WARN, "serialize undo_status_list fail.", KR(ret), K(pos), K(buf_len)); + } + } + } else if (op_guard_.is_valid()) { + if 
(OB_FAIL(op_guard_->get_undo_status_list().serialize(buf, buf_len, pos))) { + STORAGE_LOG(WARN, "serialize undo_status_list fail.", KR(ret), K(pos), K(buf_len)); + } else if (OB_FAIL(op_guard_->get_tx_op_list().serialize(buf, buf_len, pos))) { + STORAGE_LOG(WARN, "serialize tx_op_list fail.", KR(ret), K(pos), K(buf_len)); + } + } return ret; } @@ -324,13 +347,32 @@ int64_t ObTxData::get_serialize_size_() const len += commit_version_.get_serialize_size(); len += start_scn_.get_serialize_size(); len += end_scn_.get_serialize_size(); - len += undo_status_list_.get_serialize_size(); + int ret = OB_SUCCESS; + uint64_t data_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(MTL_ID(), data_version))) { + STORAGE_LOG(ERROR, "get data_version failed", KR(ret)); + } + if (data_version < DATA_VERSION_4_3_2_0) { + if (op_guard_.is_valid()) { + len += op_guard_->get_undo_status_list().get_serialize_size(); + } else { + ObUndoStatusList dummy_undo; + len += dummy_undo.get_serialize_size(); + } + } else if (op_guard_.is_valid()) { + len += op_guard_->get_undo_status_list().get_serialize_size(); + len += op_guard_->get_tx_op_list().get_serialize_size(); + } return len; } -int64_t ObTxData::size() const +int64_t ObTxData::size_need_cache() const { - int64_t len = (TX_DATA_SLICE_SIZE * (1LL + undo_status_list_.undo_node_cnt_)); + int64_t len = TX_DATA_SLICE_SIZE; + if (op_guard_.is_valid()) { + len += TX_DATA_SLICE_SIZE; // tx_op + len += TX_DATA_SLICE_SIZE * op_guard_->get_undo_status_list().undo_node_cnt_; + } return len; } @@ -342,6 +384,7 @@ int ObTxData::deserialize(const char *buf, int ret = OB_SUCCESS; int64_t version = 0; int64_t len = 0; + int64_t pos_tmp = 0; if (OB_UNLIKELY(nullptr == buf || data_len <= 0 || pos > data_len)) { ret = OB_INVALID_ARGUMENT; @@ -356,8 +399,9 @@ int ObTxData::deserialize(const char *buf, } else if (OB_UNLIKELY(pos + len > data_len)) { ret = OB_INVALID_SIZE; STORAGE_LOG(WARN, "length from deserialize is invalid.", KR(ret), K(pos), K(len), 
K(data_len)); - } else if (OB_FAIL(deserialize_(buf, data_len, pos, slice_allocator))) { - STORAGE_LOG(WARN, "deserialize tx data failed.", KR(ret)); + } else if (FALSE_IT(pos_tmp = pos)) { + } else if (OB_FAIL(deserialize_(buf, pos + len, pos, slice_allocator))) { + STORAGE_LOG(WARN, "deserialize tx data failed.", KR(ret), K(buf), K(pos), K(len), K(pos_tmp), K(data_len)); } return ret; @@ -380,10 +424,17 @@ int ObTxData::deserialize_(const char *buf, STORAGE_LOG(WARN, "deserialize start_scn fail.", KR(ret), K(pos), K(data_len)); } else if (OB_FAIL(end_scn_.deserialize(buf, data_len, pos))) { STORAGE_LOG(WARN, "deserialize end_scn fail.", KR(ret), K(pos), K(data_len)); - } else if (OB_FAIL(undo_status_list_.deserialize(buf, data_len, pos, tx_data_allocator))) { - STORAGE_LOG(WARN, "deserialize undo_status_list fail.", KR(ret), K(pos), K(data_len)); } - + if (OB_SUCC(ret) && pos < data_len) { + if (OB_FAIL(init_tx_op())) { + STORAGE_LOG(WARN, "init tx op fail", KR(ret)); + } else if (OB_FAIL(op_guard_->get_undo_status_list().deserialize(buf, data_len, pos, tx_data_allocator))) { + STORAGE_LOG(WARN, "deserialize undo_status_list fail.", KR(ret), K(pos), K(data_len)); + } else if (pos < data_len && OB_FAIL(op_guard_->get_tx_op_list().deserialize(buf, data_len, pos, + MTL(ObSharedMemAllocMgr*)->tx_data_op_allocator()))) { + STORAGE_LOG(WARN, "deserialize tx_op_list fail.", KR(ret), K(pos), K(data_len)); + } + } return ret; } @@ -396,9 +447,9 @@ void ObTxData::reset() ob_abort(); } ObTxCommitData::reset(); + op_guard_.reset(); tx_data_allocator_ = nullptr; ref_cnt_ = 0; - undo_status_list_.reset(); } ObTxData::ObTxData(const ObTxData &rhs) @@ -413,7 +464,9 @@ ObTxData &ObTxData::operator=(const ObTxData &rhs) commit_version_ = rhs.commit_version_; start_scn_ = rhs.start_scn_; end_scn_ = rhs.end_scn_; - undo_status_list_ = rhs.undo_status_list_; + if (rhs.op_guard_.is_valid()) { + op_guard_.init(rhs.op_guard_.ptr()); + } return *this; } @@ -424,7 +477,7 @@ ObTxData 
&ObTxData::operator=(const ObTxCommitData &rhs) commit_version_ = rhs.commit_version_; start_scn_ = rhs.start_scn_; end_scn_ = rhs.end_scn_; - undo_status_list_.reset(); + op_guard_.reset(); return *this; } @@ -441,9 +494,6 @@ bool ObTxData::is_valid_in_tx_data_table() const if (!end_scn_.is_valid()) { bool_ret = false; STORAGE_LOG_RET(ERROR, OB_INVALID_ERROR, "tx data end log ts is invalid", KPC(this)); - } else if (OB_ISNULL(undo_status_list_.head_)) { - bool_ret = false; - STORAGE_LOG_RET(ERROR, OB_INVALID_ERROR, "tx data undo status list is invalid", KPC(this)); } else { bool_ret = true; } @@ -468,54 +518,91 @@ bool ObTxData::is_valid_in_tx_data_table() const return bool_ret; } -int ObTxData::add_undo_action(ObTxTable *tx_table, transaction::ObUndoAction &new_undo_action, ObUndoStatusNode *undo_node) +int ObTxData::reserve_undo(ObTxTable *tx_table) { - // STORAGE_LOG(DEBUG, "do add_undo_action"); int ret = OB_SUCCESS; - SpinWLockGuard guard(undo_status_list_.lock_); ObTxDataTable *tx_data_table = nullptr; - ObUndoStatusNode *node = undo_status_list_.head_; if (OB_ISNULL(tx_table)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "tx table is nullptr.", KR(ret)); } else if (OB_ISNULL(tx_data_table = tx_table->get_tx_data_table())) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "tx data table in tx table is nullptr.", KR(ret)); - } else if (OB_FAIL(merge_undo_actions_(tx_data_table, node, new_undo_action))) { - STORAGE_LOG(WARN, "merge undo actions fail.", KR(ret), K(new_undo_action)); - } else if (!new_undo_action.is_valid()) { - // if new_undo_action is merged, it will be set to invalid and skip insert + } else if (!op_guard_.is_valid()) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "tx data op is nullptr", KR(ret)); } else { - // generate new node if current node cannot be inserted + SpinWLockGuard lock_guard(op_guard_->get_lock()); + SpinWLockGuard guard(op_guard_->get_undo_status_list().lock_); + ObUndoStatusNode *node = 
op_guard_->get_undo_status_list().head_; if (OB_ISNULL(node) || node->size_ >= TX_DATA_UNDO_ACT_MAX_NUM_PER_NODE) { ObUndoStatusNode *new_node = nullptr; - if (OB_NOT_NULL(undo_node)) { - new_node = undo_node; - undo_node = NULL; - } else if (OB_FAIL(tx_data_table->alloc_undo_status_node(new_node))) { + if (OB_FAIL(tx_data_table->alloc_undo_status_node(new_node))) { STORAGE_LOG(WARN, "alloc_undo_status_node() fail", KR(ret)); - } - - if (OB_SUCC(ret)) { - new_node->next_ = node; - undo_status_list_.head_ = new_node; - node = new_node; - undo_status_list_.undo_node_cnt_++; - } - } - - if (OB_SUCC(ret)) { - if (OB_NOT_NULL(node)) { - node->undo_actions_[node->size_++] = new_undo_action; } else { - ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(ERROR, "node is unexpected nullptr", KR(ret), KPC(this)); + new_node->next_ = node; + op_guard_->get_undo_status_list().head_ = new_node; + op_guard_->get_undo_status_list().undo_node_cnt_++; } } } + return ret; +} - if (OB_NOT_NULL(undo_node)) { - tx_data_table->free_undo_status_node(undo_node); +int ObTxData::add_undo_action(ObTxTable *tx_table, transaction::ObUndoAction &new_undo_action, ObUndoStatusNode *undo_node) +{ + // STORAGE_LOG(DEBUG, "do add_undo_action"); + int ret = OB_SUCCESS; + ObTxDataTable *tx_data_table = nullptr; + if (OB_ISNULL(tx_table)) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "tx table is nullptr.", KR(ret)); + } else if (OB_ISNULL(tx_data_table = tx_table->get_tx_data_table())) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "tx data table in tx table is nullptr.", KR(ret)); + } else if (!op_guard_.is_valid()) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "tx data op is nullptr", KR(ret)); + } else { + SpinWLockGuard lock_guard(op_guard_->get_lock()); + SpinWLockGuard guard(op_guard_->get_undo_status_list().lock_); + ObUndoStatusNode *node = op_guard_->get_undo_status_list().head_; + if (OB_FAIL(merge_undo_actions_(tx_data_table, node, new_undo_action))) { + STORAGE_LOG(WARN, "merge undo actions 
fail.", KR(ret), K(new_undo_action)); + } else if (!new_undo_action.is_valid()) { + // if new_undo_action is merged, it will be set to invalid and skip insert + } else { + // generate new node if current node cannot be inserted + if (OB_ISNULL(node) || node->size_ >= TX_DATA_UNDO_ACT_MAX_NUM_PER_NODE) { + ObUndoStatusNode *new_node = nullptr; + if (OB_NOT_NULL(undo_node)) { + new_node = undo_node; + undo_node = NULL; + } else if (OB_FAIL(tx_data_table->alloc_undo_status_node(new_node))) { + STORAGE_LOG(WARN, "alloc_undo_status_node() fail", KR(ret)); + } + + if (OB_SUCC(ret)) { + new_node->next_ = node; + op_guard_->get_undo_status_list().head_ = new_node; + node = new_node; + op_guard_->get_undo_status_list().undo_node_cnt_++; + } + } + + if (OB_SUCC(ret)) { + if (OB_NOT_NULL(node)) { + node->undo_actions_[node->size_++] = new_undo_action; + } else { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(ERROR, "node is unexpected nullptr", KR(ret), KPC(this)); + } + } + } + + if (OB_NOT_NULL(undo_node)) { + tx_data_table->free_undo_status_node(undo_node); + } } return ret; } @@ -543,15 +630,15 @@ int ObTxData::merge_undo_actions_(ObTxDataTable *tx_data_table, // all undo actions in this node are merged, free it // STORAGE_LOG(DEBUG, "current node is empty, now free it"); ObUndoStatusNode *node_to_free = node; - undo_status_list_.head_ = node->next_; - node = undo_status_list_.head_; + op_guard_->get_undo_status_list().head_ = node->next_; + node = op_guard_->get_undo_status_list().head_; tx_data_table->free_undo_status_node(node_to_free); - if (undo_status_list_.undo_node_cnt_ <= 0) { + if (op_guard_->get_undo_status_list().undo_node_cnt_ <= 0) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(ERROR, "invalid undo node count int undo status list.", KR(ret), - K(undo_status_list_)); + K(op_guard_->get_undo_status_list())); } else { - undo_status_list_.undo_node_cnt_--; + op_guard_->get_undo_status_list().undo_node_cnt_--; } } else { // merge undo actions done @@ -580,12 +667,15 @@ bool 
ObTxData::equals_(ObTxData &rhs) } else if (end_scn_ != rhs.end_scn_) { bool_ret = false; STORAGE_LOG(INFO, "end_scn is not equal."); - } else if (undo_status_list_.undo_node_cnt_ != rhs.undo_status_list_.undo_node_cnt_) { - bool_ret = false; - STORAGE_LOG(INFO, "undo_node_cnt is not equal."); } else { - ObUndoStatusNode *l_node = undo_status_list_.head_; - ObUndoStatusNode *r_node = rhs.undo_status_list_.head_; + ObUndoStatusNode *l_node = NULL; + if (op_guard_.is_valid()) { + l_node = op_guard_->get_undo_status_list().head_; + } + ObUndoStatusNode *r_node = NULL; + if (rhs.op_guard_.is_valid()) { + r_node = rhs.op_guard_->get_undo_status_list().head_; + } while ((nullptr != l_node) && (nullptr != r_node)) { if (l_node->size_ != r_node->size_) { @@ -632,7 +722,9 @@ void ObTxData::print_to_stderr(const ObTxData &tx_data) to_cstring(tx_data.commit_version_), get_state_string(tx_data.state_)); - tx_data.undo_status_list_.dump_2_text(stderr); + if (tx_data.op_guard_.is_valid()) { + tx_data.op_guard_->get_undo_status_list().dump_2_text(stderr); + } } void ObTxData::dump_2_text(FILE *fd) const @@ -650,7 +742,9 @@ void ObTxData::dump_2_text(FILE *fd) const to_cstring(commit_version_), get_state_string(state_)); - undo_status_list_.dump_2_text(fd); + if (op_guard_.is_valid()) { + op_guard_->get_undo_status_list().dump_2_text(fd); + } fprintf(fd, "\n}\n"); } @@ -665,7 +759,7 @@ DEF_TO_STRING(ObTxData) K_(commit_version), K_(start_scn), K_(end_scn), - K_(undo_status_list)); + K_(op_guard)); J_OBJ_END(); return pos; } @@ -679,7 +773,65 @@ DEF_TO_STRING(ObUndoStatusNode) return pos; } +int ObTxData::init_tx_op() +{ + int ret = OB_SUCCESS; + void *ptr = nullptr; + if (!op_guard_.is_valid()) { + if (OB_ISNULL(tx_data_allocator_)) { + tx_data_allocator_ = &MTL(ObSharedMemAllocMgr*)->tx_data_allocator(); + } + if (OB_ISNULL(op_allocator_)) { + op_allocator_ = &MTL(ObSharedMemAllocMgr*)->tx_data_op_allocator(); + } + if (OB_ISNULL(ptr = tx_data_allocator_->alloc())) { + ret = 
OB_ALLOCATE_MEMORY_FAILED; + STORAGE_LOG(WARN, "allocate memory from slice_allocator fail.", KR(ret), KP(this)); + } else { + ObTxDataOp *tx_data_op = new (ptr) ObTxDataOp(tx_data_allocator_, op_allocator_); + op_guard_.init(tx_data_op); + } + } + return ret; +} + +int ObTxData::check_tx_op_exist(share::SCN op_scn, bool &exist) +{ + int ret = OB_SUCCESS; + exist = false; + if (op_guard_.is_valid()) { + ObTxOpVector &tx_op_list = op_guard_->get_tx_op_list(); + if (tx_op_list.get_count() > 0 && op_scn <= tx_op_list.at(tx_op_list.get_count() - 1)->get_op_scn()) { + exist = true; + } + } + return ret; +} + +int ObTxDataOpGuard::init(ObTxDataOp *tx_data_op) +{ + int ret = OB_SUCCESS; + reset(); + if (OB_ISNULL(tx_data_op)) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "init ObTxDataOpGuard with invalid arguments", KR(ret)); + } else if (tx_data_op->inc_ref() <= 0) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(ERROR, "unexpected ref cnt on tx data op", KR(ret), KP(tx_data_op), KPC(tx_data_op)); + ob_abort(); + } else { + tx_data_op_ = tx_data_op; + } + return ret; +} + +void ObTxDataOpGuard::reset() +{ + if (OB_NOT_NULL(tx_data_op_)) { + tx_data_op_->dec_ref(); + tx_data_op_ = nullptr; + } +} } // namespace storage - } // namespace oceanbase diff --git a/src/storage/tx/ob_tx_data_define.h b/src/storage/tx/ob_tx_data_define.h index 0ceefb22a..115a6c71c 100644 --- a/src/storage/tx/ob_tx_data_define.h +++ b/src/storage/tx/ob_tx_data_define.h @@ -19,10 +19,10 @@ #include "storage/tx/ob_committer_define.h" #include "storage/tx/ob_trans_define.h" #include "storage/tx_table/ob_tx_data_hash_map.h" +#include "storage/tx/ob_trans_factory.h" namespace oceanbase { - namespace storage { class ObTxData; @@ -30,6 +30,8 @@ class ObTxTable; class ObTxDataTable; class ObTxDataMemtable; class ObTxDataMemtableMgr; +class ObTxDataOp; + // The memory structures associated with tx data are shown below. 
They are designed for several // reasons: @@ -132,7 +134,6 @@ struct ObTxDataLinkNode TO_STRING_KV(KP_(next)); }; - struct ObUndoStatusList { private: @@ -221,7 +222,6 @@ public: share::SCN end_scn_; }; - class ObTxDataLink { public: @@ -232,6 +232,29 @@ public: ObTxDataLinkNode hash_node_; }; +class ObTxDataOpGuard +{ +public: + ObTxDataOpGuard() : tx_data_op_(nullptr) {} + ~ObTxDataOpGuard() { reset(); } + int init(ObTxDataOp *tx_data_op); + bool is_valid() const { return tx_data_op_ != nullptr; } + void reset(); + ObTxDataOp *ptr() const { return tx_data_op_; } + ObTxDataOp &operator*() { + return *tx_data_op_; + } + ObTxDataOp* operator->() { + return tx_data_op_; + } + ObTxDataOp* operator->() const { + return tx_data_op_; + } + TO_STRING_KV(KP(tx_data_op_)); +private: + ObTxDataOp *tx_data_op_; +}; + // DONT : Modify this definition class ObTxData : public ObTxCommitData, public ObTxDataLink { @@ -248,8 +271,8 @@ public: : ObTxCommitData(), ObTxDataLink(), tx_data_allocator_(nullptr), + op_allocator_(nullptr), ref_cnt_(0), - undo_status_list_(), exclusive_flag_(ExclusiveType::NORMAL) {} ObTxData(const ObTxData &rhs); ObTxData &operator=(const ObTxData &rhs); @@ -260,6 +283,8 @@ public: void reset(); OB_INLINE bool contain(const transaction::ObTransID &tx_id) { return tx_id_ == tx_id; } + int init_tx_op(); + int reserve_undo(ObTxTable *tx_table); int64_t inc_ref() { int64_t ref_cnt = ATOMIC_AAF(&ref_cnt_, 1); @@ -275,23 +300,17 @@ public: STORAGE_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "invalid slice allocator", KPC(this)); ob_abort(); } else if (0 == ATOMIC_SAF(&ref_cnt_, 1)) { - if (OB_UNLIKELY(nullptr != undo_status_list_.head_)) { - ObUndoStatusNode *node_ptr = undo_status_list_.head_; - ObUndoStatusNode *node_to_free = nullptr; - while (nullptr != node_ptr) { - node_to_free = node_ptr; - node_ptr = node_ptr->next_; - tx_data_allocator_->free(node_to_free); - } - } + op_guard_.reset(); tx_data_allocator_->free(this); } } + int check_tx_op_exist(share::SCN 
op_scn, bool &exist); + /** * @brief Add a undo action with dynamically memory allocation. * See more details in alloc_undo_status_node() function of class ObTxDataTable - * + * * @param[in] tx_table, the tx table contains this tx data * @param[in & out] undo_action, the undo action which is waiting to be added. If this undo action contains exsiting undo actions, the existing undo actions will be deleted and this undo action will be modified to contain all the deleted undo actions. * @param[in] undo_node, the undo status node can be used to extend undo status list if required, otherwise it will be released @@ -306,7 +325,7 @@ public: int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; int deserialize(const char *buf, const int64_t data_len, int64_t &pos, share::ObTenantTxDataAllocator &tx_data_allocator); int64_t get_serialize_size() const; - int64_t size() const; + int64_t size_need_cache() const; void dump_2_text(FILE *fd) const; static void print_to_stderr(const ObTxData &tx_data); @@ -338,11 +357,14 @@ public: public: share::ObTenantTxDataAllocator *tx_data_allocator_; + share::ObTenantTxDataOpAllocator *op_allocator_; int64_t ref_cnt_; - ObUndoStatusList undo_status_list_; ExclusiveType exclusive_flag_; + ObTxDataOpGuard op_guard_; }; +static_assert(sizeof(ObTxData) < storage::TX_DATA_SLICE_SIZE, "ObTxData exceed slice_allocator fixed length"); + class ObTxDataGuard { public: @@ -460,15 +482,19 @@ struct ObReadTxDataArg{ const transaction::ObTransID tx_id_; const int64_t read_epoch_; ObTxDataMiniCache &tx_data_mini_cache_; + const bool skip_cache_; - ObReadTxDataArg(const transaction::ObTransID tx_id, const int64_t read_epoch, ObTxDataMiniCache &mini_cache) - : tx_id_(tx_id), read_epoch_(read_epoch), tx_data_mini_cache_(mini_cache) {} + ObReadTxDataArg(const transaction::ObTransID tx_id, + const int64_t read_epoch, + ObTxDataMiniCache &mini_cache, + const bool skip_cache = false) + : tx_id_(tx_id), read_epoch_(read_epoch), + 
tx_data_mini_cache_(mini_cache), skip_cache_(skip_cache) {} - TO_STRING_KV(K_(tx_id), K_(read_epoch), K_(tx_data_mini_cache)); + TO_STRING_KV(K_(tx_id), K_(read_epoch), K_(tx_data_mini_cache), K_(skip_cache)); }; } // namespace storage - } // namespace oceanbase #endif // OCEANBASE_STORAGE_OB_TX_DATA_DEFINE_ diff --git a/src/storage/tx/ob_tx_data_functor.cpp b/src/storage/tx/ob_tx_data_functor.cpp index 6dbd5a1a2..95631e41d 100644 --- a/src/storage/tx/ob_tx_data_functor.cpp +++ b/src/storage/tx/ob_tx_data_functor.cpp @@ -82,7 +82,8 @@ int CheckSqlSequenceCanReadFunctor::operator() (const ObTxData &tx_data, ObTxCCC const int32_t state = ATOMIC_LOAD(&tx_data.state_); const SCN commit_version = tx_data.commit_version_.atomic_load(); const SCN end_scn = tx_data.end_scn_.atomic_load(); - const bool is_rollback = tx_data.undo_status_list_.is_contain(sql_sequence_, state); + const bool is_rollback = !tx_data.op_guard_.is_valid() ? false : + tx_data.op_guard_->get_undo_status_list().is_contain(sql_sequence_, state); // NB: The functor is only used during minor merge if (ObTxData::ABORT == state) { @@ -116,7 +117,8 @@ int CheckRowLockedFunctor::operator() (const ObTxData &tx_data, ObTxCCCtx *tx_cc const int32_t state = ATOMIC_LOAD(&tx_data.state_); const SCN commit_version = tx_data.commit_version_.atomic_load(); const SCN end_scn = tx_data.end_scn_.atomic_load(); - const bool is_rollback = tx_data.undo_status_list_.is_contain(sql_sequence_, state); + const bool is_rollback = !tx_data.op_guard_.is_valid() ? 
false : + tx_data.op_guard_->get_undo_status_list().is_contain(sql_sequence_, state); switch (state) { case ObTxData::COMMIT: { @@ -249,7 +251,8 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx * const int32_t state = ATOMIC_LOAD(&tx_data.state_); const SCN commit_version = tx_data.commit_version_.atomic_load(); const SCN end_scn = tx_data.end_scn_.atomic_load(); - const bool is_rollback = tx_data.undo_status_list_.is_contain(data_sql_sequence, state); + const bool is_rollback = !tx_data.op_guard_.is_valid() ? false : + tx_data.op_guard_->get_undo_status_list().is_contain(data_sql_sequence, state); can_read_ = false; trans_version_.set_invalid(); @@ -266,7 +269,7 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx * } else { // Case 1.2: Otherwise, we get the version under mvcc can_read_ = snapshot_version >= commit_version - && !tx_data.undo_status_list_.is_contain(data_sql_sequence, state); + && !is_rollback; trans_version_ = commit_version; } break; @@ -500,7 +503,8 @@ int CleanoutTxStateFunctor::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc const int32_t state = ATOMIC_LOAD(&tx_data.state_); const SCN commit_version = tx_data.commit_version_.atomic_load(); const SCN end_scn = tx_data.end_scn_.atomic_load(); - const bool is_rollback = tx_data.undo_status_list_.is_contain(seq_no_, state); + const bool is_rollback = !tx_data.op_guard_.is_valid() ? 
false : + tx_data.op_guard_->get_undo_status_list().is_contain(seq_no_, state); (void)resolve_tx_data_check_data_(state, commit_version, end_scn, is_rollback); @@ -632,9 +636,26 @@ int GenerateVirtualTxDataRowFunctor::operator()(const ObTxData &tx_data, ObTxCCC row_data_.start_scn_ = tx_data.start_scn_; row_data_.end_scn_ = tx_data.end_scn_; row_data_.commit_version_ = tx_data.commit_version_; - tx_data.undo_status_list_.to_string(row_data_.undo_status_list_str_, common::MAX_UNDO_LIST_CHAR_LENGTH); + if (tx_data.op_guard_.is_valid()) { + tx_data.op_guard_->get_undo_status_list().to_string(row_data_.undo_status_list_str_, common::MAX_UNDO_LIST_CHAR_LENGTH); + tx_data.op_guard_->get_tx_op_list().to_string(row_data_.tx_op_str_, common::MAX_TX_OP_CHAR_LENGTH); + } return OB_SUCCESS; } + +int LoadTxOpFunctor::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx) +{ + int ret = OB_SUCCESS; + if (!tx_data.op_guard_.is_valid()) { + // do nothing + } else if (OB_FAIL(tx_data_.init_tx_op())) { + TRANS_LOG(WARN, "init_tx_op failed", K(ret)); + } else { + tx_data_.op_guard_.init(tx_data.op_guard_.ptr()); + } + return ret; +} + } // namespace storage } // namespace oceanbase diff --git a/src/storage/tx/ob_tx_data_functor.h b/src/storage/tx/ob_tx_data_functor.h index 0c6c15f16..d1b87de15 100644 --- a/src/storage/tx/ob_tx_data_functor.h +++ b/src/storage/tx/ob_tx_data_functor.h @@ -241,6 +241,15 @@ public: observer::VirtualTxDataRow &row_data_; }; +class LoadTxOpFunctor : public ObITxDataCheckFunctor +{ +public: + LoadTxOpFunctor(ObTxData &tx_data) : tx_data_(tx_data) {} + virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override; +public: + ObTxData &tx_data_; +}; + } // namespace storage } // namespace oceanbase diff --git a/src/storage/tx/ob_tx_data_op.cpp b/src/storage/tx/ob_tx_data_op.cpp new file mode 100644 index 000000000..482d8e41c --- /dev/null +++ b/src/storage/tx/ob_tx_data_op.cpp @@ -0,0 +1,333 @@ +/** + * Copyright (c) 2021 
OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "storage/tx/ob_tx_data_define.h" +#include "lib/utility/ob_unify_serialize.h" +#include "storage/tx_table/ob_tx_table.h" +#include "share/rc/ob_tenant_base.h" +#include "share/allocator/ob_shared_memory_allocator_mgr.h" + +using namespace oceanbase::share; +using namespace oceanbase::transaction; + +namespace oceanbase +{ +namespace storage +{ + +OB_SERIALIZE_MEMBER(ObTxDummyOp); +ObTxDummyOp DEFAULT_TX_DUMMY_OP; + +void ObTxDataOp::dec_ref() { + int ret = OB_SUCCESS; + if(ATOMIC_SAF(&ref_cnt_, 1) == 0) { + if (OB_FAIL(tx_op_list_.check_stat())) { + STORAGE_LOG(WARN, "dec_ref", KR(ret), KP(this), KPC(this)); + abort(); + } + // to release tx_op + for (int64_t idx = 0; idx < tx_op_list_.get_count(); idx++) { + ObTxOp &tx_op = *tx_op_list_.at(idx); + tx_op.release(); + } + if (OB_NOT_NULL(tx_op_list_.get_ptr())) { + op_allocator_->free(tx_op_list_.get_ptr()); + } + // to release undo status + if (OB_UNLIKELY(nullptr != undo_status_list_.head_)) { + ObUndoStatusNode *node_ptr = undo_status_list_.head_; + ObUndoStatusNode *node_to_free = nullptr; + while (nullptr != node_ptr) { + node_to_free = node_ptr; + node_ptr = node_ptr->next_; + tx_data_allocator_->free(node_to_free); + } + } + tx_data_allocator_->free(this); + } +} + +int64_t ObTxDataOp::get_tx_op_size() +{ + int64_t tx_op_size = tx_op_list_.get_capacity() * sizeof(ObTxOp); + for (int64_t idx = 0; idx < tx_op_list_.get_count(); idx++) { + ObTxOp &tx_op = *tx_op_list_.at(idx); + tx_op_size += 
tx_op.get_val_size(); + } + return tx_op_size; +} + +int64_t ObTxOpVector::to_string(char *buf, const int64_t buf_len) const +{ + int64_t pos = 0; + + common::databuff_printf(buf, buf_len, pos, "count=%ld", count_); + for (int64_t idx = 0; idx < count_; idx++) { + ObTxOp *op = &tx_op_[idx]; + common::databuff_printf(buf, buf_len, pos, " op(%ld)=(op_code:%ld, op_scn:%ld)", + idx, op->get_op_code(), op->get_op_scn().convert_to_ts()); + } + return pos; +} + +ObTxOp *ObTxOpVector::at(int64_t idx) +{ + ObTxOp *tx_op = nullptr; + int ret = OB_SUCCESS; + if (OB_FAIL(check_stat())) { + STORAGE_LOG(WARN, "tx_op vector stat error", KPC(this), KR(ret), K(lbt())); + } else if (idx >= 0 && idx < count_) { + tx_op = &tx_op_[idx]; + } else { + STORAGE_LOG(WARN, "out of range", KPC(this), K(idx)); + } + return tx_op; +} + +int ObTxOpVector::push_back(ObTxOp &tx_op) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(check_stat())) { + STORAGE_LOG(WARN, "tx_op vector stat error", KR(ret)); + } else if (count_ < capacity_) { + tx_op_[count_] = tx_op; + count_++; + } else { + ret = OB_SIZE_OVERFLOW; + } + return ret; +} + +int ObTxOpVector::check_stat() +{ + int ret = OB_SUCCESS; + if (count_ < 0 || capacity_ < 0 || count_ > capacity_ + || (count_ > 0 && OB_ISNULL(tx_op_))) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "check_stat failed", KPC(this), KP(this)); + } + return ret; +} + +int ObTxOpVector::try_extend_space(int64_t count, ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (count < 0) { + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(check_stat())) { + STORAGE_LOG(WARN, "check_stat failed", KR(ret)); + } else if (count == 0) { + // do nothing + } else if (count_ + count <= capacity_) { + // do nothing + } else { + ObTxOp *tx_op_ptr = nullptr; + if (OB_ISNULL(tx_op_ptr = (ObTxOp*)allocator.alloc((count_ + count) * sizeof(ObTxOp)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + if (count_ > 0) { + MEMCPY(tx_op_ptr, tx_op_, count_ * sizeof(ObTxOp)); + } + if 
(OB_NOT_NULL(tx_op_)) { + allocator.free(tx_op_); + } + tx_op_ = tx_op_ptr; + capacity_ = count_ + count; + } + } + return ret; +} + +int64_t ObTxOpVector::get_serialize_size() const +{ + int64_t len = 0; + len += serialization::encoded_length_vi64(count_); + for (int64_t idx = 0; idx < count_; idx++) { + len += tx_op_[idx].get_serialize_size(); + } + return len; +} + +int ObTxOpVector::serialize(char *buf, const int64_t buf_len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + if (OB_FAIL(serialization::encode_vi64(buf, buf_len, pos, count_))) { + STORAGE_LOG(WARN, "serialize fail", KR(ret)); + } else { + for (int64_t idx = 0; OB_SUCC(ret) && idx < count_; idx++) { + if (OB_FAIL(tx_op_[idx].serialize(buf, buf_len, pos))) { + STORAGE_LOG(WARN, "serialize fail", KR(ret)); + } + } + } + return ret; +} + +int ObTxOpVector::deserialize(const char *buf, const int64_t buf_len, int64_t &pos, ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(serialization::decode_vi64(buf, buf_len, pos, &count_))) { + STORAGE_LOG(WARN, "deserialize fail", KR(ret), K(buf), K(buf_len), K(pos), K(count_)); + } else if (count_ > 0) { + if (OB_ISNULL(tx_op_ = (ObTxOp*)allocator.alloc(count_ * sizeof(ObTxOp)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + capacity_ = count_; + } + for (int64_t idx = 0; OB_SUCC(ret) && idx < count_; idx++) { + new (&tx_op_[idx]) ObTxOp(); + if (OB_FAIL(tx_op_[idx].deserialize(buf, buf_len, pos, allocator))) { + STORAGE_LOG(WARN, "deserialize fail", KR(ret)); + } + } + } + return ret; +} + +int ObTxDataOp::add_tx_op(ObTxOp &tx_op) +{ + int ret = OB_SUCCESS; + SpinWLockGuard lock_guard(lock_); + if (OB_FAIL(tx_op_list_.try_extend_space(1, *op_allocator_))) { + STORAGE_LOG(WARN, "try_extend_space fail", KR(ret), K(tx_op)); + } else if (OB_FAIL(tx_op_list_.push_back(tx_op))) { + STORAGE_LOG(WARN, "push tx_op to array failed", KR(ret)); + } + return ret; +} + +int ObTxDataOp::reserve_tx_op_space(int64_t count) +{ + int ret = OB_SUCCESS; + 
SpinWLockGuard lock_guard(lock_); + if (OB_FAIL(tx_op_list_.try_extend_space(count, *op_allocator_))) { + STORAGE_LOG(WARN, "try_extend_space fail", KR(ret), K(count)); + } + return ret; +} + +int ObTxDataOp::add_tx_op_batch(transaction::ObTransID tx_id, share::ObLSID ls_id, share::SCN op_scn, ObTxOpArray &tx_op_batch) +{ + int ret = OB_SUCCESS; + SpinWLockGuard lock_guard(lock_); + if (OB_FAIL(tx_op_list_.try_extend_space(tx_op_batch.count(), *op_allocator_))) { + STORAGE_LOG(WARN, "try_extend_space fail", KR(ret), K(tx_op_batch)); + } else { + for (int64_t idx = 0; OB_SUCC(ret) && idx < tx_op_batch.count(); idx++) { + if (OB_FAIL(tx_op_list_.push_back(tx_op_batch.at(idx)))) { + STORAGE_LOG(WARN, "push tx_op to array failed", KR(ret)); + } + } + // !!! we must promise tx_op_batch atomic append into tx_op_list + // otherwise tx_op replay filter with log_scn compare op_scn will cause serious problem + if (OB_FAIL(ret)) { + STORAGE_LOG(ERROR, "tx_op_batch is not atomic append", K(tx_id), K(ls_id), K(tx_op_list_), K(tx_op_batch), K(op_scn)); + ob_abort(); + } + } + STORAGE_LOG(INFO, "add_tx_op", K(ret), K(tx_id), K(ls_id), K(op_scn), K(tx_op_batch.count()), K(tx_op_list_.get_count()), K(tx_op_batch)); + return ret; +} + +int64_t ObTxOp::get_serialize_size() const +{ + int64_t len = 0; + len += serialization::encoded_length_vi64(int64_t(op_code_)); + len += op_scn_.get_serialize_size(); + + #define SERIALIZE_TX_OP_TMP(OP_CODE, OP_TYPE) \ + if (OB_NOT_NULL(op_val_) && op_code_ == OP_CODE) { \ + OP_TYPE &op_obj = *((OP_TYPE*)op_val_); \ + len += op_obj.get_serialize_size(); \ + } + #define SERIALIZE_TX_OP(TYPE, UNUSED) SERIALIZE_TX_OP_TMP TYPE + + LST_DO2(SERIALIZE_TX_OP, (), TX_OP_MEMBERS); + #undef SERIALIZE_TX_OP_TMP + #undef SERIALIZE_TX_OP + return len; +} + +int ObTxOp::serialize(char *buf, const int64_t buf_len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + if (OB_FAIL(serialization::encode_vi64(buf, buf_len, pos, int64_t(op_code_)))) { + STORAGE_LOG(WARN, 
"serialize fail", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(op_scn_.serialize(buf, buf_len, pos))) { + STORAGE_LOG(WARN, "serialize fail", K(ret), K(buf_len), K(pos)); + } else if (OB_ISNULL(op_val_)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "tx_op op_val is null", KR(ret), KPC(this)); + } + #define SERIALIZE_TX_OP_TMP(OP_CODE, OP_TYPE) \ + if (OB_SUCC(ret) && op_code_ == OP_CODE) { \ + OP_TYPE &op_obj = *((OP_TYPE*)op_val_); \ + if (OB_FAIL(op_obj.serialize(buf, buf_len, pos))) { \ + STORAGE_LOG(WARN, "serialize fail", KR(ret)); \ + } \ + } + #define SERIALIZE_TX_OP(TYPE, UNUSED) SERIALIZE_TX_OP_TMP TYPE + + LST_DO2(SERIALIZE_TX_OP, (), TX_OP_MEMBERS); + #undef SERIALIZE_TX_OP_TMP + #undef SERIALIZE_TX_OP + return ret; +} + +int ObTxOp::deserialize(const char *buf, const int64_t data_len, int64_t &pos, ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(serialization::decode_vi64(buf, data_len, pos, (int64_t*)&op_code_))) { + STORAGE_LOG(WARN, "deserialize fail", K(data_len), K(pos), K(ret)); + } else if (OB_FAIL(op_scn_.deserialize(buf, data_len, pos))) { + STORAGE_LOG(WARN, "deserialize fail", K(ret), K(data_len), K(pos)); + } + #define DESERIALIZE_TX_OP_TMP(OP_CODE, OP_TYPE) \ + if (OB_SUCC(ret) && op_code_ == OP_CODE) { \ + OP_TYPE *op_obj = nullptr; \ + if (OB_ISNULL(op_obj = (OP_TYPE*)allocator.alloc(sizeof(OP_TYPE)))) { \ + ret = OB_ALLOCATE_MEMORY_FAILED; \ + STORAGE_LOG(WARN, "deserialize fail", K(ret), K(data_len), K(pos)); \ + } else if (FALSE_IT(new (op_obj) OP_TYPE())) { \ + } else if (OB_FAIL(op_obj->deserialize(buf, data_len, pos))) { \ + STORAGE_LOG(WARN, "deserialize fail", KR(ret)); \ + allocator.free(op_obj); \ + } else if (FALSE_IT(op_val_ = op_obj)) { \ + } \ + } + #define DESERIALIZE_TX_OP(TYPE, UNUSED) DESERIALIZE_TX_OP_TMP TYPE + + LST_DO2(DESERIALIZE_TX_OP, (), TX_OP_MEMBERS); + #undef DESERIALIZE_TX_OP_TMP + #undef DESERIALIZE_TX_OP + return ret; +} + +void ObTxOp::release() +{ + ObIAllocator &allocator = 
MTL(ObSharedMemAllocMgr*)->tx_data_op_allocator(); + #define RELEASE_TX_OP_TMP(OP_CODE, OP_TYPE) \ + if (OB_NOT_NULL(op_val_) && op_code_ == OP_CODE \ + && op_val_ != &DEFAULT_TX_DUMMY_OP) { \ + OP_TYPE *op_obj = (OP_TYPE*)op_val_; \ + release(*op_obj); \ + allocator.free(op_obj); \ + } + #define RELEASE_TX_OP(TYPE, UNUSED) RELEASE_TX_OP_TMP TYPE + + LST_DO2(RELEASE_TX_OP, (), TX_OP_MEMBERS); + #undef RELEASE_TX_OP_TMP + #undef RELEASE_TX_OP +} + +} // namespace storage +} // namespace oceanbase diff --git a/src/storage/tx/ob_tx_data_op.h b/src/storage/tx/ob_tx_data_op.h new file mode 100644 index 000000000..cc5cfb2b0 --- /dev/null +++ b/src/storage/tx/ob_tx_data_op.h @@ -0,0 +1,213 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_STORAGE_OB_TX_DATA_OP +#define OCEANBASE_STORAGE_OB_TX_DATA_OP + +#include "share/scn.h" +#include "share/allocator/ob_tx_data_allocator.h" +#include "storage/tx/ob_trans_define.h" +#include "storage/tx/ob_tx_data_define.h" + +namespace oceanbase +{ + +namespace transaction +{ +namespace tablelock +{ + struct ObTableLockOp; +} +} +namespace storage +{ + +// record tx.op has no value +class ObTxDummyOp +{ + OB_UNIS_VERSION(1); +public: + TO_STRING_EMPTY(); +}; + +extern ObTxDummyOp DEFAULT_TX_DUMMY_OP; + +/* + * tx.op is designed to store tx operations that need retain after tx_ctx exit + * so tx.op need to support various data types + * + * we use class ObTxOp to describe every tx operation + * users just need to add new data types to TX_OP_MEMBERS, we can create ObTxOp and put it into ObTxDataOp + */ +enum class ObTxOpCode : int64_t +{ + INVALID = 0, + MDS_OP = 1, + LOCK_OP = 2, + ABORT_OP = 3 +}; + +#define TX_OP_LIST(...) __VA_ARGS__ +#define TX_OP_MEMBERS \ + TX_OP_LIST( \ + (ObTxOpCode::MDS_OP, transaction::ObTxBufferNodeWrapper), \ + (ObTxOpCode::LOCK_OP, transaction::tablelock::ObTableLockOp), \ + (ObTxOpCode::ABORT_OP, ObTxDummyOp) \ + ) + +class ObTxOp +{ +public: + ObTxOp() { reset(); } + ~ObTxOp() { reset(); } + void reset() { + op_code_ = ObTxOpCode::INVALID; + op_scn_.reset(); + op_val_ = nullptr; + val_size_ = 0; + } + template + int init(ObTxOpCode op_code, share::SCN op_scn, T *val, int64_t val_size); + ObTxOpCode get_op_code() { return op_code_; } + share::SCN get_op_scn() { return op_scn_; } + void set_op_scn(share::SCN scn) { op_scn_ = scn; } + void* get_op_val() { return op_val_; } + int64_t get_val_size() { return val_size_; } + template + T *get(); + int64_t get_serialize_size() const; + int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; + int deserialize(const char *buf, const int64_t data_len, int64_t &pos, ObIAllocator &allocator); + void release(); + template + void release(T &op_val) { + 
op_val.~T(); + } + TO_STRING_KV(K_(op_code), K_(op_scn), K_(op_val)); +private: + ObTxOpCode op_code_; + share::SCN op_scn_; + void *op_val_; + int64_t val_size_; // for tx_data memory +}; + +typedef ObSEArray ObTxOpArray; + +class ObTxOpVector +{ +public: + ObTxOpVector() { reset(); } + ~ObTxOpVector() { reset(); } + void reset() { + capacity_ = 0; + count_ = 0; + tx_op_ = nullptr; + } + int64_t get_capacity() { return capacity_; } + int64_t get_count() { return count_; } + ObTxOp* get_ptr() { return tx_op_; } + int try_extend_space(int64_t count, ObIAllocator &allocator); + ObTxOp *at(int64_t idx); + int push_back(ObTxOp &tx_op); + int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; + int deserialize(const char *buf, const int64_t data_len, int64_t &pos, ObIAllocator &allocator); + int64_t get_serialize_size() const; + int check_stat(); + int64_t to_string(char *buf, const int64_t buf_len) const; +private: + int64_t capacity_; + int64_t count_; + ObTxOp *tx_op_; +}; + +class ObTxDataOp +{ +public: + ObTxDataOp(share::ObTenantTxDataAllocator *allocator, share::ObTenantTxDataOpAllocator *op_allocator) : + ref_cnt_(0), + undo_status_list_(), + tx_op_list_(), + tx_data_allocator_(allocator), + op_allocator_(op_allocator) {} + ~ObTxDataOp() {} + int64_t inc_ref() { return ATOMIC_AAF(&ref_cnt_, 1); } + void dec_ref(); + ObTxOpVector &get_tx_op_list() { return tx_op_list_; } + int64_t get_tx_op_size(); + ObUndoStatusList &get_undo_status_list() { return undo_status_list_; } + common::SpinRWLock &get_lock() { return lock_; } + int64_t get_ref() { return ref_cnt_; } + int add_tx_op(ObTxOp &tx_op); + int add_tx_op_batch(transaction::ObTransID tx_id, share::ObLSID ls_id, share::SCN op_scn, ObTxOpArray &tx_op_batch); + int reserve_tx_op_space(int64_t count); + + TO_STRING_KV(K_(ref_cnt), K_(undo_status_list), K_(tx_op_list)); +private: + int64_t ref_cnt_; + ObUndoStatusList undo_status_list_; + ObTxOpVector tx_op_list_; + share::ObTenantTxDataAllocator 
*tx_data_allocator_; + share::ObTenantTxDataOpAllocator *op_allocator_; + common::SpinRWLock lock_; +}; + +static_assert(sizeof(ObTxDataOp) < TX_DATA_SLICE_SIZE, "ObTxDataOp too large!"); + +template +int ObTxOp::init(ObTxOpCode op_code, share::SCN op_scn, T* op_val, int64_t val_size) +{ + int ret = OB_SUCCESS; + if (op_code == ObTxOpCode::INVALID || !op_scn.is_valid() || OB_ISNULL(op_val) || val_size < 0) { + ret = OB_INVALID_ARGUMENT; + } + + #define INIT_TX_OP_TMP(OP_CODE, OP_TYPE) \ + if (OB_SUCC(ret) && op_code_ == OP_CODE && !(std::is_same::value)) { \ + ret = OB_INVALID_ARGUMENT; \ + } + #define INIT_TX_OP(TYPE, UNUSED) INIT_TX_OP_TMP TYPE + + LST_DO2(INIT_TX_OP, (), TX_OP_MEMBERS); + #undef INIT_TX_OP_TMP + #undef INIT_TX_OP + if (OB_SUCC(ret)) { + op_code_ = op_code; + op_scn_ = op_scn; + op_val_ = op_val; + val_size_ = val_size; + } + return ret; +} + +template +T* ObTxOp::get() +{ + T* val = nullptr; + + #define GET_TX_OP_TMP(OP_CODE, OP_TYPE) \ + if (std::is_same::value) { \ + val = (T*)op_val_; \ + } + #define GET_TX_OP(TYPE, UNUSED) GET_TX_OP_TMP TYPE + + LST_DO2(GET_TX_OP, (), TX_OP_MEMBERS); + #undef GET_TX_OP_TMP + #undef GET_TX_OP + + return val; +} + + +} // namespace storage +} // namespace oceanbase + +#endif // OCEANBASE_STORAGE_OB_TX_DATA_OP_ diff --git a/src/storage/tx/ob_tx_log.h b/src/storage/tx/ob_tx_log.h index 58329987a..0909da82c 100644 --- a/src/storage/tx/ob_tx_log.h +++ b/src/storage/tx/ob_tx_log.h @@ -1100,7 +1100,8 @@ class ObTxAbortLog public: ObTxAbortLog(ObTxAbortLogTempRef &temp_ref) - : multi_source_data_(temp_ref.multi_source_data_), tx_data_backup_() + : multi_source_data_(temp_ref.multi_source_data_), + tx_data_backup_() { before_serialize(); } diff --git a/src/storage/tx/ob_tx_replay_executor.cpp b/src/storage/tx/ob_tx_replay_executor.cpp index 8788add38..5d94c320e 100644 --- a/src/storage/tx/ob_tx_replay_executor.cpp +++ b/src/storage/tx/ob_tx_replay_executor.cpp @@ -314,7 +314,7 @@ int 
ObTxReplayExecutor::try_get_tx_ctx_() } else if (base_header_.need_pre_replay_barrier() && OB_UNLIKELY(ctx_->is_replay_complete_unknown())) { // if a pre-barrier log will be replayed // the txn can be confirmed to incomplete replayed - ret = ctx_->set_replay_incomplete(); + ret = ctx_->set_replay_incomplete(log_ts_ns_); } } } diff --git a/src/storage/tx_table/ob_tx_data_cache.cpp b/src/storage/tx_table/ob_tx_data_cache.cpp index eb2454da1..fade20b3b 100644 --- a/src/storage/tx_table/ob_tx_data_cache.cpp +++ b/src/storage/tx_table/ob_tx_data_cache.cpp @@ -12,6 +12,7 @@ #include "ob_tx_data_cache.h" #include "share/rc/ob_tenant_base.h" +#include "storage/tx/ob_tx_data_op.h" namespace oceanbase { namespace storage { @@ -30,7 +31,7 @@ int ObTxDataCacheValue::init(const ObTxData &tx_data) STORAGE_LOG(WARN, "init tx data cache value twice", KR(ret), KPC(this)); } else { // reserve or allocate buf to store tx data - int64_t size = tx_data.size(); + int64_t size = tx_data.size_need_cache(); void *tx_data_buf = nullptr; if (TX_DATA_SLICE_SIZE == size) { // this tx data do not have undo actions, use reserved memory @@ -114,17 +115,24 @@ int ObTxDataCacheValue::inner_deep_copy_(void *tx_data_buf, const ObTxData &rhs) } else { tx_data_ = new (tx_data_buf) ObTxData(); tx_data_->assign_without_undo(rhs); + tx_data_->tx_data_allocator_ = rhs.tx_data_allocator_; + tx_data_->op_allocator_ = rhs.op_allocator_; + if (rhs.op_guard_.is_valid()) { + ObTxDataOp *tx_data_op = new ((char*)tx_data_buf + TX_DATA_SLICE_SIZE) ObTxDataOp(tx_data_->tx_data_allocator_, + tx_data_->op_allocator_); + tx_data_->op_guard_.init(tx_data_op); + } - if (OB_LIKELY(nullptr == rhs.undo_status_list_.head_)) { + if (OB_LIKELY(!rhs.op_guard_.is_valid() || OB_ISNULL(rhs.op_guard_->get_undo_status_list().head_))) { // this tx data do not have undo status } else { - undo_node_array_ = (ObUndoStatusNode *)((char *)tx_data_buf + TX_DATA_SLICE_SIZE); + undo_node_array_ = (ObUndoStatusNode *)((char *)tx_data_buf + 
TX_DATA_SLICE_SIZE + TX_DATA_SLICE_SIZE); + // ignore mds op if (OB_FAIL(inner_deep_copy_undo_status_(rhs))) { STORAGE_LOG(WARN, "deep copy undo status node for tx data kv cache failed", KR(ret), K(rhs)); } } } - return ret; } @@ -134,21 +142,21 @@ int ObTxDataCacheValue::inner_deep_copy_undo_status_(const ObTxData &rhs) // use dummy head point to the first undo node ObUndoStatusNode dummy_head; - dummy_head.next_ = rhs.undo_status_list_.head_; + dummy_head.next_ = rhs.op_guard_->get_undo_status_list().head_; ObUndoStatusNode *pre_node = &dummy_head; - tx_data_->undo_status_list_.undo_node_cnt_ = rhs.undo_status_list_.undo_node_cnt_; - for (int64_t i = 0; OB_SUCC(ret) && i < rhs.undo_status_list_.undo_node_cnt_; i++) { + tx_data_->op_guard_->get_undo_status_list().undo_node_cnt_ = rhs.op_guard_->get_undo_status_list().undo_node_cnt_; + for (int64_t i = 0; OB_SUCC(ret) && i < rhs.op_guard_->get_undo_status_list().undo_node_cnt_; i++) { ObUndoStatusNode *rhs_node = pre_node->next_; pre_node = rhs_node; if (OB_ISNULL(rhs_node)) { ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(ERROR, "undo status list node count dismatach", KR(ret), K(i), K(rhs.undo_status_list_.undo_node_cnt_)); + STORAGE_LOG(ERROR, "undo status list node count dismatach", KR(ret), K(i), K(rhs.op_guard_->get_undo_status_list().undo_node_cnt_)); } else { undo_node_array_[i].assign_value(*rhs_node); undo_node_array_[i].next_ = nullptr; if (0 == i) { - tx_data_->undo_status_list_.head_ = undo_node_array_; + tx_data_->op_guard_->get_undo_status_list().head_ = undo_node_array_; } else { undo_node_array_[i-1].next_ = &undo_node_array_[i]; } @@ -192,4 +200,4 @@ int ObTxDataKVCache::put_row(const ObTxDataCacheKey &key, const ObTxDataCacheVal } // namespace storage -} // namespace oceanbase \ No newline at end of file +} // namespace oceanbase diff --git a/src/storage/tx_table/ob_tx_data_cache.h b/src/storage/tx_table/ob_tx_data_cache.h index 6a96cf5ed..fc6c7917d 100644 --- a/src/storage/tx_table/ob_tx_data_cache.h 
+++ b/src/storage/tx_table/ob_tx_data_cache.h @@ -107,7 +107,7 @@ public: TO_STRING_KV(K_(is_inited), KP_(tx_data), KPC_(tx_data), KP_(mtl_alloc_buf), KP(&reserved_buf_)); public: // derived from ObIKVCacheValue - virtual int64_t size() const { return (IS_INIT && OB_NOT_NULL(tx_data_)) ? sizeof(*this) + tx_data_->size() : 0; } + virtual int64_t size() const { return (IS_INIT && OB_NOT_NULL(tx_data_)) ? sizeof(*this) + tx_data_->size_need_cache() : 0; } virtual int deep_copy(char *buf, const int64_t buf_len, ObIKVCacheValue *&value) const; diff --git a/src/storage/tx_table/ob_tx_data_hash_map.cpp b/src/storage/tx_table/ob_tx_data_hash_map.cpp index 1d3b76fdb..bc10d79bd 100644 --- a/src/storage/tx_table/ob_tx_data_hash_map.cpp +++ b/src/storage/tx_table/ob_tx_data_hash_map.cpp @@ -57,7 +57,6 @@ int ObTxDataHashMap::init() int ObTxDataHashMap::insert(const transaction::ObTransID &key, ObTxData *value) { int ret = OB_SUCCESS; - if (!key.is_valid() || OB_ISNULL(value)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(key), KP(value)); @@ -161,4 +160,4 @@ int ObTxDataHashMap::Iterator::get_next(ObTxDataGuard &guard) } } // namespace storage -} // namespace oceanbase \ No newline at end of file +} // namespace oceanbase diff --git a/src/storage/tx_table/ob_tx_data_memtable.cpp b/src/storage/tx_table/ob_tx_data_memtable.cpp index 5c46ccfa1..68cfced3f 100644 --- a/src/storage/tx_table/ob_tx_data_memtable.cpp +++ b/src/storage/tx_table/ob_tx_data_memtable.cpp @@ -182,15 +182,15 @@ int ObTxDataMemtable::insert(ObTxData *tx_data) max_tx_scn_.inc_update(tx_data->end_scn_); atomic_update_(tx_data); ATOMIC_INC(&inserted_cnt_); - if (OB_UNLIKELY(tx_data->undo_status_list_.undo_node_cnt_ >= 10)) { - if (tx_data->undo_status_list_.undo_node_cnt_ == 10 || tx_data->undo_status_list_.undo_node_cnt_ % 100 == 0) { + if (OB_UNLIKELY(tx_data->op_guard_.is_valid() && tx_data->op_guard_->get_undo_status_list().undo_node_cnt_ >= 10)) { + if 
(tx_data->op_guard_->get_undo_status_list().undo_node_cnt_ == 10 || tx_data->op_guard_->get_undo_status_list().undo_node_cnt_ % 100 == 0) { STORAGE_LOG(INFO, "attention! this tx write too many rollback to savepoint log", "ls_id", get_ls_id(), "tx_id", tx_data->tx_id_, "state", ObTxData::get_state_string(tx_data->state_), - "undo_node_cnt", tx_data->undo_status_list_.undo_node_cnt_, - "newest_undo_node", tx_data->undo_status_list_.head_, + "undo_node_cnt", tx_data->op_guard_->get_undo_status_list().undo_node_cnt_, + "newest_undo_node", tx_data->op_guard_->get_undo_status_list().head_, K(tx_data->start_scn_), K(tx_data->end_scn_)); } @@ -205,9 +205,21 @@ void ObTxDataMemtable::atomic_update_(ObTxData *tx_data) int64_t thread_idx = common::get_itid() & MAX_CONCURRENCY_MOD_MASK; min_tx_scn_[thread_idx].dec_update(tx_data->end_scn_); min_start_scn_[thread_idx].dec_update(tx_data->start_scn_); - int64_t tx_data_size = TX_DATA_SLICE_SIZE * (1LL + tx_data->undo_status_list_.undo_node_cnt_); + int64_t tx_data_size = 0; + int64_t count = 0; + if (tx_data->state_ == ObTxCommitData::RUNNING) { + tx_data_size = TX_DATA_SLICE_SIZE; + } else if (!tx_data->op_guard_.is_valid()) { + tx_data_size = TX_DATA_SLICE_SIZE; + } else { + count = tx_data->op_guard_->get_undo_status_list().undo_node_cnt_; + int64_t tx_op_size = tx_data->op_guard_->get_tx_op_size(); + tx_data_size = TX_DATA_SLICE_SIZE + TX_DATA_SLICE_SIZE + + count * TX_DATA_SLICE_SIZE + // undo status list + tx_op_size; // tx_op + } ATOMIC_FAA(&occupied_size_[thread_idx], tx_data_size); - ATOMIC_FAA(&total_undo_node_cnt_[thread_idx], tx_data->undo_status_list_.undo_node_cnt_); + ATOMIC_FAA(&total_undo_node_cnt_[thread_idx], count); } int ObTxDataMemtable::get_tx_data(const ObTransID &tx_id, ObTxDataGuard &tx_data_guard) @@ -377,7 +389,6 @@ int ObTxDataMemtable::pre_process_commit_version_row_(ObTxData *fake_tx_data) fake_tx_data->tx_id_ = INT64_MAX; fake_tx_data->commit_version_.convert_for_tx(serialize_size); 
fake_tx_data->start_scn_.convert_for_tx((int64_t)buf_.get_ptr()); - fake_tx_data->undo_status_list_.head_ = nullptr; } } diff --git a/src/storage/tx_table/ob_tx_data_memtable.h b/src/storage/tx_table/ob_tx_data_memtable.h index 9a74d63fa..f8310b1f9 100644 --- a/src/storage/tx_table/ob_tx_data_memtable.h +++ b/src/storage/tx_table/ob_tx_data_memtable.h @@ -19,6 +19,7 @@ #include "storage/tx/ob_tx_data_define.h" #include "storage/tx_table/ob_tx_table_define.h" #include "storage/tx_table/tx_table_local_buffer.h" +#include "storage/tx/ob_tx_data_op.h" namespace oceanbase { @@ -314,7 +315,6 @@ public: // checkpoint } int flush(const int64_t trace_id); - /** * @brief Because of the random order of clog callbacks, the tx data in a freezing tx data * memtable may not completed. We must wait until the max_consequent_callbacked_scn is larger @@ -512,7 +512,10 @@ public: // printf undo status list fprintf(fd_, "Undo Actions [from, to): {"); - ObUndoStatusNode *cur_node = tx_data->undo_status_list_.head_; + ObUndoStatusNode *cur_node = NULL; + if (tx_data->op_guard_.is_valid()) { + cur_node = tx_data->op_guard_->get_undo_status_list().head_; + } while (OB_NOT_NULL(cur_node)) { for (int i = 0; i < cur_node->size_; i++) { diff --git a/src/storage/tx_table/ob_tx_data_table.cpp b/src/storage/tx_table/ob_tx_data_table.cpp index 8c93f888f..5b484a4bb 100644 --- a/src/storage/tx_table/ob_tx_data_table.cpp +++ b/src/storage/tx_table/ob_tx_data_table.cpp @@ -264,72 +264,12 @@ int ObTxDataTable::alloc_tx_data(ObTxDataGuard &tx_data_guard, } else { ObTxData *tx_data = new (slice_ptr) ObTxData(); tx_data->tx_data_allocator_ = tx_data_allocator_; + tx_data->op_allocator_ = &MTL(share::ObSharedMemAllocMgr*)->tx_data_op_allocator(); tx_data_guard.init(tx_data); } return ret; } -int ObTxDataTable::deep_copy_tx_data(const ObTxDataGuard &in_tx_data_guard, ObTxDataGuard &out_tx_data_guard) -{ - int ret = OB_SUCCESS; - void *slice_ptr = nullptr; - const int64_t abs_expire_time = 
THIS_WORKER.get_timeout_ts(); - const ObTxData *in_tx_data = in_tx_data_guard.tx_data(); - ObTxData *out_tx_data = nullptr; - - if (OB_ISNULL(slice_ptr = tx_data_allocator_->alloc(true, abs_expire_time))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - STORAGE_LOG(WARN, "allocate memory from slice_allocator fail.", KR(ret), KP(this), - K(tablet_id_), K(abs_expire_time)); - } else if (OB_ISNULL(in_tx_data)) { - ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(ERROR, "invalid nullptr of tx data", K(in_tx_data_guard), KPC(this)); - } else { - out_tx_data = new (slice_ptr) ObTxData(); - *out_tx_data = *in_tx_data; - out_tx_data->tx_data_allocator_ = tx_data_allocator_; - out_tx_data->undo_status_list_.head_ = nullptr; - out_tx_data->ref_cnt_ = 0; - out_tx_data_guard.init(out_tx_data); - - if (OB_FAIL(deep_copy_undo_status_list_(in_tx_data->undo_status_list_, - out_tx_data->undo_status_list_))) { - STORAGE_LOG(WARN, "deep copy undo status list failed."); - } else { - // deep copy succeed. - } - } - return ret; -} - -int ObTxDataTable::deep_copy_undo_status_list_(const ObUndoStatusList &in_list, - ObUndoStatusList &out_list) -{ - int ret = OB_SUCCESS; - ObUndoStatusNode *cur_in_node = in_list.head_; - ObUndoStatusNode *pre_node = nullptr; - ObUndoStatusNode *new_node = nullptr; - - while (OB_SUCC(ret) && nullptr != cur_in_node) { - if (OB_FAIL(alloc_undo_status_node(new_node))) { - STORAGE_LOG(WARN, "alloc undo status node failed.", KR(ret)); - } else { - *new_node = *cur_in_node; - // reset next pointer to avoid invalid free - new_node->next_ = nullptr; - if (nullptr == pre_node) { - out_list.head_ = new_node; - } else { - pre_node->next_ = new_node; - } - pre_node = new_node; - cur_in_node = cur_in_node->next_; - } - } - - return ret; -} - int ObTxDataTable::alloc_undo_status_node(ObUndoStatusNode *&undo_status_node) { int ret = OB_SUCCESS; @@ -974,11 +914,11 @@ int ObTxDataTable::DEBUG_calc_with_row_iter_(ObStoreRowIterator *row_iter, tmp_upper_trans_version = tx_data.commit_version_; } 
} - - if (OB_NOT_NULL(tx_data.undo_status_list_.head_)) { - free_undo_status_list_(tx_data.undo_status_list_.head_); - tx_data.undo_status_list_.head_ = nullptr; + if (tx_data.op_guard_.is_valid() && OB_NOT_NULL(tx_data.op_guard_->get_undo_status_list().head_)) { + free_undo_status_list_(tx_data.op_guard_->get_undo_status_list().head_); + tx_data.op_guard_->get_undo_status_list().head_ = nullptr; } + } if (OB_ITER_END == ret) { @@ -1137,6 +1077,14 @@ int ObTxDataTable::check_min_start_in_tx_data_(const SCN &sstable_end_scn, return ret; } +int ObTxDataTable::deep_copy_tx_data(const ObTxDataGuard &in_tx_data, ObTxDataGuard &out_tx_data) +{ + int ret = OB_NOT_SUPPORTED; + UNUSED(in_tx_data); + UNUSED(out_tx_data); + return ret; +} + int ObTxDataTable::update_cache_if_needed_(bool &skip_calc) { int ret = OB_SUCCESS; @@ -1243,7 +1191,7 @@ int ObTxDataTable::calc_upper_trans_scn_(const SCN sstable_end_scn, SCN &upper_t return ret; } -int ObTxDataTable::supplement_undo_actions_if_exist(ObTxData *tx_data) +int ObTxDataTable::supplement_tx_op_if_exist(ObTxData *tx_data) { int ret = OB_SUCCESS; ObTxData tx_data_from_sstable; @@ -1256,6 +1204,8 @@ int ObTxDataTable::supplement_undo_actions_if_exist(ObTxData *tx_data) } else if (OB_ISNULL(tx_data)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(ERROR, "tx data is nullptr", KR(ret), KP(this)); + } else if (FALSE_IT(tx_data_from_sstable.tx_data_allocator_ = &MTL(share::ObSharedMemAllocMgr*)->tx_data_allocator())) { + } else if (FALSE_IT(tx_data_from_sstable.op_allocator_ = &MTL(share::ObSharedMemAllocMgr*)->tx_data_op_allocator())) { } else if (OB_FAIL(get_tx_data_in_sstable_(tx_data->tx_id_, tx_data_from_sstable, unused_scn))) { if (ret == OB_TRANS_CTX_NOT_EXIST) { // This transaction does not have undo actions @@ -1263,19 +1213,10 @@ int ObTxDataTable::supplement_undo_actions_if_exist(ObTxData *tx_data) } else { STORAGE_LOG(WARN, "get tx data from sstable failed.", KR(ret)); } - } else { - // assign and reset to avoid deep copy - if 
(OB_NOT_NULL(tx_data->undo_status_list_.head_)) { - STORAGE_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "invalid undo status list", KPC(tx_data)); - } - tx_data->undo_status_list_ = tx_data_from_sstable.undo_status_list_; - tx_data_from_sstable.undo_status_list_.reset(); - } - - if (OB_NOT_NULL(tx_data_from_sstable.undo_status_list_.head_)) { - STORAGE_LOG(WARN, "supplement undo actions failed", KR(ret), KPC(tx_data), K(get_ls_id())); - free_undo_status_list_(tx_data_from_sstable.undo_status_list_.head_); + } else if (FALSE_IT(*tx_data = tx_data_from_sstable)) { } + tx_data_from_sstable.tx_data_allocator_ = nullptr; + tx_data_from_sstable.reset(); return ret; } diff --git a/src/storage/tx_table/ob_tx_data_table.h b/src/storage/tx_table/ob_tx_data_table.h index e5c75911c..985dbecad 100644 --- a/src/storage/tx_table/ob_tx_data_table.h +++ b/src/storage/tx_table/ob_tx_data_table.h @@ -209,9 +209,9 @@ public: // ObTxDataTable int get_upper_trans_version_before_given_scn(const share::SCN sstable_end_scn, share::SCN &upper_trans_version); /** - * @brief see ObTxTable::supplement_undo_actions_if_exist + * @brief see ObTxTable::supplement_tx_op_if_exist */ - int supplement_undo_actions_if_exist(ObTxData *tx_data); + int supplement_tx_op_if_exist(ObTxData *tx_data); int self_freeze_task(); diff --git a/src/storage/tx_table/ob_tx_table.cpp b/src/storage/tx_table/ob_tx_table.cpp index f99446ee5..a12ecad57 100644 --- a/src/storage/tx_table/ob_tx_table.cpp +++ b/src/storage/tx_table/ob_tx_table.cpp @@ -645,7 +645,8 @@ int ObTxTable::check_with_tx_data(ObReadTxDataArg &read_tx_data_arg, ObITxDataCh // step 1 : read tx data in mini cache int tmp_ret = OB_SUCCESS; bool find_tx_data_in_cache = false; - if (OB_TMP_FAIL(check_tx_data_in_mini_cache_(read_tx_data_arg, fn))) { + if (read_tx_data_arg.skip_cache_) { + } else if (OB_TMP_FAIL(check_tx_data_in_mini_cache_(read_tx_data_arg, fn))) { if (OB_TRANS_CTX_NOT_EXIST != tmp_ret) { STORAGE_LOG(WARN, "check tx data in mini cache failed", 
KR(tmp_ret), K(read_tx_data_arg)); } @@ -655,7 +656,8 @@ int ObTxTable::check_with_tx_data(ObReadTxDataArg &read_tx_data_arg, ObITxDataCh } // step 2 : read tx data in kv cache - if (find_tx_data_in_cache) { + if (read_tx_data_arg.skip_cache_) { + } else if (find_tx_data_in_cache) { // already find tx data and do function with mini cache } else if (OB_TMP_FAIL(check_tx_data_in_kv_cache_(read_tx_data_arg, fn))) { if (OB_TRANS_CTX_NOT_EXIST != tmp_ret) { @@ -729,7 +731,7 @@ int ObTxTable::check_tx_data_in_kv_cache_(ObReadTxDataArg &read_tx_data_arg, ObI if (ObTxData::RUNNING == tx_data->state_) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(ERROR, "read an unexpected state tx data from kv cache"); - } else if (OB_ISNULL(tx_data->undo_status_list_.head_)) { + } else if (!tx_data->op_guard_.is_valid()) { // put into mini cache only if this tx data do not have undo actions read_tx_data_arg.tx_data_mini_cache_.set(*tx_data); } @@ -768,7 +770,7 @@ int ObTxTable::check_tx_data_in_tables_(ObReadTxDataArg &read_tx_data_arg, ObITx // if tx data is not null, put tx data into cache if (ObTxData::RUNNING == tx_data->state_) { } else { - if (OB_ISNULL(tx_data->undo_status_list_.head_)) { + if (!tx_data->op_guard_.is_valid()) { read_tx_data_arg.tx_data_mini_cache_.set(*tx_data); } @@ -1004,9 +1006,9 @@ int ObTxTable::cleanout_tx_node(ObReadTxDataArg &read_tx_data_arg, return ret; } -int ObTxTable::supplement_undo_actions_if_exist(ObTxData *tx_data) +int ObTxTable::supplement_tx_op_if_exist(ObTxData *tx_data) { - return tx_data_table_.supplement_undo_actions_if_exist(tx_data); + return tx_data_table_.supplement_tx_op_if_exist(tx_data); } int ObTxTable::self_freeze_task() { return tx_data_table_.self_freeze_task(); } @@ -1015,7 +1017,7 @@ int ObTxTable::generate_virtual_tx_data_row(const transaction::ObTransID tx_id, { GenerateVirtualTxDataRowFunctor fn(row_data); ObTxDataMiniCache mini_cache; - ObReadTxDataArg read_tx_data_arg(tx_id, epoch_, mini_cache); + ObReadTxDataArg 
read_tx_data_arg(tx_id, epoch_, mini_cache, true); int ret = check_with_tx_data(read_tx_data_arg, fn); return ret; } diff --git a/src/storage/tx_table/ob_tx_table.h b/src/storage/tx_table/ob_tx_table.h index d004f41ec..12ec5b154 100644 --- a/src/storage/tx_table/ob_tx_table.h +++ b/src/storage/tx_table/ob_tx_table.h @@ -138,11 +138,11 @@ public: /** * @brief check whether the row key is locked by tx id - * - * @param[in] read_trans_id - * @param[in] data_trans_id - * @param[in] sql_sequence - * @param[out] lock_state + * + * @param[in] read_trans_id + * @param[in] data_trans_id + * @param[in] sql_sequence + * @param[out] lock_state */ int check_row_locked(ObReadTxDataArg &read_tx_data_arg, const transaction::ObTransID &read_tx_id, @@ -151,10 +151,10 @@ public: /** * @brief check whether transaction data_tx_id with sql_sequence is readable. (sql_sequence may be unreadable for txn or stmt rollback) - * - * @param[in] data_tx_id - * @param[in] sql_sequence - * @param[out] can_read + * + * @param[in] data_tx_id + * @param[in] sql_sequence + * @param[out] can_read */ int check_sql_sequence_can_read(ObReadTxDataArg &read_tx_data_arg, const transaction::ObTxSEQ &sql_sequence, @@ -243,7 +243,7 @@ public: * * @param[in & out] tx_data The pointer of tx data to be supplemented which is in tx ctx. 
*/ - int supplement_undo_actions_if_exist(ObTxData *tx_data); + int supplement_tx_op_if_exist(ObTxData *tx_data); int prepare_for_safe_destroy(); diff --git a/src/storage/tx_table/ob_tx_table_interface.cpp b/src/storage/tx_table/ob_tx_table_interface.cpp index 70e21c536..8c39a26ff 100644 --- a/src/storage/tx_table/ob_tx_table_interface.cpp +++ b/src/storage/tx_table/ob_tx_table_interface.cpp @@ -57,6 +57,17 @@ int ObTxTableGuard::check_row_locked(const transaction::ObTransID &read_tx_id, } } +int ObTxTableGuard::load_tx_op(const transaction::ObTransID &tx_id, ObTxData &tx_data) +{ + if (OB_NOT_NULL(tx_table_)) { + ObReadTxDataArg arg(tx_id, epoch_, mini_cache_, false); + LoadTxOpFunctor functor(tx_data); + return tx_table_->check_with_tx_data(arg, functor); + } else { + return OB_NOT_INIT; + } +} + int ObTxTableGuard::check_sql_sequence_can_read(const transaction::ObTransID tx_id, const transaction::ObTxSEQ &sql_sequence, bool &can_read) diff --git a/src/storage/tx_table/ob_tx_table_interface.h b/src/storage/tx_table/ob_tx_table_interface.h index 441dae335..7830769ce 100644 --- a/src/storage/tx_table/ob_tx_table_interface.h +++ b/src/storage/tx_table/ob_tx_table_interface.h @@ -70,6 +70,8 @@ public: // dalegate functions const transaction::ObTxSEQ &sql_sequence, storage::ObStoreRowLockState &lock_state); + int load_tx_op(const transaction::ObTransID &tx_id, ObTxData &tx_data); + int check_sql_sequence_can_read(const transaction::ObTransID tx_id, const transaction::ObTxSEQ &sql_sequence, bool &can_read); int get_tx_state_with_scn(const transaction::ObTransID tx_id, diff --git a/src/storage/tx_table/ob_tx_table_iterator.cpp b/src/storage/tx_table/ob_tx_table_iterator.cpp index 87def4b7e..57415ba95 100644 --- a/src/storage/tx_table/ob_tx_table_iterator.cpp +++ b/src/storage/tx_table/ob_tx_table_iterator.cpp @@ -73,6 +73,10 @@ int ObTxDataMemtableScanIterator::TxData2DatumRowConverter::init(ObTxData *tx_da ret = OB_INVALID_ARGUMENT; STORAGE_LOG(ERROR, "tx data is 
null", KR(ret)); } else if (INT64_MAX != tx_data->tx_id_.get_id()) {// normal tx data need local buffer to serialize + SpinRLockManualGuard tx_op_guard; + if (tx_data->op_guard_.is_valid()) { + tx_op_guard.lock(tx_data->op_guard_->get_lock()); + } buffer_len_ = tx_data->get_serialize_size(); if (nullptr == (serialize_buffer_ = (char *)DEFAULT_TX_DATA_ALLOCATOR.alloc(buffer_len_))) { ret = OB_ALLOCATE_MEMORY_FAILED; diff --git a/unittest/storage/tx/it/test_register_mds.cpp b/unittest/storage/tx/it/test_register_mds.cpp index 51956f986..4ad59a8f2 100644 --- a/unittest/storage/tx/it/test_register_mds.cpp +++ b/unittest/storage/tx/it/test_register_mds.cpp @@ -142,6 +142,7 @@ public: auto test_name = test_info->name(); MTL_MEM_ALLOC_MGR.init(); _TRANS_LOG(INFO, ">>>> starting test : %s", test_name); + LOG_INFO(">>>>>>starting>>>>>>>>", K(test_name)); } virtual void TearDown() override { @@ -151,6 +152,7 @@ public: _TRANS_LOG(INFO, ">>>> tearDown test : %s", test_name); ObClockGenerator::destroy(); ObMallocAllocator::get_instance()->recycle_tenant_allocator(1001); + LOG_INFO(">>>>>teardown>>>>>>>>", K(test_name)); } MsgBus bus_; }; diff --git a/unittest/storage/tx/mock_utils/basic_fake_define.h b/unittest/storage/tx/mock_utils/basic_fake_define.h index 2ec02762f..d821326ec 100644 --- a/unittest/storage/tx/mock_utils/basic_fake_define.h +++ b/unittest/storage/tx/mock_utils/basic_fake_define.h @@ -37,7 +37,10 @@ namespace transaction { class ObFakeTxDataTable : public ObTxDataTable { public: ObSliceAlloc slice_allocator_; - ObTenantTxDataAllocator *FAKE_ALLOCATOR = (ObTenantTxDataAllocator *)0x1; + ObTenantTxDataAllocator __FAKE_ALLOCATOR_OBJ; + ObTenantTxDataAllocator *FAKE_ALLOCATOR = &__FAKE_ALLOCATOR_OBJ; + ObTenantTxDataOpAllocator __FAKE_ALLOCATOR_OBJ2; + ObTenantTxDataOpAllocator *FAKE_ALLOCATOR2 = &__FAKE_ALLOCATOR_OBJ2; public: ObFakeTxDataTable() : arena_allocator_(), map_(arena_allocator_, 1 << 20 /*2097152*/) @@ -51,6 +54,8 @@ public: OB_ASSERT(OB_SUCCESS == 
slice_allocator_.init( sizeof(ObTxData), OB_MALLOC_NORMAL_BLOCK_SIZE, common::default_blk_alloc, mem_attr)); slice_allocator_.set_nway(32); + FAKE_ALLOCATOR->init("FAKE_A"); + FAKE_ALLOCATOR2->init(); is_inited_ = true; } virtual int init(ObLS *ls, ObTxCtxTable *tx_ctx_table) override @@ -70,6 +75,7 @@ public: ObTxData *tx_data = new (ptr) ObTxData(); tx_data->ref_cnt_ = 100; tx_data->tx_data_allocator_ = FAKE_ALLOCATOR; + tx_data->op_allocator_ = FAKE_ALLOCATOR2; tx_data_guard.init(tx_data); return OB_ISNULL(tx_data) ? OB_ALLOCATE_MEMORY_FAILED : OB_SUCCESS; } @@ -83,7 +89,6 @@ public: to->tx_data_allocator_ = FAKE_ALLOCATOR; to_guard.init(to); OX (*to = *from); - OZ (deep_copy_undo_status_list_(from->undo_status_list_, to->undo_status_list_)); return ret; } virtual void free_tx_data(ObTxData *tx_data) diff --git a/unittest/storage/tx_table/test_tx_ctx_table.cpp b/unittest/storage/tx_table/test_tx_ctx_table.cpp index 60bded38b..0e2d175fd 100644 --- a/unittest/storage/tx_table/test_tx_ctx_table.cpp +++ b/unittest/storage/tx_table/test_tx_ctx_table.cpp @@ -190,6 +190,7 @@ public: ObIMemtableMgr *mt_mgr_; ObTxCtxMemtableMgr *ctx_mt_mgr_; ObTenantTxDataAllocator tx_data_allocator_; + ObTenantTxDataOpAllocator tx_data_op_allocator_; ObTenantBase tenant_base_; }; @@ -304,6 +305,7 @@ TEST_F(TestTxCtxTable, test_tx_ctx_memtable_mgr) attr.tenant_id_ = MTL_ID(); tx_data_allocator_.init("test"); tx_data_table.tx_data_allocator_ = &tx_data_allocator_; + tx_data_op_allocator_.init(); ObTxPalfParam palf_param((logservice::ObLogHandler *)(0x01), (transaction::ObDupTableLSHandler *)(0x02));