From a1c2792ab7f0e0e631fed657d2d2cb374b6a933d Mon Sep 17 00:00:00 2001 From: windye Date: Thu, 23 Nov 2023 10:41:22 +0000 Subject: [PATCH] optimization for tablet meta iops [FEAT MERGE] Co-authored-by: Tyshawn --- mittest/mtlenv/storage/CMakeLists.txt | 2 +- .../blocksstable/test_shared_macro_block.cpp | 1 - .../mtlenv/storage/test_ls_tablet_service.cpp | 45 +- mittest/mtlenv/storage/test_macro_ref_cnt.cpp | 366 ------ .../test_shared_block_reader_writer.cpp | 83 +- .../storage/test_tablet_block_id_list.cpp | 461 +++++++ .../storage/test_tenant_meta_mem_mgr.cpp | 53 +- src/storage/CMakeLists.txt | 5 + .../index_block/ob_index_block_builder.cpp | 3 +- src/storage/blocksstable/ob_block_manager.cpp | 126 +- src/storage/blocksstable/ob_block_manager.h | 12 + .../ob_shared_macro_block_manager.cpp | 3 +- .../ob_shared_block_reader_writer.cpp | 197 ++- .../ob_shared_block_reader_writer.h | 61 +- .../ob_column_oriented_sstable.cpp | 39 - .../column_store/ob_column_oriented_sstable.h | 2 - src/storage/ls/ob_ls_tablet_service.cpp | 49 +- src/storage/ls/ob_ls_tablet_service.h | 18 + src/storage/meta_mem/ob_meta_obj_struct.cpp | 13 +- src/storage/meta_mem/ob_meta_obj_struct.h | 13 +- src/storage/meta_mem/ob_meta_pointer.h | 471 +++++++ .../meta_mem/ob_storage_meta_cache.cpp | 128 +- src/storage/meta_mem/ob_storage_meta_cache.h | 32 +- src/storage/meta_mem/ob_tablet_pointer.cpp | 19 +- src/storage/meta_mem/ob_tablet_pointer.h | 3 +- .../meta_mem/ob_tablet_pointer_map.cpp | 4 +- src/storage/ob_disk_usage_reporter.cpp | 102 +- src/storage/ob_disk_usage_reporter.h | 1 - .../ob_linked_macro_block_reader.cpp | 2 +- .../ob_linked_macro_block_writer.cpp | 2 +- .../ob_tablet_replay_create_handler.cpp | 562 ++++++++ .../ob_tablet_replay_create_handler.h | 160 +++ .../ob_tenant_checkpoint_slog_handler.cpp | 322 +---- .../ob_tenant_checkpoint_slog_handler.h | 61 +- .../ob_tenant_storage_checkpoint_writer.cpp | 8 +- src/storage/tablet/ob_tablet.cpp | 855 ++++++++++-- 
src/storage/tablet/ob_tablet.h | 75 +- .../ob_tablet_block_aggregated_info.cpp | 1156 +++++++++++++++++ .../tablet/ob_tablet_block_aggregated_info.h | 250 ++++ src/storage/tablet/ob_tablet_block_header.cpp | 194 +++ src/storage/tablet/ob_tablet_block_header.h | 135 ++ src/storage/tablet/ob_tablet_common.h | 5 + src/storage/tablet/ob_tablet_complex_addr.h | 2 +- .../tablet/ob_tablet_macro_info_iterator.cpp | 324 +++++ .../tablet/ob_tablet_macro_info_iterator.h | 73 ++ src/storage/tablet/ob_tablet_meta.cpp | 9 +- src/storage/tablet/ob_tablet_meta.h | 6 +- src/storage/tablet/ob_tablet_persister.cpp | 995 +++++++++++--- src/storage/tablet/ob_tablet_persister.h | 212 ++- src/storage/tablet/ob_tablet_space_usage.cpp | 105 ++ src/storage/tablet/ob_tablet_space_usage.h | 53 + src/storage/tablet/ob_tablet_table_store.cpp | 16 +- unittest/storage/test_tablet_pointer_map.cpp | 12 +- 53 files changed, 6460 insertions(+), 1446 deletions(-) delete mode 100644 mittest/mtlenv/storage/test_macro_ref_cnt.cpp create mode 100644 mittest/mtlenv/storage/test_tablet_block_id_list.cpp create mode 100644 src/storage/meta_mem/ob_meta_pointer.h create mode 100644 src/storage/slog_ckpt/ob_tablet_replay_create_handler.cpp create mode 100644 src/storage/slog_ckpt/ob_tablet_replay_create_handler.h create mode 100644 src/storage/tablet/ob_tablet_block_aggregated_info.cpp create mode 100644 src/storage/tablet/ob_tablet_block_aggregated_info.h create mode 100644 src/storage/tablet/ob_tablet_block_header.cpp create mode 100644 src/storage/tablet/ob_tablet_block_header.h create mode 100644 src/storage/tablet/ob_tablet_macro_info_iterator.cpp create mode 100644 src/storage/tablet/ob_tablet_macro_info_iterator.h create mode 100644 src/storage/tablet/ob_tablet_space_usage.cpp create mode 100644 src/storage/tablet/ob_tablet_space_usage.h diff --git a/mittest/mtlenv/storage/CMakeLists.txt b/mittest/mtlenv/storage/CMakeLists.txt index 6a19e37ea5..7c86b276c8 100644 --- 
a/mittest/mtlenv/storage/CMakeLists.txt +++ b/mittest/mtlenv/storage/CMakeLists.txt @@ -24,9 +24,9 @@ storage_dml_unittest(test_tablet_mds_data test_tablet_mds_data.cpp) storage_dml_unittest(test_mds_data_read_write test_mds_data_read_write.cpp) storage_unittest(test_physical_copy_task test_physical_copy_task.cpp) storage_unittest(test_shared_block_reader_writer) -storage_dml_unittest(test_macro_ref_cnt test_macro_ref_cnt.cpp) # storage_dml_unittest(test_multi_version_merge_recycle) # storage_unittest(test_speed_limit test_speed_limit.cpp) +storage_dml_unittest(test_tablet_block_id_list test_tablet_block_id_list.cpp) storage_dml_unittest(test_ls_tablet_info_writer_and_reader test_ls_tablet_info_writer_and_reader.cpp) add_subdirectory(checkpoint) diff --git a/mittest/mtlenv/storage/blocksstable/test_shared_macro_block.cpp b/mittest/mtlenv/storage/blocksstable/test_shared_macro_block.cpp index f882afc98c..4911535e9e 100644 --- a/mittest/mtlenv/storage/blocksstable/test_shared_macro_block.cpp +++ b/mittest/mtlenv/storage/blocksstable/test_shared_macro_block.cpp @@ -54,7 +54,6 @@ TEST_F(TestSharedMacroBlk, test_used_size_mgr) MacroBlockId id1(0, MacroBlockId::AUTONOMIC_BLOCK_INDEX, 0); const int64_t illegal_size = ObSharedMacroBlockMgr::SMALL_SSTABLE_STHRESHOLD_SIZE; OK(shared_mgr.add_block(id1, illegal_size)); - ASSERT_NE(OB_SUCCESS, shared_mgr.free_block(id1, illegal_size)); OK(shared_mgr.free_block(id1, illegal_size - 1)); OK(shared_mgr.free_block(id1, 1)); ASSERT_EQ(OB_ENTRY_NOT_EXIST, shared_mgr.block_used_size_.get(id1, size)); diff --git a/mittest/mtlenv/storage/test_ls_tablet_service.cpp b/mittest/mtlenv/storage/test_ls_tablet_service.cpp index a77f95d0e7..fd58300442 100644 --- a/mittest/mtlenv/storage/test_ls_tablet_service.cpp +++ b/mittest/mtlenv/storage/test_ls_tablet_service.cpp @@ -848,7 +848,7 @@ TEST_F(TestLSTabletService, test_serialize_sstable_full_and_shell) MacroBlockId macro_id; macro_id.set_block_index(1001); macro_id.set_write_seq(111); - 
addr.set_block_addr(macro_id, 0, 4096); + addr.set_block_addr(macro_id, 0, 4096, ObMetaDiskAddr::DiskType::BLOCK); ret = sstable.set_addr(addr); ASSERT_EQ(common::OB_SUCCESS, ret); @@ -1043,6 +1043,49 @@ TEST_F(TestLSTabletService, update_tablet_release_memtable_for_offline) ASSERT_EQ(OB_SUCCESS, ret); } +TEST_F(TestLSTabletService, update_tablet_ddl_commit_scn) +{ + int ret = OB_SUCCESS; + const int64_t inner_tablet_count = INNER_TABLET_CNT; + ObTabletID data_tablet_id(90000002222); + ObTabletMapKey key; + key.ls_id_ = ls_id_; + key.tablet_id_ = data_tablet_id; + share::schema::ObTableSchema data_schema; + + TestSchemaUtils::prepare_data_schema(data_schema); + + ObLSHandle ls_handle; + ObLSService *ls_svr = MTL(ObLSService*); + ret = ls_svr->get_ls(ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD); + ret = TestTabletHelper::create_tablet(ls_handle, data_tablet_id, data_schema, allocator_); + ASSERT_EQ(OB_SUCCESS, ret); + valid_tablet_num(inner_tablet_count); + ASSERT_EQ(1 + INNER_TABLET_CNT, MTL(ObTenantMetaMemMgr*)->tablet_map_.map_.size()); + ret = TestTabletHelper::create_tablet(ls_handle, data_tablet_id, data_schema, allocator_); + ASSERT_EQ(OB_ENTRY_EXIST, ret); + valid_tablet_num(inner_tablet_count); + ASSERT_EQ(1 + INNER_TABLET_CNT, MTL(ObTenantMetaMemMgr*)->tablet_map_.map_.size()); + + ObTabletHandle tablet_handle; + ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet_svr()->get_tablet(data_tablet_id, tablet_handle)); + ASSERT_EQ(SCN::min_scn(), tablet_handle.get_obj()->tablet_meta_.ddl_commit_scn_); + share::SCN ddl_commit_scn; + ddl_commit_scn.convert_for_logservice(100); + ASSERT_EQ(OB_NOT_SUPPORTED, ls_handle.get_ls()->get_tablet_svr()->update_tablet_ddl_commit_scn(data_tablet_id, ddl_commit_scn)); + + ObTabletHandle new_tablet_hdl; + ASSERT_EQ(OB_SUCCESS, ObTabletPersister::persist_and_transform_tablet(*tablet_handle.get_obj(), new_tablet_hdl)); + ASSERT_EQ(OB_SUCCESS, MTL(ObTenantMetaMemMgr *)->compare_and_swap_tablet(key, tablet_handle, 
new_tablet_hdl)); + + ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet_svr()->update_tablet_ddl_commit_scn(data_tablet_id, ddl_commit_scn)); + ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet_svr()->get_tablet(data_tablet_id, tablet_handle)); + ASSERT_EQ(ddl_commit_scn, tablet_handle.get_obj()->tablet_meta_.ddl_commit_scn_); + + ret = ls_tablet_service_->do_remove_tablet(key); + ASSERT_EQ(OB_SUCCESS, ret); +} + } // end storage } // end oceanbase diff --git a/mittest/mtlenv/storage/test_macro_ref_cnt.cpp b/mittest/mtlenv/storage/test_macro_ref_cnt.cpp deleted file mode 100644 index 98d8154154..0000000000 --- a/mittest/mtlenv/storage/test_macro_ref_cnt.cpp +++ /dev/null @@ -1,366 +0,0 @@ -/** - * Copyright (c) 2021 OceanBase - * OceanBase CE is licensed under Mulan PubL v2. - * You can use this software according to the terms and conditions of the Mulan PubL v2. - * You may obtain a copy of Mulan PubL v2 at: - * http://license.coscl.org.cn/MulanPubL-2.0 - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, - * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, - * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. - * See the Mulan PubL v2 for more details. 
- */ - -#include -#include - -#define USING_LOG_PREFIX STORAGE -#include - -#define protected public -#define private public - -#include "storage/ls/ob_ls.h" -#include "storage/ls/ob_ls_tablet_service.h" -#include "storage/meta_mem/ob_tenant_meta_mem_mgr.h" -#include "storage/tablet/ob_tablet_persister.h" -#include "mittest/mtlenv/storage/blocksstable/ob_index_block_data_prepare.h" -#include "storage/blocksstable/ob_block_manager.h" -#include "storage/schema_utils.h" -#include "share/ob_rpc_struct.h" -#include "storage/tablet/ob_tablet_create_delete_helper.h" - -namespace oceanbase -{ -namespace storage -{ -using namespace common; -using namespace blocksstable; -using namespace storage; - -class TestTabletRefCnt : public TestIndexBlockDataPrepare -{ -public: - TestTabletRefCnt(); - ~TestTabletRefCnt(); -}; - -TestTabletRefCnt::TestTabletRefCnt() - : TestIndexBlockDataPrepare( - "Test Tablet Ref Cnt", - compaction::MINI_MERGE, - false, - OB_DEFAULT_MACRO_BLOCK_SIZE, - 10000, - 65536) -{ -} - -TestTabletRefCnt::~TestTabletRefCnt() -{ -} - -void convert_ctx_to_map( - const common::ObSEArray &tablet_meta_write_ctxs, - const common::ObSEArray &sstable_meta_write_ctxs, - std::unordered_map &ref_cnts) -{ - MacroBlockId macro_id; - int64_t offset; - int64_t size; - ObBlockManager::BlockInfo block_info; - - for (int64_t i = 0; i < tablet_meta_write_ctxs.count(); i++) { - ASSERT_EQ(OB_SUCCESS, tablet_meta_write_ctxs[i].addr_.get_block_addr(macro_id, offset, size)); - if (ref_cnts.count(macro_id.block_index_) == 0) { - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); - } - ref_cnts[macro_id.block_index_] = block_info.ref_cnt_; - } - } - - for (int64_t i = 0; i < tablet_meta_write_ctxs.count(); i++) { - ASSERT_EQ(OB_SUCCESS, tablet_meta_write_ctxs[i].addr_.get_block_addr(macro_id, offset, size)); - ref_cnts[macro_id.block_index_]++; - } - - for (int64_t i 
= 0; i < sstable_meta_write_ctxs.count(); i++) { - ASSERT_EQ(OB_SUCCESS, sstable_meta_write_ctxs[i].addr_.get_block_addr(macro_id, offset, size)); - ref_cnts[macro_id.block_index_]++; - } -} - -TEST_F(TestTabletRefCnt, test_persist_tablet) -{ - ObTabletID tablet_id(TestIndexBlockDataPrepare::tablet_id_); - ObLSID ls_id(ls_id_); - ObLSHandle ls_handle; - ObTabletHandle tablet_handle; - ObLSService *ls_svr = MTL(ObLSService*); - ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); - ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle)); - - common::ObArenaAllocator allocator; - common::ObSEArray tablet_meta_write_ctxs; - common::ObSEArray sstable_meta_write_ctxs; - ObTabletHandle new_tablet_handle; - std::unordered_map ref_cnts; - ObBlockManager::BlockInfo block_info; - - MacroBlockId macro_id; - int64_t offset; - int64_t size; - ObMacroBlockHandle macro_handle; - - // persist 4k tablet - ASSERT_EQ(OB_SUCCESS, ObTabletPersister::recursively_persist( - *(tablet_handle.get_obj()), allocator, tablet_meta_write_ctxs, sstable_meta_write_ctxs, new_tablet_handle)); - convert_ctx_to_map(tablet_meta_write_ctxs, sstable_meta_write_ctxs, ref_cnts); - ASSERT_EQ(OB_SUCCESS, ObTabletPersister::persist_4k_tablet(allocator, new_tablet_handle)); - ASSERT_EQ(OB_SUCCESS, new_tablet_handle.get_obj()->tablet_addr_.get_block_addr(macro_id, offset, size)); - ref_cnts[macro_id.block_index_]++; // tablet_meta_write_ctxs doesn't contain tablet_addr - - // check ref cnt - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); - } - ASSERT_EQ(block_info.ref_cnt_, ref_cnts[macro_id.block_index_]); - for (int64_t i = 0; i < tablet_meta_write_ctxs.count(); i++) { - ASSERT_EQ(OB_SUCCESS, tablet_meta_write_ctxs[i].addr_.get_block_addr(macro_id, offset, size)); - { - ObBucketHashWLockGuard 
lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); - } - ASSERT_EQ(block_info.ref_cnt_, ref_cnts[macro_id.block_index_]); - } - - // transform memory - ObTabletHandle tmp_tablet_handle; - ObTabletMapKey key(ls_id, tablet_id); - ASSERT_EQ(OB_SUCCESS, ObTabletCreateDeleteHelper::acquire_tablet_from_pool( - ObTabletPoolType::TP_LARGE, key, tmp_tablet_handle)); - ASSERT_EQ(OB_SUCCESS, ObTabletPersister::transform_tablet_memory_footprint( - *(new_tablet_handle.get_obj()), (char *)(tmp_tablet_handle.get_obj()), tmp_tablet_handle.get_buf_len())); - ASSERT_EQ(true, tmp_tablet_handle.get_obj()->hold_ref_cnt_); - tmp_tablet_handle.get_obj()->hold_ref_cnt_ = false; -} - -TEST_F(TestTabletRefCnt, test_meta_ref_cnt) -{ - int ret = OB_SUCCESS; - ObTabletID tablet_id(TestIndexBlockDataPrepare::tablet_id_); - ObLSID ls_id(ls_id_); - ObLSHandle ls_handle; - ObTabletHandle tablet_handle; - ObLSService *ls_svr = MTL(ObLSService*); - ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); - ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle)); - - int64_t offset = 0; - int64_t size = 0; - ObBlockManager::BlockInfo block_info; - MacroBlockId table_store_id; - ObMacroBlockHandle macro_handle; - int64_t ref_cnt = 0; - - ObTablet *tablet = tablet_handle.get_obj(); - ObTabletHandle new_tablet_handle; - - // persist 4k tablet - ASSERT_EQ(OB_SUCCESS, MTL(ObTenantCheckpointSlogHandler*)->get_shared_block_reader_writer().switch_block(macro_handle)); - ASSERT_EQ(OB_SUCCESS, ObTabletPersister::persist_and_transform_tablet(*tablet, new_tablet_handle)); - ASSERT_EQ(OB_SUCCESS, MTL(ObTenantCheckpointSlogHandler*)->get_shared_block_reader_writer().switch_block(macro_handle)); - ObTablet *new_tablet = new_tablet_handle.get_obj(); - ASSERT_EQ(OB_SUCCESS, new_tablet->table_store_addr_.addr_.get_block_addr(table_store_id, offset, size)); - { - 
ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, table_store_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(table_store_id, block_info)); - } - ref_cnt = block_info.ref_cnt_; - - // increase macro ref cnt - ASSERT_EQ(OB_SUCCESS, new_tablet->inc_macro_ref_cnt()); - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, table_store_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(table_store_id, block_info)); - } - ASSERT_EQ(ref_cnt * 2, block_info.ref_cnt_); - - // decrease macro ref cnt - new_tablet->dec_macro_ref_cnt(); - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, table_store_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(table_store_id, block_info)); - } - ASSERT_EQ(ref_cnt, block_info.ref_cnt_); - - // deserialize tablet - ObTenantCheckpointSlogHandler *ckpt_handler = MTL(ObTenantCheckpointSlogHandler*); - ObTabletHandle tmp_tablet_handle; - ObTabletMapKey key(ls_id, tablet_id); - char *buf = nullptr; - int64_t buf_len = 0; - int64_t pos = 0; - ObArenaAllocator allocator; - ASSERT_EQ(OB_SUCCESS, ckpt_handler->read_from_disk(new_tablet->tablet_addr_, allocator, buf, buf_len)); - ASSERT_EQ(OB_SUCCESS, ObTabletCreateDeleteHelper::acquire_tmp_tablet(key, allocator, tmp_tablet_handle)); - tmp_tablet_handle.get_obj()->tablet_addr_ = new_tablet->tablet_addr_; - ASSERT_EQ(OB_SUCCESS, tmp_tablet_handle.get_obj()->deserialize(allocator, buf, buf_len, pos)); - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, table_store_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(table_store_id, block_info)); - } - ASSERT_EQ(ref_cnt * 2, block_info.ref_cnt_); -} - -TEST_F(TestTabletRefCnt, test_data_ref_cnt) -{ - ObBlockManager::BlockInfo block_info; - MacroBlockId macro_id; - int64_t ref_cnt = 0; - common::ObArenaAllocator tmp_allocator("CacheSST"); - ObSafeArenaAllocator safe_allocator(tmp_allocator); - 
ObSSTableMetaHandle meta_handle; - ObMacroIdIterator iterator; - ASSERT_EQ(OB_SUCCESS, sstable_.get_meta(meta_handle, &safe_allocator)); - ASSERT_EQ(OB_SUCCESS, meta_handle.get_sstable_meta().get_macro_info().get_data_block_iter(iterator)); - ASSERT_EQ(OB_SUCCESS, iterator.get_next_macro_id(macro_id)); - - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); - } - ref_cnt = block_info.ref_cnt_; - - // increase macro ref cnt - bool inc_success; - ASSERT_EQ(OB_SUCCESS, sstable_.inc_macro_ref(inc_success)); - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); - } - ASSERT_EQ(ref_cnt + 1, block_info.ref_cnt_); - - // decrease macro ref cnt - sstable_.dec_macro_ref(); - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); - } - ASSERT_EQ(ref_cnt, block_info.ref_cnt_); -} - -TEST_F(TestTabletRefCnt, test_empty_shell_macro_ref_cnt) -{ - int ret = OB_SUCCESS; - ObLSID ls_id(ls_id_); - ObTabletID tablet_id(10000009); - share::schema::ObTableSchema schema; - TestSchemaUtils::prepare_data_schema(schema); - ObTablet *tablet = nullptr; - ObLSHandle ls_handle; - ObLSService *ls_svr = MTL(ObLSService*); - ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); - ObLSTabletService *ls_tablet_svr = ls_handle.get_ls()->get_tablet_svr(); - - // create and get empty shell - ret = TestTabletHelper::create_tablet(ls_handle, tablet_id, schema, allocator_, ObTabletStatus::Status::DELETED); - ASSERT_EQ(OB_SUCCESS, ret); - ret = ls_tablet_svr->update_tablet_to_empty_shell(tablet_id); - ASSERT_EQ(OB_SUCCESS, ret); - ObTabletMapKey key(ls_id, tablet_id); - ObTabletHandle tablet_handle; - ret = 
ls_tablet_svr->get_tablet(tablet_id, tablet_handle, 0, ObMDSGetTabletMode::READ_WITHOUT_CHECK); - tablet = tablet_handle.get_obj(); - - // check increasing macro ref cnt for empty shell tablet with file addr - ASSERT_EQ(false, tablet->hold_ref_cnt_); - ASSERT_EQ(OB_SUCCESS, tablet->inc_macro_ref_cnt()); - ASSERT_EQ(true, tablet->hold_ref_cnt_); - - // check increasing macro ref cnt for empty shell tablet with file addr - MacroBlockId macro_id; - int64_t offset; - int64_t size; - ObBlockManager::BlockInfo block_info; - int64_t ref_cnt = 0; - ObTabletHandle new_tablet_handle; - ASSERT_EQ(OB_SUCCESS, ObTabletPersister::persist_and_transform_tablet(*tablet, new_tablet_handle)); - ObTablet *new_tablet = new_tablet_handle.get_obj(); - ASSERT_EQ(OB_SUCCESS, new_tablet->tablet_addr_.get_block_addr(macro_id, offset, size)); - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); - } - ref_cnt = block_info.ref_cnt_; - - ASSERT_EQ(OB_SUCCESS, new_tablet->inc_macro_ref_cnt()); - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); - } - ASSERT_EQ(ref_cnt + 1, block_info.ref_cnt_); - - new_tablet->dec_macro_ref_cnt(); - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); - } - ASSERT_EQ(ref_cnt, block_info.ref_cnt_); -} - -TEST_F(TestTabletRefCnt, test_linked_block_ref_cnt) -{ - int ret = OB_SUCCESS; - ObMacroBlockHandle tmp_handle; - ObSharedBlockReaderWriter &shared_rw = MTL(ObTenantCheckpointSlogHandler*)->get_shared_block_reader_writer(); - ASSERT_EQ(OB_SUCCESS, shared_rw.switch_block(tmp_handle)); - common::ObArenaAllocator arena_allocator("unittest"); - ObSharedBlocksWriteCtx write_ctx; - static const int64_t BLOCK_CNT 
= 10; - - // write linked blocks and wait - char *buffer = static_cast(arena_allocator.alloc(4096)); - ObSharedBlockWriteInfo write_info; - ObSharedBlockLinkHandle write_handle; - write_info.buffer_ = buffer; - write_info.offset_ = 0; - write_info.size_ = 4096; - write_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_COMPACT_WRITE); - for (int64_t i = 0; i < BLOCK_CNT; i++) { - ASSERT_EQ(OB_SUCCESS, shared_rw.async_link_write(write_info, write_handle)); - } - ASSERT_EQ(OB_SUCCESS, write_handle.get_write_ctx(write_ctx)); - - // increase macro blocks' ref cnt and check - bool inc_success = false; - ObBlockManager::BlockInfo block_info; - MacroBlockId macro_id = write_ctx.addr_.block_id(); - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); - } - int64_t ref_cnt = block_info.ref_cnt_; - - ASSERT_EQ(OB_SUCCESS, ObTablet::inc_linked_block_ref_cnt(write_ctx.addr_, inc_success)); - { - ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); - ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); - } - ASSERT_EQ(ref_cnt + BLOCK_CNT, block_info.ref_cnt_); -} - -} // storage -} // oceanbase - -int main(int argc, char **argv) -{ - system("rm -f test_tablet_ref_cnt.log*"); - OB_LOGGER.set_file_name("test_tablet_ref_cnt.log", true, true); - oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); - oceanbase::common::ObClusterVersion::get_instance().init(CLUSTER_VERSION_4_1_0_0); - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} \ No newline at end of file diff --git a/mittest/mtlenv/storage/test_shared_block_reader_writer.cpp b/mittest/mtlenv/storage/test_shared_block_reader_writer.cpp index 1236aaeff0..435be8208f 100644 --- a/mittest/mtlenv/storage/test_shared_block_reader_writer.cpp +++ b/mittest/mtlenv/storage/test_shared_block_reader_writer.cpp @@ -148,13 +148,14 @@ 
TEST_F(TestSharedBlockRWriter, test_rwrite_easy_block) ObSharedBlocksWriteCtx write_ctx; ObSharedBlockReadInfo read_info; OK(write_handle.get_write_ctx(write_ctx)); - ASSERT_EQ(write_ctx.addr_.size_, 10 + sizeof(ObSharedBlockHeader)); + ASSERT_EQ(write_ctx.addr_.size_, 10); if (test_round == 9) { macro_id = write_ctx.addr_.block_id(); } read_info.addr_ = write_ctx.addr_; - ASSERT_EQ(read_info.addr_.size_, 10 + sizeof(ObSharedBlockHeader)); + ASSERT_EQ(read_info.addr_.size_, 10); + ASSERT_TRUE(read_info.addr_.is_raw_block()); read_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_COMPACT_READ); OK(rwriter.async_read(read_info, read_handle)); char *buf = nullptr; @@ -302,14 +303,12 @@ TEST_F(TestSharedBlockRWriter, test_cb_single_write) ObStorageMetaCache::ObStorageMetaIOCallback *cb = nullptr; callback_buf = allocator_.alloc(sizeof(ObStorageMetaCache::ObStorageMetaIOCallback)); ASSERT_NE(nullptr, callback_buf); - cb = new (callback_buf) ObStorageMetaCache::ObStorageMetaIOCallback; - cb->meta_type_ = meta_type; - cb->offset_ = meta_key.get_meta_addr().offset(); - cb->buf_size_ = meta_key.get_meta_addr().size(); - cb->handle_ = meta_handle.cache_handle_; - cb->allocator_ = &allocator_; - cb->tablet_= fake_tablet; - cb->key_ = meta_key; + cb = new (callback_buf) ObStorageMetaCache::ObStorageMetaIOCallback(&allocator_, + meta_type, + meta_key, + meta_handle.cache_handle_, + fake_tablet, + nullptr); ObSharedBlockReadInfo read_info; ObSharedBlockReadHandle read_handle; read_info.addr_ = write_ctx.addr_; @@ -377,14 +376,12 @@ TEST_F(TestSharedBlockRWriter, test_cb_batch_write) ObStorageMetaCache::ObStorageMetaIOCallback *cb = nullptr; callback_buf = allocator_.alloc(sizeof(ObStorageMetaCache::ObStorageMetaIOCallback)); ASSERT_NE(nullptr, callback_buf); - cb = new (callback_buf) ObStorageMetaCache::ObStorageMetaIOCallback; - cb->meta_type_ = meta_type; - cb->offset_ = meta_key.get_meta_addr().offset(); - cb->buf_size_ = meta_key.get_meta_addr().size(); - cb->handle_ = 
meta_handle.cache_handle_; - cb->allocator_ = &allocator_; - cb->tablet_= fake_tablet; - cb->key_ = meta_key; + cb = new (callback_buf) ObStorageMetaCache::ObStorageMetaIOCallback(&allocator_, + meta_type, + meta_key, + meta_handle.cache_handle_, + fake_tablet, + nullptr); ObSharedBlockReadInfo read_info; ObSharedBlockReadHandle read_handle; read_info.addr_ = write_ctxs[test_round].addr_; @@ -399,6 +396,54 @@ TEST_F(TestSharedBlockRWriter, test_cb_batch_write) ASSERT_EQ(sstable_size, buf_len); } +TEST_F(TestSharedBlockRWriter, test_parse_data_from_macro_block) +{ + ObSharedBlockReaderWriter rwriter; + OK(rwriter.init(true/*need align*/, false/*need cross*/)); + int test_round = 10; + + ObSharedBlockWriteInfo write_info; + ObArray write_infos; + char s[10][20]; + for (int i = 0; i < test_round; ++i) { + for (int j = 0; j < 20; ++j) { + s[i][j] = '0' + i; + } + write_info.buffer_ = s[i]; + write_info.offset_ = 0; + write_info.size_ = 20; + write_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_COMPACT_WRITE); + write_infos.push_back(write_info); + } + + ObSharedBlockBatchHandle write_handle; + OK(rwriter.async_batch_write(write_infos, write_handle)); + ASSERT_TRUE(write_handle.is_valid()); + + ObArray write_ctxs; + OK(write_handle.batch_get_write_ctx(write_ctxs)); + ASSERT_EQ(test_round, write_ctxs.count()); + MacroBlockId block_id = write_ctxs[0].addr_.block_id(); + ObMacroBlockHandle macro_handle; + ObMacroBlockReadInfo read_info; + const int64_t io_buf_size = OB_SERVER_BLOCK_MGR.get_macro_block_size(); + read_info.offset_ = 0; + read_info.size_ = io_buf_size; + read_info.io_desc_.set_mode(ObIOMode::READ); + read_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_COMPACT_READ); + read_info.io_desc_.set_group_id(ObIOModule::SHARED_BLOCK_RW_IO); + read_info.macro_block_id_ = block_id; + read_info.buf_ = static_cast(allocator_.alloc(io_buf_size)); + OK(ObBlockManager::read_block(read_info, macro_handle)); + char *buf = nullptr; + int64_t buf_len = 0; + for (int 
i = 0; i < test_round; ++i) { + OK(ObSharedBlockReaderWriter::parse_data_from_macro_block(macro_handle, write_ctxs[i].addr_, buf, buf_len)); + ASSERT_EQ(20, buf_len); + ASSERT_EQ(0, MEMCMP(s[i], buf, 20)); + } +} + TEST_F(TestSharedBlockRWriter, test_batch_write_switch_block) { // test switch block when batch write, which means hanging_=true @@ -488,7 +533,7 @@ TEST_F(TestSharedBlockRWriter, test_batch_write_bug1) common_header.set_attr(ObMacroBlockCommonHeader::MacroBlockType::SharedMetaData); const int64_t header_size = common_header.get_serialize_size(); ASSERT_EQ(addr.offset_, 0 + header_size); - ASSERT_EQ(addr.size_, data_size + sizeof(ObSharedBlockHeader)); + ASSERT_EQ(addr.size_, data_size); ASSERT_EQ(rwriter.offset_, 4096); ASSERT_EQ(rwriter.align_offset_, 4096); } diff --git a/mittest/mtlenv/storage/test_tablet_block_id_list.cpp b/mittest/mtlenv/storage/test_tablet_block_id_list.cpp new file mode 100644 index 0000000000..9df271fd03 --- /dev/null +++ b/mittest/mtlenv/storage/test_tablet_block_id_list.cpp @@ -0,0 +1,461 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE + +#include +#define private public +#define protected public +#include "storage/tablet/ob_tablet_block_aggregated_info.h" +#include "storage/slog_ckpt/ob_linked_macro_block_writer.h" +#include "storage/tablet/ob_tablet_persister.h" +#include "mittest/mtlenv/storage/blocksstable/ob_index_block_data_prepare.h" +#include "storage/schema_utils.h" +#include "storage/tablet/ob_tablet_macro_info_iterator.h" + +using namespace oceanbase::blocksstable; +namespace oceanbase +{ +namespace storage +{ +const int64_t TEST_LINKED_NUM = ObTabletMacroInfo::ID_COUNT_THRESHOLD / 3; +class TestBlockIdList : public TestIndexBlockDataPrepare +{ +public: + TestBlockIdList(); + virtual ~TestBlockIdList() = default; + int init_info_set(ObArenaAllocator &allocator, const int64_t id_count, ObBlockInfoSet &info_set); +}; + +TestBlockIdList::TestBlockIdList() + : TestIndexBlockDataPrepare( + "Test Tablet Ref Cnt", + MINI_MERGE, + OB_DEFAULT_MACRO_BLOCK_SIZE, + 10000, + 65536) +{ +} + +int TestBlockIdList::init_info_set(ObArenaAllocator &allocator, const int64_t id_count, ObBlockInfoSet &info_set) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < id_count; i++) { + MacroBlockId tmp_macro_id(i + 1, i + 1, 0); + if (OB_FAIL(info_set.data_block_info_set_.set_refactored(tmp_macro_id))) { + LOG_WARN("fail to set refactored for info set", K(ret), K(tmp_macro_id)); + } else if (OB_FAIL(info_set.meta_block_info_set_.set_refactored(tmp_macro_id))) { + LOG_WARN("fail to set refactored for info set", K(ret), K(tmp_macro_id)); + } else if (OB_FAIL(info_set.shared_meta_block_info_set_.set_refactored(tmp_macro_id))) { + LOG_WARN("fail to set refactored for info set", K(ret), K(tmp_macro_id)); + } else if (OB_FAIL(info_set.shared_data_block_info_map_.set_refactored(tmp_macro_id, i + 5))) { + LOG_WARN("fail to set refactored for info set", K(ret), K(tmp_macro_id)); + } + } + return ret; +} + +TEST_F(TestBlockIdList, test_id_list) +{ + ObBlockInfoSet 
info_set; + ObArenaAllocator allocator; + ObTabletMacroInfo macro_info; + ObLinkedMacroBlockItemWriter linked_writer; + ObBlockManager::BlockInfo block_info; + bool inc_success = false; + + // empty set + ASSERT_EQ(OB_SUCCESS, info_set.init()); + ASSERT_EQ(OB_SUCCESS, macro_info.init(allocator, info_set, linked_writer)); + + // normal set + macro_info.reset(); + MacroBlockId macro_id(0, 0, 0); + info_set.data_block_info_set_.set_refactored(macro_id); + info_set.meta_block_info_set_.set_refactored(macro_id); + info_set.shared_meta_block_info_set_.set_refactored(macro_id); + info_set.shared_data_block_info_map_.set_refactored(macro_id, 10); + ASSERT_EQ(OB_SUCCESS, macro_info.init(allocator, info_set, linked_writer)); + ASSERT_EQ(true, IS_EMPTY_BLOCK_LIST(macro_info.entry_block_)); + ASSERT_EQ(1, macro_info.shared_data_block_info_arr_.cnt_); + ASSERT_EQ(1, macro_info.shared_meta_block_info_arr_.cnt_); + ASSERT_EQ(1, macro_info.data_block_info_arr_.cnt_); + ASSERT_EQ(1, macro_info.meta_block_info_arr_.cnt_); + ObSArray meta_block_arr; + ObSArray data_block_arr; + ObSArray shared_meta_block_arr; + ObSArray shared_data_block_arr; + ASSERT_EQ(OB_SUCCESS, macro_info.get_all_macro_ids(meta_block_arr, data_block_arr, shared_meta_block_arr, shared_data_block_arr)); + ASSERT_EQ(macro_id, meta_block_arr.at(0)); + ASSERT_EQ(macro_id, data_block_arr.at(0)); + ASSERT_EQ(macro_id, shared_meta_block_arr.at(0)); + ASSERT_EQ(macro_id, shared_data_block_arr.at(0)); + + inc_success = false; + ASSERT_EQ(OB_SUCCESS, macro_info.inc_macro_ref(inc_success)); + ASSERT_EQ(true, inc_success); + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); + } + ASSERT_EQ(4, block_info.ref_cnt_); // four arrars all have this macro id + + macro_info.dec_macro_ref(); + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); + ASSERT_EQ(OB_SUCCESS, 
OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); + } + ASSERT_EQ(0, block_info.ref_cnt_); + + info_set.data_block_info_set_.reuse(); + info_set.meta_block_info_set_.reuse(); + info_set.shared_meta_block_info_set_.reuse(); + info_set.shared_data_block_info_map_.reuse(); + + // large set + int64_t linked_ref_cnt = 0; + macro_info.reset(); + macro_id = MacroBlockId(1, 1, 0); + ASSERT_EQ(OB_SUCCESS, init_info_set(allocator, TEST_LINKED_NUM, info_set)); + ASSERT_EQ(OB_SUCCESS, macro_info.init(allocator, info_set, linked_writer)); + ASSERT_EQ(false, IS_EMPTY_BLOCK_LIST(macro_info.entry_block_)); + ASSERT_EQ(TEST_LINKED_NUM, macro_info.shared_data_block_info_arr_.cnt_); + ASSERT_EQ(TEST_LINKED_NUM, macro_info.shared_meta_block_info_arr_.cnt_); + ASSERT_EQ(TEST_LINKED_NUM, macro_info.data_block_info_arr_.cnt_); + ASSERT_EQ(TEST_LINKED_NUM, macro_info.meta_block_info_arr_.cnt_); + meta_block_arr.reset(); + data_block_arr.reset(); + shared_meta_block_arr.reset(); + shared_data_block_arr.reset(); + ASSERT_EQ(OB_SUCCESS, macro_info.get_all_macro_ids(meta_block_arr, data_block_arr, shared_meta_block_arr, shared_data_block_arr)); + ASSERT_EQ(TEST_LINKED_NUM, meta_block_arr.count()); + + ObIArray &linked_ids = linked_writer.get_meta_block_list(); + MacroBlockId &linked_id = linked_ids.at(0); + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, linked_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(linked_id, block_info)); + } + linked_ref_cnt = block_info.ref_cnt_; + + inc_success = false; + int64_t linked_macro_ref = 0; + ASSERT_EQ(OB_SUCCESS, macro_info.inc_macro_ref(inc_success)); + ASSERT_EQ(true, inc_success); + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); + } + ASSERT_EQ(4, block_info.ref_cnt_); // four arrars all have this macro id + + { + ObBucketHashWLockGuard 
lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, linked_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(linked_id, block_info)); + } + ASSERT_EQ(linked_ref_cnt + 1, block_info.ref_cnt_); + + inc_success = false; + ASSERT_EQ(OB_SUCCESS, macro_info.inc_macro_ref(inc_success)); + ASSERT_EQ(true, inc_success); + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); + } + ASSERT_EQ(8, block_info.ref_cnt_); + + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, linked_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(linked_id, block_info)); + } + ASSERT_EQ(linked_ref_cnt + 2, block_info.ref_cnt_); +} + +TEST_F(TestBlockIdList, test_meta_macro_ref_cnt) +{ + ObTabletID tablet_id(TestIndexBlockDataPrepare::tablet_id_); + ObLSID ls_id(ls_id_); + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObLSService *ls_svr = MTL(ObLSService*); + ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); + ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle)); + + int64_t offset = 0; + int64_t size = 0; + ObBlockManager::BlockInfo block_info; + MacroBlockId table_store_id; + ObMacroBlockHandle macro_handle; + int64_t ref_cnt = 0; + + ObTablet *tablet = tablet_handle.get_obj(); + ObTabletHandle new_tablet_handle; + + // persist 4k tablet + ASSERT_EQ(OB_SUCCESS, MTL(ObTenantCheckpointSlogHandler*)->get_shared_block_reader_writer().switch_block(macro_handle)); + ASSERT_EQ(OB_SUCCESS, ObTabletPersister::persist_and_transform_tablet(*tablet, new_tablet_handle)); + ASSERT_EQ(OB_SUCCESS, MTL(ObTenantCheckpointSlogHandler*)->get_shared_block_reader_writer().switch_block(macro_handle)); + ObTablet *new_tablet = new_tablet_handle.get_obj(); + + ASSERT_EQ(OB_SUCCESS, new_tablet->table_store_addr_.addr_.get_block_addr(table_store_id, offset, size)); + { + 
ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, table_store_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(table_store_id, block_info)); + } + ASSERT_EQ(1, block_info.ref_cnt_); + + ASSERT_EQ(OB_SUCCESS, new_tablet->inc_macro_ref_cnt()); + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, table_store_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(table_store_id, block_info)); + } + ASSERT_EQ(2, block_info.ref_cnt_); + + new_tablet->dec_macro_ref_cnt(); + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, table_store_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(table_store_id, block_info)); + } + ASSERT_EQ(1, block_info.ref_cnt_); +} + +TEST_F(TestBlockIdList, test_info_iterator) +{ + ObMacroInfoIterator macro_iter; + ObLinkedMacroBlockItemWriter linked_writer; + ObArenaAllocator allocator; + ObTabletBlockInfo block_info; + ObTabletID tablet_id(TestIndexBlockDataPrepare::tablet_id_); + ObLSID ls_id(ls_id_); + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObLSService *ls_svr = MTL(ObLSService*); + ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); + ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle)); + ObTablet *tablet = tablet_handle.get_obj(); + tablet->macro_info_addr_.addr_.set_mem_addr(0, sizeof(ObTabletMacroInfo)); + + // linked macro info + ObBlockInfoSet info_set; + ObTabletMacroInfo macro_info; + tablet->macro_info_addr_.ptr_ = ¯o_info; + ASSERT_EQ(OB_SUCCESS, info_set.init()); + ASSERT_EQ(OB_SUCCESS, init_info_set(allocator, TEST_LINKED_NUM, info_set)); + ASSERT_EQ(OB_SUCCESS, macro_info.init(allocator, info_set, linked_writer)); + ASSERT_EQ(OB_SUCCESS, macro_iter.init(ObTabletMacroType::MAX, *tablet)); + for (int64_t i = 0; i < TEST_LINKED_NUM * 4; i++) { + ASSERT_EQ(OB_SUCCESS, macro_iter.get_next(block_info)); + ASSERT_EQ(OB_HASH_EXIST, 
info_set.data_block_info_set_.exist_refactored(block_info.macro_id_)); + } + ASSERT_EQ(OB_ITER_END, macro_iter.get_next(block_info)); + + macro_iter.destroy(); // iterate targeted ids + ASSERT_NE(OB_SUCCESS, macro_iter.init(ObTabletMacroType::INVALID_TYPE, *tablet)); + ASSERT_EQ(OB_SUCCESS, macro_iter.init(ObTabletMacroType::SHARED_DATA_BLOCK, *tablet)); + for (int64_t i = 0; i < TEST_LINKED_NUM; i++) { + ASSERT_EQ(OB_SUCCESS, macro_iter.get_next(block_info)); + ASSERT_EQ(OB_HASH_EXIST, info_set.data_block_info_set_.exist_refactored(block_info.macro_id_)); + ASSERT_EQ(ObTabletMacroType::SHARED_DATA_BLOCK, block_info.block_type_); + ASSERT_NE(OB_DEFAULT_MACRO_BLOCK_SIZE, block_info.occupy_size_); + } + ASSERT_EQ(OB_ITER_END, macro_iter.get_next(block_info)); + + // memory macro info + linked_writer.reset(); + ObBlockInfoSet info_set_2; + ObTabletMacroInfo macro_info_2; + tablet->macro_info_addr_.ptr_ = ¯o_info_2; + ASSERT_EQ(OB_SUCCESS, info_set_2.init()); + ASSERT_EQ(OB_SUCCESS, init_info_set(allocator, 15, info_set_2)); + ASSERT_EQ(OB_SUCCESS, macro_info_2.init(allocator, info_set_2, linked_writer)); + macro_iter.destroy(); + ASSERT_EQ(OB_SUCCESS, macro_iter.init(ObTabletMacroType::MAX, *tablet)); + for (int64_t i = 0; i < 60; i++) { + ASSERT_EQ(OB_SUCCESS, macro_iter.get_next(block_info)); + ASSERT_EQ(OB_HASH_EXIST, info_set_2.data_block_info_set_.exist_refactored(block_info.macro_id_)); + } + ASSERT_EQ(OB_ITER_END, macro_iter.get_next(block_info)); + + macro_iter.destroy(); // iterate targeted ids + ASSERT_EQ(OB_SUCCESS, macro_iter.init(ObTabletMacroType::META_BLOCK, *tablet)); + for (int64_t i = 0; i < 15; i++) { + ASSERT_EQ(OB_SUCCESS, macro_iter.get_next(block_info)); + ASSERT_EQ(OB_HASH_EXIST, info_set.data_block_info_set_.exist_refactored(block_info.macro_id_)); + ASSERT_EQ(ObTabletMacroType::META_BLOCK, block_info.block_type_); + ASSERT_EQ(OB_DEFAULT_MACRO_BLOCK_SIZE, block_info.occupy_size_); + } + ASSERT_EQ(OB_ITER_END, macro_iter.get_next(block_info)); 
+ + // empty macro info + linked_writer.reset(); + ObBlockInfoSet info_set_3; + ObTabletMacroInfo macro_info_3; + tablet->macro_info_addr_.ptr_ = ¯o_info_3; + ASSERT_EQ(OB_SUCCESS, info_set_3.init()); + ASSERT_EQ(OB_SUCCESS, macro_info_3.init(allocator, info_set_3, linked_writer)); + macro_iter.destroy(); + ASSERT_EQ(OB_SUCCESS, macro_iter.init(ObTabletMacroType::MAX, *tablet)); + ASSERT_EQ(OB_ITER_END, macro_iter.get_next(block_info)); + + // linked macro info without meta_block_id and shared_meta_block_id + linked_writer.reset(); + ObBlockInfoSet info_set_4; + ObTabletMacroInfo macro_info_4; + tablet->macro_info_addr_.ptr_ = ¯o_info_4; + ASSERT_EQ(OB_SUCCESS, info_set_4.init()); + for (int64_t i = 0; i < ObTabletMacroInfo::ID_COUNT_THRESHOLD; i++) { + MacroBlockId tmp_macro_id(i + 1, i + 1, 0); + ASSERT_EQ(OB_SUCCESS, info_set_4.data_block_info_set_.set_refactored(tmp_macro_id)); + ASSERT_EQ(OB_SUCCESS, info_set_4.shared_data_block_info_map_.set_refactored(tmp_macro_id, i + 5)); + } + ASSERT_EQ(OB_SUCCESS, macro_info_4.init(allocator, info_set_4, linked_writer)); + macro_iter.destroy(); + ASSERT_EQ(OB_SUCCESS, macro_iter.init(ObTabletMacroType::MAX, *tablet)); + for (int64_t i = 0; i < ObTabletMacroInfo::ID_COUNT_THRESHOLD * 2; i++) { + ASSERT_EQ(OB_SUCCESS, macro_iter.get_next(block_info)); + ASSERT_EQ(OB_HASH_EXIST, info_set_4.data_block_info_set_.exist_refactored(block_info.macro_id_)); + } + ASSERT_EQ(OB_ITER_END, macro_iter.get_next(block_info)); + + // memory macro info without meta_block_id and shared_meta_block_id + linked_writer.reset(); + ObBlockInfoSet info_set_5; + ObTabletMacroInfo macro_info_5; + tablet->macro_info_addr_.ptr_ = ¯o_info_5; + static const int64_t memory_id_cnt = 100; + ASSERT_EQ(OB_SUCCESS, info_set_5.init()); + for (int64_t i = 0; i < memory_id_cnt; i++) { + MacroBlockId tmp_macro_id(i + 1, i + 1, 0); + ASSERT_EQ(OB_SUCCESS, info_set_5.data_block_info_set_.set_refactored(tmp_macro_id)); + ASSERT_EQ(OB_SUCCESS, 
info_set_5.shared_data_block_info_map_.set_refactored(tmp_macro_id, i + 5)); + } + ASSERT_EQ(OB_SUCCESS, macro_info_5.init(allocator, info_set_5, linked_writer)); + macro_iter.destroy(); + ASSERT_EQ(OB_SUCCESS, macro_iter.init(ObTabletMacroType::MAX, *tablet)); + for (int64_t i = 0; i < memory_id_cnt * 2; i++) { + ASSERT_EQ(OB_SUCCESS, macro_iter.get_next(block_info)); + ASSERT_EQ(OB_HASH_EXIST, info_set_5.data_block_info_set_.exist_refactored(block_info.macro_id_)); + } + ASSERT_EQ(OB_ITER_END, macro_iter.get_next(block_info)); + tablet->macro_info_addr_.ptr_ = nullptr; +} + +TEST_F(TestBlockIdList, test_empty_shell_macro_ref_cnt) +{ + int ret = OB_SUCCESS; + ObLSID ls_id(ls_id_); + ObTabletID tablet_id(10000009); + share::schema::ObTableSchema schema; + TestSchemaUtils::prepare_data_schema(schema); + ObTablet *tablet = nullptr; + ObLSHandle ls_handle; + ObLSService *ls_svr = MTL(ObLSService*); + ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); + ObLSTabletService *ls_tablet_svr = ls_handle.get_ls()->get_tablet_svr(); + + // create and get empty shell + ret = TestTabletHelper::create_tablet(ls_handle, tablet_id, schema, allocator_, ObTabletStatus::Status::DELETED); + ASSERT_EQ(OB_SUCCESS, ret); + ret = ls_tablet_svr->update_tablet_to_empty_shell(tablet_id); + ASSERT_EQ(OB_SUCCESS, ret); + ObTabletMapKey key(ls_id, tablet_id); + ObTabletHandle tablet_handle; + ret = ls_tablet_svr->get_tablet(tablet_id, tablet_handle, 0, ObMDSGetTabletMode::READ_WITHOUT_CHECK); + tablet = tablet_handle.get_obj(); + + // check increasing macro ref cnt for empty shell tablet with file addr + ASSERT_EQ(false, tablet->hold_ref_cnt_); + ASSERT_EQ(OB_SUCCESS, tablet->inc_macro_ref_cnt()); + ASSERT_EQ(true, tablet->hold_ref_cnt_); + + // check increasing macro ref cnt for empty shell tablet with file addr + MacroBlockId macro_id; + int64_t offset; + int64_t size; + ObBlockManager::BlockInfo block_info; + int64_t ref_cnt = 0; + ObTabletHandle 
new_tablet_handle; + ASSERT_EQ(OB_SUCCESS, ObTabletPersister::persist_and_transform_tablet(*tablet, new_tablet_handle)); + ObTablet *new_tablet = new_tablet_handle.get_obj(); + ASSERT_EQ(OB_SUCCESS, new_tablet->tablet_addr_.get_block_addr(macro_id, offset, size)); + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); + } + ref_cnt = block_info.ref_cnt_; + + ASSERT_EQ(OB_SUCCESS, new_tablet->inc_macro_ref_cnt()); + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); + } + ASSERT_EQ(ref_cnt + 1, block_info.ref_cnt_); + + new_tablet->dec_macro_ref_cnt(); + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); + } + ASSERT_EQ(ref_cnt, block_info.ref_cnt_); +} + +TEST_F(TestBlockIdList, test_linked_block_ref_cnt) +{ + ObMacroBlockHandle tmp_handle; + ObSharedBlockReaderWriter &shared_rw = MTL(ObTenantCheckpointSlogHandler*)->get_shared_block_reader_writer(); + ASSERT_EQ(OB_SUCCESS, shared_rw.switch_block(tmp_handle)); + common::ObArenaAllocator arena_allocator("unittest"); + ObSharedBlocksWriteCtx write_ctx; + static const int64_t BLOCK_CNT = 10; + + // write linked blocks and wait + char *buffer = static_cast(arena_allocator.alloc(4096)); + ObSharedBlockWriteInfo write_info; + ObSharedBlockLinkHandle write_handle; + write_info.buffer_ = buffer; + write_info.offset_ = 0; + write_info.size_ = 4096; + write_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_COMPACT_WRITE); + for (int64_t i = 0; i < BLOCK_CNT; i++) { + ASSERT_EQ(OB_SUCCESS, shared_rw.async_link_write(write_info, write_handle)); + } + ASSERT_EQ(OB_SUCCESS, write_handle.get_write_ctx(write_ctx)); + + // increase macro blocks' ref cnt and check + bool 
inc_success = false; + ObBlockManager::BlockInfo block_info; + MacroBlockId macro_id = write_ctx.addr_.block_id(); + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); + } + int64_t ref_cnt = block_info.ref_cnt_; + + ASSERT_EQ(OB_SUCCESS, ObTablet::inc_linked_block_ref_cnt(write_ctx.addr_, inc_success)); + { + ObBucketHashWLockGuard lock_guard(OB_SERVER_BLOCK_MGR.bucket_lock_, macro_id.hash()); + ASSERT_EQ(OB_SUCCESS, OB_SERVER_BLOCK_MGR.block_map_.get(macro_id, block_info)); + } + ASSERT_EQ(ref_cnt + BLOCK_CNT, block_info.ref_cnt_); +} + +} // storage +} // oceanbase + +int main(int argc, char **argv) +{ + system("rm -f test_tablet_block_id_list.log*"); + OB_LOGGER.set_file_name("test_tablet_block_id_list.log", true, true); + oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); + oceanbase::common::ObClusterVersion::get_instance().init(CLUSTER_VERSION_4_1_0_0); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/mittest/mtlenv/storage/test_tenant_meta_mem_mgr.cpp b/mittest/mtlenv/storage/test_tenant_meta_mem_mgr.cpp index f77aa34f27..eaa8e82144 100644 --- a/mittest/mtlenv/storage/test_tenant_meta_mem_mgr.cpp +++ b/mittest/mtlenv/storage/test_tenant_meta_mem_mgr.cpp @@ -30,6 +30,7 @@ #include "storage/tablet/ob_tablet_status.h" #include "mtlenv/mock_tenant_module_env.h" #include "storage/test_dml_common.h" +#include "storage/slog_ckpt/ob_linked_macro_block_writer.h" namespace oceanbase { @@ -675,8 +676,7 @@ TEST_F(TestTenantMetaMemMgr, test_wash_tablet) ASSERT_TRUE(tablet->pointer_hdl_.is_valid()); ObSSTable sstable; - common::ObSEArray tablet_meta_write_ctxs; - common::ObSEArray sstable_meta_write_ctxs; + common::ObSEArray total_write_ctxs; checkpoint::ObCheckpointExecutor ckpt_executor; checkpoint::ObDataCheckpoint data_checkpoint; ObLS ls; @@ -685,6 +685,9 @@ 
TEST_F(TestTenantMetaMemMgr, test_wash_tablet) ObLSTabletService ls_tablet_svr; MockObLogHandler log_handler; ObFreezer freezer; + ObTabletSpaceUsage space_usage; + ObTabletMacroInfo tablet_macro_info; + ObLinkedMacroBlockItemWriter linked_writer; ObArenaAllocator schema_allocator; ObCreateTabletSchema create_tablet_schema; prepare_data_schema(schema_allocator, create_tablet_schema); @@ -701,12 +704,13 @@ TEST_F(TestTenantMetaMemMgr, test_wash_tablet) create_scn, create_scn.get_val_for_tx(), create_tablet_schema, true, &freezer); ASSERT_EQ(common::OB_SUCCESS, ret); ASSERT_EQ(1, tablet->get_ref()); + ObTabletPersister persister; ObTabletHandle new_handle; ASSERT_EQ(common::OB_SUCCESS, t3m_.acquire_tablet_from_pool(ObTabletPoolType::TP_NORMAL, WashTabletPriority::WTP_HIGH, key, new_handle)); - ASSERT_EQ(common::OB_SUCCESS, ObTabletPersister::persist_and_fill_tablet( - *tablet, allocator_, tablet_meta_write_ctxs, sstable_meta_write_ctxs, new_handle)); - ASSERT_EQ(common::OB_SUCCESS, ObTabletPersister::persist_4k_tablet(allocator_, new_handle)); + ASSERT_EQ(common::OB_SUCCESS, persister.persist_and_fill_tablet( + *tablet, linked_writer, total_write_ctxs, new_handle, space_usage, tablet_macro_info)); + ASSERT_EQ(common::OB_SUCCESS, persister.persist_aggregated_meta(tablet_macro_info, new_handle, space_usage)); ObMetaDiskAddr addr = new_handle.get_obj()->get_tablet_addr(); ret = t3m_.compare_and_swap_tablet(key, new_handle, new_handle); @@ -765,8 +769,7 @@ TEST_F(TestTenantMetaMemMgr, test_wash_inner_tablet) ASSERT_TRUE(tablet->pointer_hdl_.is_valid()); ObSSTable sstable; - common::ObSEArray tablet_meta_write_ctxs; - common::ObSEArray sstable_meta_write_ctxs; + common::ObSEArray total_write_ctxs; checkpoint::ObCheckpointExecutor ckpt_executor; checkpoint::ObDataCheckpoint data_checkpoint; ObLS ls; @@ -775,6 +778,9 @@ TEST_F(TestTenantMetaMemMgr, test_wash_inner_tablet) ObLSTabletService ls_tablet_svr; MockObLogHandler log_handler; ObFreezer freezer; + ObTabletSpaceUsage 
space_usage; + ObTabletMacroInfo tablet_macro_info; + ObLinkedMacroBlockItemWriter linked_writer; ObArenaAllocator schema_allocator; ObCreateTabletSchema create_tablet_schema; prepare_data_schema(schema_allocator, create_tablet_schema); @@ -794,10 +800,11 @@ TEST_F(TestTenantMetaMemMgr, test_wash_inner_tablet) ASSERT_EQ(1, tablet->get_ref()); ObTabletHandle new_handle; + ObTabletPersister persister; ASSERT_EQ(common::OB_SUCCESS, t3m_.acquire_tablet_from_pool(ObTabletPoolType::TP_NORMAL, WashTabletPriority::WTP_HIGH, key, new_handle)); - ASSERT_EQ(common::OB_SUCCESS, ObTabletPersister::persist_and_fill_tablet( - *tablet, allocator_, tablet_meta_write_ctxs, sstable_meta_write_ctxs, new_handle)); - ASSERT_EQ(common::OB_SUCCESS, ObTabletPersister::persist_4k_tablet(allocator_, new_handle)); + ASSERT_EQ(common::OB_SUCCESS, persister.persist_and_fill_tablet( + *tablet, linked_writer, total_write_ctxs, new_handle, space_usage, tablet_macro_info)); + ASSERT_EQ(common::OB_SUCCESS, persister.persist_aggregated_meta(tablet_macro_info, new_handle, space_usage)); ObMetaDiskAddr addr = new_handle.get_obj()->get_tablet_addr(); @@ -868,8 +875,7 @@ TEST_F(TestTenantMetaMemMgr, test_wash_no_sstable_tablet) ASSERT_TRUE(nullptr != tablet); ASSERT_TRUE(tablet->pointer_hdl_.is_valid()); - common::ObSEArray tablet_meta_write_ctxs; - common::ObSEArray sstable_meta_write_ctxs; + common::ObSEArray total_write_ctxs; checkpoint::ObCheckpointExecutor ckpt_executor; checkpoint::ObDataCheckpoint data_checkpoint; ObLS ls; @@ -878,6 +884,9 @@ TEST_F(TestTenantMetaMemMgr, test_wash_no_sstable_tablet) ObLSTabletService ls_tablet_svr; MockObLogHandler log_handler; ObFreezer freezer; + ObTabletSpaceUsage space_usage; + ObTabletMacroInfo tablet_macro_info; + ObLinkedMacroBlockItemWriter linked_writer; ObArenaAllocator schema_allocator; ObCreateTabletSchema create_tablet_schema; prepare_data_schema(schema_allocator, create_tablet_schema); @@ -897,10 +906,11 @@ TEST_F(TestTenantMetaMemMgr, 
test_wash_no_sstable_tablet) ASSERT_EQ(1, tablet->get_ref()); ObTabletHandle new_handle; + ObTabletPersister persister; ASSERT_EQ(common::OB_SUCCESS, t3m_.acquire_tablet_from_pool(ObTabletPoolType::TP_NORMAL, WashTabletPriority::WTP_HIGH, key, new_handle)); - ASSERT_EQ(common::OB_SUCCESS, ObTabletPersister::persist_and_fill_tablet( - *tablet, allocator_, tablet_meta_write_ctxs, sstable_meta_write_ctxs, new_handle)); - ASSERT_EQ(common::OB_SUCCESS, ObTabletPersister::persist_4k_tablet(allocator_, new_handle)); + ASSERT_EQ(common::OB_SUCCESS, persister.persist_and_fill_tablet( + *tablet, linked_writer, total_write_ctxs, new_handle, space_usage, tablet_macro_info)); + ASSERT_EQ(common::OB_SUCCESS, persister.persist_aggregated_meta(tablet_macro_info, new_handle, space_usage)); ret = t3m_.compare_and_swap_tablet(key, new_handle, new_handle); ASSERT_EQ(common::OB_SUCCESS, ret); @@ -951,8 +961,7 @@ TEST_F(TestTenantMetaMemMgr, test_get_tablet_with_allocator) ASSERT_TRUE(tablet->pointer_hdl_.is_valid()); ObSSTable sstable; - common::ObSEArray tablet_meta_write_ctxs; - common::ObSEArray sstable_meta_write_ctxs; + common::ObSEArray total_write_ctxs; checkpoint::ObCheckpointExecutor ckpt_executor; checkpoint::ObDataCheckpoint data_checkpoint; ObLS ls; @@ -961,6 +970,9 @@ TEST_F(TestTenantMetaMemMgr, test_get_tablet_with_allocator) ObLSTabletService ls_tablet_svr; MockObLogHandler log_handler; ObFreezer freezer; + ObTabletSpaceUsage space_usage; + ObTabletMacroInfo tablet_macro_info; + ObLinkedMacroBlockItemWriter linked_writer; ObArenaAllocator schema_allocator; ObCreateTabletSchema create_tablet_schema; prepare_data_schema(schema_allocator, create_tablet_schema); @@ -986,10 +998,11 @@ TEST_F(TestTenantMetaMemMgr, test_get_tablet_with_allocator) ASSERT_EQ(1, tablet->get_ref()); ObTabletHandle new_handle; + ObTabletPersister persister; ASSERT_EQ(common::OB_SUCCESS, t3m_.acquire_tablet_from_pool(ObTabletPoolType::TP_NORMAL, WashTabletPriority::WTP_HIGH, key, new_handle)); - 
ASSERT_EQ(common::OB_SUCCESS, ObTabletPersister::persist_and_fill_tablet( - *tablet, allocator_, tablet_meta_write_ctxs, sstable_meta_write_ctxs, new_handle)); - ASSERT_EQ(common::OB_SUCCESS, ObTabletPersister::persist_4k_tablet(allocator_, new_handle)); + ASSERT_EQ(common::OB_SUCCESS, persister.persist_and_fill_tablet( + *tablet, linked_writer, total_write_ctxs, new_handle, space_usage, tablet_macro_info)); + ASSERT_EQ(common::OB_SUCCESS, persister.persist_aggregated_meta(tablet_macro_info, new_handle, space_usage)); ret = t3m_.compare_and_swap_tablet(key, new_handle, new_handle); tablet = new_handle.get_obj(); diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 8f2c9d10e4..1ab11feaa4 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -162,6 +162,7 @@ ob_set_subtarget(ob_storage ckpt slog_ckpt/ob_tenant_checkpoint_slog_handler.cpp slog_ckpt/ob_tenant_storage_checkpoint_reader.cpp slog_ckpt/ob_tenant_storage_checkpoint_writer.cpp + slog_ckpt/ob_tablet_replay_create_handler.cpp ) ob_set_subtarget(ob_storage high_availability @@ -266,7 +267,11 @@ ob_set_subtarget(ob_storage tablet tablet/ob_tablet_persister.cpp tablet/ob_tablet_obj_load_helper.cpp tablet/ob_tablet.cpp + tablet/ob_tablet_block_header.cpp tablet/ob_tablet_medium_info_reader.cpp + tablet/ob_tablet_space_usage.cpp + tablet/ob_tablet_block_aggregated_info.cpp + tablet/ob_tablet_macro_info_iterator.cpp ) ob_set_subtarget(ob_storage tx_wrs diff --git a/src/storage/blocksstable/index_block/ob_index_block_builder.cpp b/src/storage/blocksstable/index_block/ob_index_block_builder.cpp index d0a9809672..0f4a7746e8 100644 --- a/src/storage/blocksstable/index_block/ob_index_block_builder.cpp +++ b/src/storage/blocksstable/index_block/ob_index_block_builder.cpp @@ -1388,7 +1388,8 @@ int ObBaseIndexBlockBuilder::close(ObIAllocator &allocator, ObIndexTreeInfo &tre root_builder->block_to_row_desc(micro_block_desc, root_row_desc); if 
(OB_FAIL(root_addr.set_block_addr(root_row_desc.macro_id_, root_row_desc.block_offset_, - root_row_desc.block_size_))) { + root_row_desc.block_size_, + ObMetaDiskAddr::DiskType::BLOCK))) { STORAGE_LOG(WARN, "fail to set block address", K(ret), K(root_row_desc)); } } diff --git a/src/storage/blocksstable/ob_block_manager.cpp b/src/storage/blocksstable/ob_block_manager.cpp index 4c903cbdcf..0d5f3e7158 100644 --- a/src/storage/blocksstable/ob_block_manager.cpp +++ b/src/storage/blocksstable/ob_block_manager.cpp @@ -1096,10 +1096,16 @@ int ObBlockManager::mark_tenant_blocks( } else { LOG_WARN("fail to get next in-memory tablet", K(ret)); } - } else if (OB_FAIL(mark_tablet_meta_blocks(mark_info, handle, macro_id_set, tmp_status))) { - LOG_WARN("fail to mark tablet meta blocks", K(ret)); - } else if (OB_FAIL(mark_sstable_blocks(mark_info, handle, macro_id_set, tmp_status))) { - LOG_WARN("fail to mark tablet blocks", K(ret)); + } else if (handle.get_obj()->is_old_tablet()) { + if (OB_FAIL(mark_tablet_meta_blocks(mark_info, handle, macro_id_set, tmp_status))) { + LOG_WARN("fail to mark tablet meta blocks", K(ret)); + } else if (OB_FAIL(mark_sstable_blocks(mark_info, handle, macro_id_set, tmp_status))) { + LOG_WARN("fail to mark tablet blocks", K(ret)); + } + } else { + if (OB_FAIL(mark_tablet_block(mark_info, handle, macro_id_set, tmp_status))) { + LOG_WARN("fail to mark tablet's macro blocks", K(ret), K(tmp_status), KPC(handle.get_obj())); + } } } } @@ -1237,7 +1243,7 @@ int ObBlockManager::mark_tablet_meta_blocks( int ret = OB_SUCCESS; const ObTablet *tablet = handle.get_obj(); ObSArray meta_ids; - if (OB_FAIL(tablet->get_tablet_meta_ids(meta_ids))) { + if (OB_FAIL(tablet->get_tablet_first_second_level_meta_ids(meta_ids))) { LOG_WARN("fail to get tablet meta block ids", K(ret), KPC(tablet)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < meta_ids.count(); i++) { @@ -1288,6 +1294,116 @@ int ObBlockManager::mark_sstable_meta_block( return ret; } +int 
ObBlockManager::mark_tablet_block(
+    MacroBlkIdMap &mark_info,
+    storage::ObTabletHandle &handle,
+    common::hash::ObHashSet &macro_id_set,
+    ObMacroBlockMarkerStatus &tmp_status)
+{
+  // Marks every macro block referenced by the tablet behind `handle`.
+  //
+  // The tablet's meta / data / shared-meta / shared-data macro id lists are
+  // fetched with get_all_macro_ids() and each list is handed to
+  // do_mark_tablet_block() together with its ObTabletMacroType. Afterwards,
+  // if the tablet's own address is a block address, that macro block is
+  // marked as well and accounted as a shared meta block.
+  //
+  // @param mark_info     passed through to update_mark_info()
+  // @param handle        tablet whose macro blocks are marked
+  // @param macro_id_set  ids already accounted for; an id found here does not
+  //                      touch the counters in tmp_status again
+  // @param tmp_status    marker statistics updated in place
+  // @return OB_SUCCESS, or the error code of the first step that failed
+  //
+  // NOTE(review): the template argument lists in this patch text look like
+  // they were stripped during extraction. ObSArray's element type is restored
+  // from its usage; ObHashSet's arguments are not visible here and must be
+  // taken from the declaration in ob_block_manager.h.
+  int ret = OB_SUCCESS;
+  ObSArray<MacroBlockId> meta_block_arr;
+  ObSArray<MacroBlockId> data_block_arr;
+  ObSArray<MacroBlockId> shared_meta_block_arr;
+  ObSArray<MacroBlockId> shared_data_block_arr;
+  if (OB_FAIL(handle.get_obj()->get_all_macro_ids(
+      meta_block_arr,
+      data_block_arr,
+      shared_meta_block_arr,
+      shared_data_block_arr))) {
+    LOG_WARN("fail to get tablet's macro info", K(ret));
+  } else if (OB_FAIL(do_mark_tablet_block(
+      meta_block_arr,
+      ObTabletMacroType::META_BLOCK,
+      mark_info,
+      macro_id_set,
+      tmp_status))) {
+    LOG_WARN("fail to mark meta block ids", K(ret), K(meta_block_arr));
+  } else if (OB_FAIL(do_mark_tablet_block(
+      data_block_arr,
+      ObTabletMacroType::DATA_BLOCK,
+      mark_info,
+      macro_id_set,
+      tmp_status))) {
+    // each branch names its own block kind so a failure can be traced from the log alone
+    LOG_WARN("fail to mark data block ids", K(ret), K(data_block_arr));
+  } else if (OB_FAIL(do_mark_tablet_block(
+      shared_meta_block_arr,
+      ObTabletMacroType::SHARED_META_BLOCK,
+      mark_info,
+      macro_id_set,
+      tmp_status))) {
+    LOG_WARN("fail to mark shared meta block ids", K(ret), K(shared_meta_block_arr));
+  } else if (OB_FAIL(do_mark_tablet_block(
+      shared_data_block_arr,
+      ObTabletMacroType::SHARED_DATA_BLOCK,
+      mark_info,
+      macro_id_set,
+      tmp_status))) {
+    LOG_WARN("fail to mark shared data block ids", K(ret), K(shared_data_block_arr));
+  } else {
+    // the block holding the tablet itself is marked in addition to the four lists
+    const ObMetaDiskAddr &addr = handle.get_obj()->get_tablet_addr();
+    if (addr.is_block()) {
+      const MacroBlockId &macro_id = addr.block_id();
+      if (OB_FAIL(update_mark_info(macro_id, mark_info))) {
+        LOG_WARN("fail to update mark info", K(ret), K(macro_id));
+      } else if (OB_FAIL(macro_id_set.set_refactored(macro_id, 0 /* not overwrite */))) {
+        if (OB_HASH_EXIST != ret) {
+          LOG_WARN("fail to put macro id into set", K(ret), K(macro_id));
+        } else {
+          // already in the set: not an error, and the counters stay as they are
+          ret = OB_SUCCESS;
+        }
+      } else {
+        // first sighting of this id: move it from "hold" to "shared meta"
+        tmp_status.hold_count_--;
+        tmp_status.shared_meta_block_count_++;
+      }
+    }
+  }
+  return ret;
+}
+ +int ObBlockManager::do_mark_tablet_block( + const ObIArray &id_arr, + const ObTabletMacroType block_type, + MacroBlkIdMap &mark_info, + common::hash::ObHashSet ¯o_id_set, + ObMacroBlockMarkerStatus &tmp_status) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < id_arr.count(); i++) { + const MacroBlockId ¯o_id = id_arr.at(i); + if (OB_FAIL(update_mark_info(macro_id, mark_info))) { + LOG_WARN("fail to update mark info", K(ret), K(macro_id)); + } else if (OB_FAIL(macro_id_set.set_refactored(macro_id, 0 /* not overwrite */))) { + if (OB_HASH_EXIST != ret) { + LOG_WARN("fail to put macro id into set", K(ret), K(macro_id)); + } else { + ret = OB_SUCCESS; + } + } else { + switch (block_type) { + case ObTabletMacroType::META_BLOCK: + tmp_status.index_block_count_++; + break; + case ObTabletMacroType::DATA_BLOCK: + tmp_status.data_block_count_++; + break; + case ObTabletMacroType::SHARED_META_BLOCK: + tmp_status.shared_meta_block_count_++; + break; + case ObTabletMacroType::SHARED_DATA_BLOCK: + tmp_status.shared_data_block_count_++; + break; + default: + ret = OB_INVALID_ARGUMENT; + LOG_WARN("block type is invalid", K(ret), K(block_type)); + } + if (OB_SUCC(ret)) { + tmp_status.hold_count_--; + } + } + } + return ret; +} + int ObBlockManager::mark_tenant_ckpt_blocks( MacroBlkIdMap &mark_info, common::hash::ObHashSet ¯o_id_set, diff --git a/src/storage/blocksstable/ob_block_manager.h b/src/storage/blocksstable/ob_block_manager.h index e91ef12564..9c8c98ab2d 100644 --- a/src/storage/blocksstable/ob_block_manager.h +++ b/src/storage/blocksstable/ob_block_manager.h @@ -24,6 +24,7 @@ #include "storage/blocksstable/ob_macro_block_checker.h" #include "storage/blocksstable/ob_super_block_buffer_holder.h" #include "storage/ob_super_block_struct.h" +#include "storage/tablet/ob_tablet_block_aggregated_info.h" namespace oceanbase { @@ -348,6 +349,17 @@ private: MacroBlkIdMap &mark_info, common::hash::ObHashSet ¯o_id_set, ObMacroBlockMarkerStatus &tmp_status); + 
int mark_tablet_block( + MacroBlkIdMap &mark_info, + storage::ObTabletHandle &handle, + common::hash::ObHashSet &macro_id_set, + ObMacroBlockMarkerStatus &tmp_status); + int do_mark_tablet_block( + const ObIArray &id_arr, + const ObTabletMacroType block_type, + MacroBlkIdMap &mark_info, + common::hash::ObHashSet &macro_id_set, + ObMacroBlockMarkerStatus &tmp_status); int mark_sstable_blocks( MacroBlkIdMap &mark_info, storage::ObTabletHandle &handle, diff --git a/src/storage/blocksstable/ob_shared_macro_block_manager.cpp b/src/storage/blocksstable/ob_shared_macro_block_manager.cpp index ae83bb3dd2..8953a7ad8f 100644 --- a/src/storage/blocksstable/ob_shared_macro_block_manager.cpp +++ b/src/storage/blocksstable/ob_shared_macro_block_manager.cpp @@ -375,8 +375,7 @@ int ObSharedMacroBlockMgr::free_block(const MacroBlockId &block_id, const int64_ { int ret = OB_SUCCESS; int32_t curr_size = 0; - if (OB_UNLIKELY(!block_id.is_valid() || block_size <= 0 - || block_size >= SMALL_SSTABLE_STHRESHOLD_SIZE)) { + if (OB_UNLIKELY(!block_id.is_valid() || block_size <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid block size or id", K(ret), K(block_id), K(block_size)); } else { diff --git a/src/storage/blockstore/ob_shared_block_reader_writer.cpp b/src/storage/blockstore/ob_shared_block_reader_writer.cpp index 733db51f8e..79cf0c9929 100644 --- a/src/storage/blockstore/ob_shared_block_reader_writer.cpp +++ b/src/storage/blockstore/ob_shared_block_reader_writer.cpp @@ -317,37 +317,32 @@ int ObSharedBlockReadHandle::get_data(ObIAllocator &allocator, char *&buf, int64 int ret = OB_SUCCESS; if (OB_FAIL(wait())) { LOG_WARN("Fail to wait io finish", K(ret)); + } else if (OB_UNLIKELY(!addr_.is_block())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected addr type", K(ret), K(addr_)); } else { const char *data_buf = macro_handle_.get_buffer(); const int64_t data_size = macro_handle_.get_data_size(); int64_t header_size = 0; - if (allocator_ == &allocator && OB_FAIL(parse_data(data_buf, 
data_size, buf, buf_len))) { // shallow copy - LOG_WARN("fail to parse data", K(ret)); - } else if (OB_FAIL(verify_checksum(data_buf, data_size, header_size, buf_len))) { - LOG_WARN("fail to verify checksum", K(ret), KP(data_buf), K(data_size), K(header_size), K(buf_len)); + if (!addr_.is_raw_block()) { + if (OB_FAIL(verify_checksum(data_buf, data_size, header_size, buf_len))) { + LOG_WARN("fail to verify checksum", K(ret), KP(data_buf), K(data_size), K(header_size), K(buf_len)); + } + } else { // is raw block + buf_len = data_size; + } + + if (OB_FAIL(ret)) { + } else if (allocator_ == &allocator) { // allocator is same, use shallow copy + buf = const_cast(data_buf) + header_size; } else if (OB_ISNULL(buf = static_cast(allocator.alloc(buf_len)))) { ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to alloc buf", K(ret), K(buf_len)); + LOG_WARN("fail to alloc buf", K(ret),K(buf_len)); } else { MEMCPY(buf, data_buf + header_size, buf_len); } } - return ret; -} -int ObSharedBlockReadHandle::parse_data( - const char *data_buf, - const int64_t data_size, - char *&buf, - int64_t &buf_len) -{ - int ret = OB_SUCCESS; - int64_t header_size = 0; - if (OB_FAIL(verify_checksum(data_buf, data_size, header_size, buf_len))) { - LOG_WARN("fail to verify checksum", K(ret), KP(data_buf), K(data_size), K(header_size), K(buf_len)); - } else { - buf = const_cast(data_buf) + header_size; - } return ret; } @@ -382,13 +377,15 @@ int ObSharedBlockReadHandle::verify_checksum( return ret; } -int ObSharedBlockReadHandle::set_macro_handle(const ObMacroBlockHandle &macro_handle) +int ObSharedBlockReadHandle::set_addr_and_macro_handle( + const ObMetaDiskAddr &addr, const ObMacroBlockHandle &macro_handle) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!macro_handle.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret), K(macro_handle)); } else { + addr_ = addr; macro_handle_ = macro_handle; } return ret; @@ -560,6 +557,66 @@ int ObSharedBlockLinkIter::read_next_block(ObSharedBlockReadHandle 
&block_handle return ret; } +//=================================== ObSharedBlockIOCallback ============================= +ObSharedBlockIOCallback::~ObSharedBlockIOCallback() +{ + if (nullptr != io_allocator_ && NULL != data_buf_) { + io_allocator_->free(data_buf_); + } + io_allocator_ = nullptr; + data_buf_ = nullptr; +} + +int ObSharedBlockIOCallback::alloc_data_buf(const char *io_data_buffer, const int64_t data_size) +{ + int ret = alloc_and_copy_data(io_data_buffer, data_size, io_allocator_, data_buf_); + return ret; +} + +int ObSharedBlockIOCallback::inner_process(const char *data_buffer, const int64_t size) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(size <= 0 || data_buffer == nullptr)) { + ret = OB_INVALID_DATA; + LOG_WARN("invalid data buffer size", K(ret), K(size), KP(data_buffer)); + } else if (OB_UNLIKELY(!is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected addr type", K(ret), K(addr_)); + } else if (OB_FAIL(alloc_and_copy_data(data_buffer, size, io_allocator_, data_buf_))) { + LOG_WARN("Fail to allocate memory, ", K(ret), K(size)); + } else { + const char *raw_buf = nullptr; // buf without shared block header + int64_t raw_buf_len = 0; + int64_t header_size = 0; + if (!addr_.is_raw_block()) { + if (OB_FAIL(ObSharedBlockReadHandle::verify_checksum(data_buf_, size, header_size, raw_buf_len))) { + LOG_WARN("fail to verify checksum", K(ret), KP(data_buffer), K(size), K(header_size)); + } else { + raw_buf = data_buf_ + header_size; + } + } else { // is raw block + raw_buf = data_buf_; + raw_buf_len = size; + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(do_process(raw_buf, raw_buf_len))) { + LOG_WARN("fail to do process", K(ret), KP(raw_buf), K(raw_buf_len)); + } + } + } + + if (OB_FAIL(ret) && NULL != io_allocator_ && NULL != data_buf_) { + io_allocator_->free(data_buf_); + data_buf_ = NULL; + } + return ret; +} + +const char *ObSharedBlockIOCallback::get_data() +{ + return data_buf_; +} //=================================== 
ObSharedBlockReaderWriter ============================= const MacroBlockId ObSharedBlockHeader::DEFAULT_MACRO_ID(0, MacroBlockId::AUTONOMIC_BLOCK_INDEX, 0); @@ -615,20 +672,32 @@ void ObSharedBlockReaderWriter::reset() need_cross_ = false; is_inited_ = false; } + int ObSharedBlockReaderWriter::async_write( const ObSharedBlockWriteInfo &write_info, ObSharedBlockWriteHandle &block_handle) { int ret = OB_SUCCESS; lib::ObMutexGuard guard(mutex_); - ObSharedBlocksWriteCtx write_ctx; ObSharedBlockWriteArgs write_args; - write_args.need_align_ = need_align_; + ObMetaDiskAddr prev_addr; + prev_addr.set_none_addr(); + write_args.with_header_ = false; + ObSharedBlockHeader header; + if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("Not init", K(ret)); - } else if (OB_FAIL(inner_async_write(write_info, write_args, block_handle, write_ctx))) { - LOG_WARN("Fail to inner async write block", K(ret), K(write_info), K(write_args)); + } else if (OB_UNLIKELY(!write_info.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(write_info)); + } else if (OB_FAIL(inner_write_block( + header, + write_info.buffer_, + write_info.size_, + write_args, + block_handle))) { + LOG_WARN("fail to write block", K(ret), K(write_info), K(write_args)); } return ret; } @@ -764,7 +833,7 @@ int ObSharedBlockReaderWriter::switch_block(ObMacroBlockHandle &macro_handle) } int ObSharedBlockReaderWriter::calc_store_size( - const ObSharedBlockHeader &header, + const int64_t total_size, const bool need_align, int64_t &store_size, int64_t &align_store_size) @@ -772,7 +841,7 @@ int ObSharedBlockReaderWriter::calc_store_size( int ret = OB_SUCCESS; store_size = 0; align_store_size = 0; - store_size = header.header_size_ + header.data_size_; + store_size = total_size; const int64_t next_align_offset = upper_align(offset_ + store_size, write_align_size_); align_store_size = next_align_offset - align_offset_; if (need_align) { @@ -780,7 +849,7 @@ int ObSharedBlockReaderWriter::calc_store_size( } if 
(OB_UNLIKELY(store_size > DEFAULT_MACRO_BLOCK_SIZE)) { ret = OB_NOT_SUPPORTED; - LOG_WARN("Not supported block size", K(ret), K(header), K_(offset), K_(align_offset), K(store_size)); + LOG_WARN("Not supported block size", K(ret), K_(offset), K_(align_offset), K(store_size)); } return ret; } @@ -788,17 +857,18 @@ int ObSharedBlockReaderWriter::calc_store_size( int ObSharedBlockReaderWriter::inner_write_block( const ObSharedBlockHeader &header, const char *buf, - const int64_t &size, - ObSharedBlockBaseHandle &block_handle, - const bool need_flush, - const bool need_align) + const int64_t size, + const ObSharedBlockWriteArgs &write_args, + ObSharedBlockBaseHandle &block_handle) { int ret = OB_SUCCESS; ObMacroBlockHandle macro_handle; ObMetaDiskAddr addr; - const int64_t blk_size = header.header_size_ + header.data_size_; + const int64_t blk_size = write_args.with_header_ ? header.header_size_ + header.data_size_ : size; int64_t store_size = 0, align_store_size = 0; - if (OB_FAIL(calc_store_size(header, need_align, store_size, align_store_size))) { + bool need_align = write_args.need_align_; + bool need_flush = write_args.need_flush_; + if (OB_FAIL(calc_store_size(blk_size, need_align, store_size, align_store_size))) { LOG_WARN("fail to calc store size", K(ret)); } else if (!macro_handle_.get_macro_id().is_valid() && OB_FAIL(OB_SERVER_BLOCK_MGR.alloc_block(macro_handle_))) { @@ -808,7 +878,7 @@ int ObSharedBlockReaderWriter::inner_write_block( LOG_WARN("Fail to switch new block", K(ret)); } else if (macro_handle.is_valid() && OB_FAIL(block_handle.add_macro_handle(macro_handle))) { LOG_WARN("Fail to flush last macro block", K(ret), K(macro_handle)); - } else if (OB_FAIL(calc_store_size(header, need_align, store_size, align_store_size))) { + } else if (OB_FAIL(calc_store_size(blk_size, need_align, store_size, align_store_size))) { LOG_WARN("fail to calc store size", K(ret)); } } @@ -820,7 +890,7 @@ int ObSharedBlockReaderWriter::inner_write_block( const int64_t 
prev_offset = offset_; const int64_t prev_align_offset = align_offset_; const bool prev_hanging = hanging_; - if (OB_FAIL(header.serialize(data_.current(), header.header_size_, pos))) { + if (write_args.with_header_ && OB_FAIL(header.serialize(data_.current(), header.header_size_, pos))) { LOG_WARN("Fail to serialize header", K(ret), K(header)); } else { MEMCPY(data_.current() + pos, buf, size); @@ -833,7 +903,8 @@ int ObSharedBlockReaderWriter::inner_write_block( // io_callback if (OB_FAIL(addr.set_block_addr(macro_handle_.get_macro_id(), offset_, - blk_size))) { + blk_size, + write_args.with_header_ ? ObMetaDiskAddr::DiskType::BLOCK : ObMetaDiskAddr::DiskType::RAW_BLOCK))) { LOG_WARN("Fail to set block addr", K(ret)); } else if (OB_FAIL(block_handle.add_meta_addr(addr))) { LOG_WARN("Fail to add meta addr", K(ret), K(addr)); @@ -863,7 +934,7 @@ int ObSharedBlockReaderWriter::inner_write_block( if (OB_FAIL(ret)) { int tmp_ret = OB_SUCCESS; if (OB_TMP_FAIL(data_.set_pos(prev_pos))) { - LOG_ERROR("fail to roll back data buffer", K(ret), K(tmp_ret), K(prev_pos), K(header)); + LOG_ERROR("fail to roll back data buffer", K(ret), K(tmp_ret), K(prev_pos), K(header), K(write_args)); ob_usleep(1000 * 1000); ob_abort(); } else { @@ -907,8 +978,7 @@ int ObSharedBlockReaderWriter::write_block( header.checksum_ = ob_crc64_sse42(write_info.buffer_, write_info.size_); header.next_macro_id_ = ObSharedBlockHeader::DEFAULT_MACRO_ID; header.prev_addr_ = prev_addr; - if (OB_FAIL(inner_write_block(header, write_info.buffer_, write_info.size_, - block_handle, write_args.need_flush_, write_args.need_align_))) { + if (OB_FAIL(inner_write_block(header, write_info.buffer_, write_info.size_, write_args, block_handle))) { LOG_WARN("Fail to write block", K(ret), K(write_info), K(write_args)); } else { const int64_t cnt = block_handle.addrs_.count(); @@ -954,23 +1024,48 @@ int ObSharedBlockReaderWriter::async_read( macro_read_info.offset_, macro_read_info.size_))) { LOG_WARN("Fail to get block 
addr", K(ret), K(read_info)); + } else if (nullptr == read_info.io_callback_ + && OB_FAIL(block_handle.alloc_io_buf(macro_read_info.buf_, macro_read_info.size_))) { + LOG_WARN("Fail to alloc io buf", K(ret), K(macro_read_info)); + } else if (OB_FAIL(macro_handle.async_read(macro_read_info))) { + LOG_WARN("Fail to async read block", K(ret), K(macro_read_info)); + } else if (OB_FAIL(block_handle.set_addr_and_macro_handle(read_info.addr_, macro_handle))) { + LOG_WARN("Fail to add macro handle", K(ret), K(macro_read_info)); + } + return ret; +} + +int ObSharedBlockReaderWriter::parse_data_from_macro_block( + ObMacroBlockHandle &macro_handle, + const ObMetaDiskAddr addr, + char *&buf, int64_t &buf_len) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!macro_handle.is_valid() || !addr.is_valid() || !addr.is_block())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(macro_handle), K(addr)); } else { - if (nullptr == read_info.io_callback_ - && OB_FAIL(block_handle.alloc_io_buf(macro_read_info.buf_, macro_read_info.size_))) { - LOG_WARN("Fail to alloc io buf", K(ret), K(macro_read_info)); - } else { - macro_read_info.io_callback_ = read_info.io_callback_; - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(macro_handle.async_read(macro_read_info))) { - LOG_WARN("Fail to async read block", K(ret), K(macro_read_info)); - } else if (OB_FAIL(block_handle.set_macro_handle(macro_handle))) { - LOG_WARN("Fail to add macro handle", K(ret), K(macro_read_info)); + const char *block_buf = macro_handle.get_buffer(); + const int64_t block_buf_len = macro_handle.get_data_size(); + if (OB_UNLIKELY(addr.offset() + addr.size() > block_buf_len)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block_buf is not enough", K(ret), K(addr), K(block_buf_len)); + } else if (!addr.is_raw_block()) { + int64_t header_size = 0; + if (OB_FAIL(ObSharedBlockReadHandle::verify_checksum(block_buf + addr.offset(), addr.size(), header_size, buf_len))) { + LOG_WARN("fail to verify checksum", K(ret), 
K(addr)); + } else { + buf = const_cast(block_buf) + addr.offset() + header_size; + } + } else { // is raw block + buf = const_cast(block_buf) + addr.offset(); + buf_len = addr.size(); } } return ret; } + } // end namespace storage } // end namespace oceanbase diff --git a/src/storage/blockstore/ob_shared_block_reader_writer.h b/src/storage/blockstore/ob_shared_block_reader_writer.h index d271ab6054..c5757f841a 100644 --- a/src/storage/blockstore/ob_shared_block_reader_writer.h +++ b/src/storage/blockstore/ob_shared_block_reader_writer.h @@ -22,6 +22,7 @@ namespace oceanbase { namespace storage { +class ObSharedBlockIOCallback; struct ObSharedBlockWriteInfo final { public: @@ -36,7 +37,7 @@ public: int64_t offset_; int64_t size_; common::ObIOFlag io_desc_; - common::ObIOCallback *io_callback_; + ObSharedBlockIOCallback *io_callback_; }; struct ObSharedBlockReadInfo final @@ -51,7 +52,7 @@ public: public: ObMetaDiskAddr addr_; common::ObIOFlag io_desc_; - common::ObIOCallback *io_callback_; + ObSharedBlockIOCallback *io_callback_; int64_t io_timeout_ms_; DISALLOW_COPY_AND_ASSIGN(ObSharedBlockReadInfo); }; @@ -127,11 +128,37 @@ protected: DISALLOW_COPY_AND_ASSIGN(ObSharedBlockBaseHandle); }; +class ObSharedBlockIOCallback : public common::ObIOCallback +{ +public: + ObSharedBlockIOCallback(common::ObIAllocator *io_allocator, const ObMetaDiskAddr addr) + : io_allocator_(io_allocator), addr_(addr), data_buf_(nullptr) {} + virtual ~ObSharedBlockIOCallback(); + virtual int alloc_data_buf(const char *io_data_buffer, const int64_t data_size) override; + int inner_process(const char *data_buffer, const int64_t size) override; + virtual int do_process(const char *buf, const int64_t buf_len) = 0; + virtual int64_t size() const = 0; + virtual const char *get_data() override; + virtual ObIAllocator *get_allocator() override { return io_allocator_; } + + VIRTUAL_TO_STRING_KV(K_(addr), KP_(io_allocator), KP_(data_buf)); + bool is_valid() const + { + return addr_.is_block() && 
nullptr != io_allocator_; + } + +private: + ObIAllocator *io_allocator_; + ObMetaDiskAddr addr_; + char *data_buf_; // actual data buffer +}; + class ObSharedBlockReadHandle final { friend class ObSharedBlockReaderWriter; friend class ObSharedBlockLinkIter; + friend class ObSharedBlockIOCallback; public: ObSharedBlockReadHandle(); ObSharedBlockReadHandle(ObIAllocator &allocator); @@ -143,13 +170,7 @@ public: int wait(); int get_data(ObIAllocator &allocator, char *&buf, int64_t &buf_len); void reset(); - TO_STRING_KV(K_(macro_handle)); -public: - static int parse_data( - const char *data_buf, - const int64_t data_size, - char *&buf, - int64_t &buf_len); + TO_STRING_KV(K_(addr), K_(macro_handle)); private: static int verify_checksum( @@ -158,10 +179,12 @@ private: int64_t &header_size, int64_t &buf_len); int alloc_io_buf(char *&buf, const int64_t &buf_size); - int set_macro_handle(const blocksstable::ObMacroBlockHandle &macro_handle); + int set_addr_and_macro_handle(const ObMetaDiskAddr &addr, const blocksstable::ObMacroBlockHandle &macro_handle); + private: ObIAllocator *allocator_; blocksstable::ObMacroBlockHandle macro_handle_; + ObMetaDiskAddr addr_; }; class ObSharedBlockWriteHandle final : public ObSharedBlockBaseHandle @@ -239,6 +262,10 @@ public: void reset(); void get_cur_shared_block(blocksstable::MacroBlockId &macro_id); static int async_read(const ObSharedBlockReadInfo &read_info, ObSharedBlockReadHandle &block_handle); + static int parse_data_from_macro_block( + blocksstable::ObMacroBlockHandle &macro_handle, + const ObMetaDiskAddr addr, + char *&buf, int64_t &buf_len); int async_write( const ObSharedBlockWriteInfo &write_info, ObSharedBlockWriteHandle &block_handle); @@ -264,17 +291,16 @@ private: const ObSharedBlockWriteArgs &write_args, ObSharedBlockBaseHandle &block_handle); // cross int calc_store_size( - const ObSharedBlockHeader &header, + const int64_t total_size, const bool need_align, int64_t &store_size, int64_t &align_store_size); int inner_write_block( 
const ObSharedBlockHeader &header, const char *buf, - const int64_t &size, - ObSharedBlockBaseHandle &block_handle, - const bool need_flush = true, - const bool need_align = true); + const int64_t size, + const ObSharedBlockWriteArgs &write_args, + ObSharedBlockBaseHandle &block_handle); int switch_block(blocksstable::ObMacroBlockHandle &macro_handle); int reserve_header(); private: @@ -282,13 +308,14 @@ struct ObSharedBlockWriteArgs final { public: ObSharedBlockWriteArgs() - : need_flush_(true), need_align_(true), is_linked_(false) + : need_flush_(true), need_align_(true), is_linked_(false), with_header_(true) {} ~ObSharedBlockWriteArgs() = default; - TO_STRING_KV(K_(need_flush), K_(need_align), K_(is_linked)); + TO_STRING_KV(K_(need_flush), K_(need_align), K_(is_linked), K_(with_header)); bool need_flush_; bool need_align_; bool is_linked_; + bool with_header_; }; private: lib::ObMutex mutex_; diff --git a/src/storage/column_store/ob_column_oriented_sstable.cpp b/src/storage/column_store/ob_column_oriented_sstable.cpp index fcc1dbd2fe..424aa2995d 100644 --- a/src/storage/column_store/ob_column_oriented_sstable.cpp +++ b/src/storage/column_store/ob_column_oriented_sstable.cpp @@ -227,45 +227,6 @@ int ObCOSSTableV2::fill_cg_sstables(const common::ObIArray &cg_table return ret; } -int ObCOSSTableV2::inc_macro_ref(bool &inc_success) const -{ - int ret = OB_SUCCESS; - inc_success = false; - bool co_success = false; - bool cg_success = false; - if (OB_FAIL(ObSSTable::inc_macro_ref(co_success))) { - LOG_WARN("fail to increase row store macro blocks' ref cnt", K(ret), K(co_success)); - } else if (is_empty_co_) { // no cg sstable - inc_success = true; - } else if (!valid_for_cs_reading_) { - cg_success = true; - } else if (OB_FAIL(cg_sstables_.inc_macro_ref(cg_success))) { - LOG_WARN("fail to increase ref cnt of cg sstables' macro blocks", K(ret), K(cg_success)); - } - - if (OB_FAIL(ret)) { - if (co_success) { - ObSSTable::dec_macro_ref(); - } - if (cg_success) { - 
cg_sstables_.dec_macro_ref(); - } - } else { - inc_success = true; - } - return ret; -} - -void ObCOSSTableV2::dec_macro_ref() const -{ - ObSSTable::dec_macro_ref(); - if (is_empty_co_) { - // do nothing - } else if (valid_for_cs_reading_) { - cg_sstables_.dec_macro_ref(); - } -} - int ObCOSSTableV2::build_cs_meta() { int ret = OB_SUCCESS; diff --git a/src/storage/column_store/ob_column_oriented_sstable.h b/src/storage/column_store/ob_column_oriented_sstable.h index 2cdda56414..7acd839f1e 100644 --- a/src/storage/column_store/ob_column_oriented_sstable.h +++ b/src/storage/column_store/ob_column_oriented_sstable.h @@ -107,8 +107,6 @@ public: bool is_empty_co_table() const { return is_empty_co_; } int fill_cg_sstables(const common::ObIArray &cg_tables); - virtual int inc_macro_ref(bool &inc_success) const override; - virtual void dec_macro_ref() const override; OB_INLINE const ObCOSSTableMeta &get_cs_meta() const { return cs_meta_; } OB_INLINE ObSSTableArray &get_cg_sstables() { return cg_sstables_; } OB_INLINE const ObSSTableArray &get_cg_sstables() const { return cg_sstables_; } diff --git a/src/storage/ls/ob_ls_tablet_service.cpp b/src/storage/ls/ob_ls_tablet_service.cpp index 82deac5693..6573a2c6c2 100644 --- a/src/storage/ls/ob_ls_tablet_service.cpp +++ b/src/storage/ls/ob_ls_tablet_service.cpp @@ -1418,6 +1418,51 @@ int ObLSTabletService::update_tablet_release_memtable_for_offline( return ret; } +int ObLSTabletService::update_tablet_ddl_commit_scn( + const common::ObTabletID &tablet_id, + const SCN ddl_commit_scn) +{ + int ret = OB_SUCCESS; + const ObTabletMapKey key(ls_->get_ls_id(), tablet_id); + ObTabletHandle old_handle; + ObTimeGuard time_guard("ObLSTabletService::update_tablet_ddl_commit_scn", 1_s); + ObBucketHashWLockGuard lock_guard(bucket_lock_, tablet_id.hash()); + time_guard.click("Lock"); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret), K_(is_inited)); + } else if (OB_UNLIKELY(!tablet_id.is_valid() || 
!ddl_commit_scn.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(tablet_id), K(ddl_commit_scn)); + } else if (OB_FAIL(ObTabletCreateDeleteHelper::get_tablet(key, old_handle))) { + LOG_WARN("fail to direct get tablet", K(ret), K(key)); + } else { + time_guard.click("get_tablet"); + ObTenantMetaMemMgr *t3m = MTL(ObTenantMetaMemMgr*); + ObMetaDiskAddr disk_addr; + ObUpdateDDLCommitSCN modifier(ddl_commit_scn); + ObTabletHandle new_handle; + const ObTablet &old_tablet = *old_handle.get_obj(); + if (OB_FAIL(ObTabletPersister::persist_and_transform_only_tablet_meta(old_tablet, modifier, new_handle))) { + LOG_WARN("fail to persist and transform only tablet meta", K(ret), K(old_tablet), K(ddl_commit_scn)); + } else if (FALSE_IT(time_guard.click("Persist"))) { + } else if (FALSE_IT(disk_addr = new_handle.get_obj()->tablet_addr_)) { + } else if (OB_FAIL(ObTabletSlogHelper::write_update_tablet_slog(key.ls_id_, tablet_id, disk_addr))) { + LOG_WARN("failed to write update tablet slog", K(ret), K(key), K(disk_addr)); + } else if (FALSE_IT(time_guard.click("WrSlog"))) { + } else if (OB_FAIL(t3m->compare_and_swap_tablet(key, old_handle, new_handle))) { + LOG_ERROR("failed to compare and swap tablet", K(ret), K(key), K(old_handle), K(new_handle)); + ob_usleep(1000 * 1000); + ob_abort(); + } else { + time_guard.click("CASwap"); + LOG_INFO("succeeded to update tablet ddl commit scn", K(ret), K(key), K(disk_addr), K(old_handle), + K(new_handle), K(ddl_commit_scn), K(time_guard)); + } + } + return ret; +} + int ObLSTabletService::update_tablet_report_status( const common::ObTabletID &tablet_id, const bool found_column_group_checksum_error) @@ -1761,8 +1806,8 @@ int ObLSTabletService::replay_create_tablet( } else if (FALSE_IT(tablet->tablet_addr_ = disk_addr)) { } else if (OB_FAIL(t3m->get_tablet_addr(key, old_addr))) { LOG_WARN("fail to get tablet addr", K(ret), K(key)); - } else if (OB_FAIL(tablet->deserialize(allocator, buf, buf_len, pos))) { - 
LOG_WARN("fail to deserialize tablet", K(ret), K(buf), K(buf_len), K(pos)); + } else if (OB_FAIL(tablet->deserialize_for_replay(allocator, buf, buf_len, pos))) { + LOG_WARN("fail to deserialize tablet", K(ret), KP(buf), K(buf_len), K(pos)); } else if (FALSE_IT(time_guard.click("Deserialize"))) { } else if (OB_FAIL(tablet->init_shared_params(ls_id, tablet_id, diff --git a/src/storage/ls/ob_ls_tablet_service.h b/src/storage/ls/ob_ls_tablet_service.h index c561644ed7..6009fe0029 100644 --- a/src/storage/ls/ob_ls_tablet_service.h +++ b/src/storage/ls/ob_ls_tablet_service.h @@ -31,6 +31,7 @@ #include "storage/tablet/ob_tablet_common.h" #include "storage/tablet/ob_tablet_memtable_mgr.h" #include "storage/tablet/ob_tablet_id_set.h" +#include "storage/tablet/ob_tablet_persister.h" #include "storage/lob/ob_lob_manager.h" #include "storage/multi_data_source/mds_table_mgr.h" @@ -248,6 +249,9 @@ public: int update_tablet_release_memtable_for_offline( const common::ObTabletID &tablet_id, const SCN scn); + int update_tablet_ddl_commit_scn( + const common::ObTabletID &tablet_id, + const SCN ddl_commit_scn); int update_tablet_restore_status( const common::ObTabletID &tablet_id, const ObTabletRestoreStatus::STATUS &restore_status, @@ -468,6 +472,20 @@ private: common::ObTabletID cur_tablet_id_; ObLSTabletService *tablet_svr_; }; + class ObUpdateDDLCommitSCN final : public ObITabletMetaModifier + { + public: + explicit ObUpdateDDLCommitSCN(const share::SCN ddl_commit_scn) : ddl_commit_scn_(ddl_commit_scn) {} + virtual ~ObUpdateDDLCommitSCN() = default; + virtual int modify_tablet_meta(ObTabletMeta &meta) override + { + meta.ddl_commit_scn_ = ddl_commit_scn_; + return OB_SUCCESS; + } + private: + const share::SCN ddl_commit_scn_; + DISALLOW_COPY_AND_ASSIGN(ObUpdateDDLCommitSCN); + }; private: static int refresh_memtable_for_ckpt( const ObMetaDiskAddr &old_addr, diff --git a/src/storage/meta_mem/ob_meta_obj_struct.cpp b/src/storage/meta_mem/ob_meta_obj_struct.cpp index 
bbb164d8b1..88cc67c519 100644 --- a/src/storage/meta_mem/ob_meta_obj_struct.cpp +++ b/src/storage/meta_mem/ob_meta_obj_struct.cpp @@ -48,7 +48,7 @@ int ObMetaDiskAddr::get_block_addr( int64_t &size) const { int ret = OB_SUCCESS; - if (OB_UNLIKELY(DiskType::BLOCK != type_)) { + if (OB_UNLIKELY(!is_block())) { ret = OB_NOT_SUPPORTED; LOG_WARN("type isn't block, not support", K(ret), KPC(this)); } else { @@ -63,21 +63,23 @@ int ObMetaDiskAddr::get_block_addr( int ObMetaDiskAddr::set_block_addr( const blocksstable::MacroBlockId &macro_id, const int64_t offset, - const int64_t size) + const int64_t size, + const DiskType block_type) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!macro_id.is_valid() || offset < 0 || offset > MAX_OFFSET - || size < 0 || size > MAX_SIZE)) { + || size < 0 || size > MAX_SIZE + || (DiskType::RAW_BLOCK != block_type && DiskType::BLOCK != block_type))) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(macro_id), K(offset), K(size)); + LOG_WARN("invalid argument", K(ret), K(macro_id), K(offset), K(size), K(block_type)); } else { first_id_ = macro_id.first_id(); second_id_ = macro_id.second_id(); third_id_ = macro_id.third_id(); offset_ = offset; size_ = size; - type_ = DiskType::BLOCK; + type_ = block_type; } return ret; } @@ -155,6 +157,7 @@ bool ObMetaDiskAddr::is_valid() const && size_ <= ObLogConstants::MAX_LOG_FILE_SIZE; break; case DiskType::BLOCK: + case DiskType::RAW_BLOCK: ret = second_id_ >= -1 && second_id_ < INT64_MAX && size_ > 0; break; case DiskType::MEM: diff --git a/src/storage/meta_mem/ob_meta_obj_struct.h b/src/storage/meta_mem/ob_meta_obj_struct.h index 1daf72c4b9..297f4c8bbf 100644 --- a/src/storage/meta_mem/ob_meta_obj_struct.h +++ b/src/storage/meta_mem/ob_meta_obj_struct.h @@ -36,7 +36,8 @@ public: FILE = 1, BLOCK = 2, MEM = 3, - MAX = 4, + RAW_BLOCK = 4, // refer the shared block of storage meta which has no header + MAX = 5, }; public: ObMetaDiskAddr(); @@ -49,13 +50,16 @@ public: bool operator !=(const 
ObMetaDiskAddr &other) const; bool is_equal_for_persistence(const ObMetaDiskAddr &other) const; - OB_INLINE bool is_block() const { return BLOCK == type_; } - OB_INLINE bool is_disked() const { return BLOCK == type_ || FILE == type_; } + OB_INLINE bool is_block() const { return BLOCK == type_ || RAW_BLOCK == type_; } + OB_INLINE bool is_raw_block() const { return RAW_BLOCK == type_; } + OB_INLINE bool is_disked() const { return BLOCK == type_ || FILE == type_ || RAW_BLOCK == type_; } OB_INLINE bool is_file() const { return FILE == type_; } OB_INLINE bool is_memory() const { return MEM == type_; } OB_INLINE bool is_none() const { return NONE == type_; } OB_INLINE void set_none_addr() { type_ = NONE; } OB_INLINE void set_seq(const uint64_t seq) { seq_ = seq; } + OB_INLINE void set_size(const uint64_t size) { size_ = size; } + OB_INLINE void set_type(const DiskType type) { type_ = type; } OB_INLINE int64_t file_id() const { return file_id_; } OB_INLINE uint64_t size() const { return size_; } OB_INLINE uint64_t offset() const { return offset_; } @@ -72,7 +76,8 @@ public: int set_block_addr( const blocksstable::MacroBlockId &macro_id, const int64_t offset, - const int64_t size); + const int64_t size, + const DiskType block_type); int get_file_addr( int64_t &file_id, int64_t &offset, diff --git a/src/storage/meta_mem/ob_meta_pointer.h b/src/storage/meta_mem/ob_meta_pointer.h new file mode 100644 index 0000000000..e9b8979eb3 --- /dev/null +++ b/src/storage/meta_mem/ob_meta_pointer.h @@ -0,0 +1,471 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_STORAGE_OB_META_POINTER_H_ +#define OCEANBASE_STORAGE_OB_META_POINTER_H_ + +#include "lib/allocator/ob_allocator.h" +#include "share/rc/ob_tenant_base.h" +#include "storage/meta_mem/ob_meta_obj_struct.h" +#include "storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.h" + +namespace oceanbase +{ +namespace storage +{ + +template +class ObMetaPointer +{ +public: + ObMetaPointer(); + explicit ObMetaPointer(const ObMetaDiskAddr &addr); + ObMetaPointer(const ObMetaDiskAddr &addr, ObMetaObjGuard &guard); + ObMetaPointer(const ObMetaPointer &other); + virtual ~ObMetaPointer(); + + int get_in_memory_obj(ObMetaObjGuard &guard); + void get_obj(ObMetaObjGuard &guard); + + void set_obj_pool(ObITenantMetaObjPool &obj_pool); + void set_obj(const ObMetaObjGuard &guard); + void set_addr_without_reset_obj(const ObMetaDiskAddr &addr); + void set_addr_with_reset_obj(const ObMetaDiskAddr &addr); + OB_INLINE const ObMetaDiskAddr &get_addr() const { return phy_addr_; } + + virtual int set_attr_for_obj(T *t); + + virtual int deep_copy(char *buf, const int64_t buf_len, ObMetaPointer *&value) const; + virtual int64_t get_deep_copy_size() const; + bool is_in_memory() const; + + ObMetaPointer &operator = (const ObMetaPointer &other); + void reset_obj(); + + VIRTUAL_TO_STRING_KV(K_(phy_addr), K_(obj)); + + int serialize(char* buf, const int64_t buf_len, int64_t& pos) const; + int deserialize(const char *buf, const int64_t buf_len, int64_t &pos); + int64_t get_serialize_size() const; + + // load and dump interface + virtual int acquire_obj(T *&t); + int read_from_disk(const bool is_full_load, + common::ObArenaAllocator &allocator, char *&r_buf, int64_t &r_len, ObMetaDiskAddr &addr); + int deserialize( + common::ObArenaAllocator &allocator, + const char *buf, + const int64_t buf_len, T *t); + int deserialize( + const char *buf, + const int64_t buf_len, T *t); + int hook_obj(T *&t, ObMetaObjGuard &guard); + virtual int 
release_obj(T *&t); + virtual int dump_meta_obj(ObMetaObjGuard &guard, void *&free_obj) { return OB_NOT_IMPLEMENT; } + +protected: + virtual void reset(); + +protected: + ObMetaDiskAddr phy_addr_; + ObMetaObj obj_; +}; + +template +ObMetaPointer::ObMetaPointer() + : phy_addr_(), + obj_() +{ +} + +template +ObMetaPointer::ObMetaPointer(const ObMetaDiskAddr &addr) + : phy_addr_(addr), + obj_() +{ +} + +template +ObMetaPointer::ObMetaPointer(const ObMetaDiskAddr &addr, ObMetaObjGuard &guard) + : phy_addr_(addr), + obj_() +{ + guard.get_obj(obj_); + if (nullptr != obj_.ptr_) { + if (nullptr == obj_.pool_ && nullptr == obj_.allocator_) { + STORAGE_LOG_RET(ERROR, common::OB_ERR_UNEXPECTED, "object pool is nullptr", K_(obj)); + ob_abort(); + } else { + obj_.ptr_->inc_ref(); + } + } +} + +template +ObMetaPointer::ObMetaPointer(const ObMetaPointer &other) + : phy_addr_(), + obj_() +{ + *this = other; +} + +template +ObMetaPointer::~ObMetaPointer() +{ + reset(); +} + +template +int ObMetaPointer::acquire_obj(T *&t) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(obj_.pool_)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "object pool is nullptr", K(ret), K(obj_)); + } else if (OB_FAIL(static_cast *>(obj_.pool_)->acquire(t))) { + STORAGE_LOG(WARN, "fail to acquire object", K(ret), K(phy_addr_)); + } else if (OB_ISNULL(t)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "acquired object is nullptr", K(ret), KP(t)); + } + return ret; +} + +template +int ObMetaPointer::read_from_disk(const bool is_full_load, + common::ObArenaAllocator &allocator, char *&r_buf, int64_t &r_len, ObMetaDiskAddr &addr) +{ + int ret = OB_SUCCESS; + const int64_t buf_len = phy_addr_.size(); + const ObMemAttr mem_attr(MTL_ID(), "MetaPointer"); + ObTenantCheckpointSlogHandler *ckpt_slog_hanlder = MTL(ObTenantCheckpointSlogHandler*); + + if (OB_ISNULL(ckpt_slog_hanlder)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "slog handler is nullptr", K(ret), KP(ckpt_slog_hanlder)); + } else { + ObMetaDiskAddr 
real_load_addr = phy_addr_; + if (!is_full_load && addr.is_raw_block()) { + if (phy_addr_.size() > ObTabletCommon::MAX_TABLET_FIRST_LEVEL_META_SIZE) { + real_load_addr.set_size(ObTabletCommon::MAX_TABLET_FIRST_LEVEL_META_SIZE); + } + } + if (OB_FAIL(ckpt_slog_hanlder->read_from_disk(real_load_addr, allocator, r_buf, r_len))) { + if (OB_SEARCH_NOT_FOUND != ret) { + STORAGE_LOG(WARN, "fail to read from addr", K(ret), K(phy_addr_)); + } + } else { + addr = phy_addr_; + } + } + return ret; +} + +template +int ObMetaPointer::hook_obj(T *&t, ObMetaObjGuard &guard) +{ + int ret = OB_SUCCESS; + guard.reset(); + + if (OB_ISNULL(t)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(ERROR, "load null obj from disk", K(ret), K(phy_addr_)); + } else if (OB_NOT_NULL(obj_.ptr_)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(ERROR, "obj already hooked", K(ret), K(phy_addr_), KP(t), KP(obj_.ptr_)); + } else if (OB_UNLIKELY(0 != t->get_ref())) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(ERROR, "obj ref cnt not 0", K(ret), K(phy_addr_), K(t->get_ref())); + } else { + t->inc_ref(); + t->set_tablet_addr(phy_addr_); + obj_.ptr_ = t; + guard.set_obj(obj_); + ObMetaObjBufferHelper::set_in_map(reinterpret_cast(t), true/*in_map*/); + } + + if (OB_FAIL(ret) && OB_NOT_NULL(t)) { + obj_.pool_->free_obj(t); + obj_.ptr_ = nullptr; + t = nullptr; + } + + return ret; +} + +template +int ObMetaPointer::release_obj(T *&t) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(t)) { + // do nothing + } else if (OB_UNLIKELY(nullptr == obj_.pool_ && nullptr == obj_.allocator_)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "object pool or allocator is nullptr", K(ret), K(obj_)); + } else if (nullptr != obj_.pool_) { + obj_.pool_->free_obj(t); + t = nullptr; + } else { + t->~T(); + obj_.allocator_->free(t); + t = nullptr; + } + return ret; +} + +template +int ObMetaPointer::get_in_memory_obj(ObMetaObjGuard &guard) +{ + int ret = OB_SUCCESS; + guard.reset(); + + if (OB_UNLIKELY(phy_addr_.is_none())) { + ret = 
OB_ITEM_NOT_SETTED; + STORAGE_LOG(DEBUG, "meta disk addr is none, no object to be got", K(ret), K(phy_addr_)); + } else if (OB_UNLIKELY(!is_in_memory())) { + ret = OB_NOT_SUPPORTED; + STORAGE_LOG(ERROR, "object isn't in memory, not support", K(ret), K(phy_addr_)); + } else { + guard.set_obj(obj_); + } + return ret; +} + +template +void ObMetaPointer::get_obj(ObMetaObjGuard &guard) +{ + guard.set_obj(obj_); +} + +template +int ObMetaPointer::deep_copy(char *buf, const int64_t buf_len, ObMetaPointer *&value) const +{ + int ret = OB_SUCCESS; + const int64_t deep_copy_size = get_deep_copy_size(); + if (OB_ISNULL(buf) || buf_len < deep_copy_size) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid argument", K(ret), KP(buf), K(buf_len)); + } else { + ObMetaPointer *pvalue = new (buf) ObMetaPointer(phy_addr_); + pvalue->obj_.pool_ = obj_.pool_; + pvalue->obj_.allocator_ = obj_.allocator_; + pvalue->obj_.ptr_ = obj_.ptr_; + if (nullptr != obj_.ptr_) { + if (OB_UNLIKELY(nullptr == obj_.pool_ && nullptr == obj_.allocator_)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(ERROR, "object pool is nullptr", K(ret), K_(obj)); + ob_abort(); + } else { + obj_.ptr_->inc_ref(); + } + } + + if (OB_SUCC(ret)) { + value = pvalue; + } else { + pvalue->~ObMetaPointer(); + } + } + return ret; +} + +template +int64_t ObMetaPointer::get_deep_copy_size() const +{ + return sizeof(*this); +} + +template +bool ObMetaPointer::is_in_memory() const +{ + return nullptr != obj_.ptr_; +} + +template +void ObMetaPointer::set_obj_pool(ObITenantMetaObjPool &obj_pool) +{ + obj_.pool_ = &obj_pool; +} + +template +void ObMetaPointer::set_addr_without_reset_obj(const ObMetaDiskAddr &addr) +{ + phy_addr_ = addr; +} + +template +void ObMetaPointer::set_addr_with_reset_obj(const ObMetaDiskAddr &addr) +{ + reset_obj(); + phy_addr_ = addr; +} + +template +void ObMetaPointer::set_obj(const ObMetaObjGuard &guard) +{ + reset_obj(); + guard.get_obj(obj_); + set_attr_for_obj(obj_.ptr_); + if (nullptr != obj_.ptr_) 
{ + if (OB_UNLIKELY(nullptr == obj_.pool_ && nullptr == obj_.allocator_)) { + STORAGE_LOG_RET(ERROR, common::OB_ERR_UNEXPECTED, "object pool is nullptr", K_(obj)); + ob_abort(); + } else { + obj_.ptr_->inc_ref(); + } + } +} + +template +int ObMetaPointer::set_attr_for_obj(T *t) +{ + // do nothing. + UNUSED(t); + return common::OB_SUCCESS; +} + +template +int ObMetaPointer::serialize(char* buf, const int64_t buf_len, int64_t& pos) const +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len <= 0 || pos < 0 || pos >= buf_len)) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid argument", K(ret), K(buf), K(buf_len), K(pos)); + } else if (OB_FAIL(phy_addr_.serialize(buf, buf_len, pos))) { + STORAGE_LOG(WARN, "fail to serialize physical address", K(ret), K(phy_addr_)); + } + return ret; +} + +template +int ObMetaPointer::deserialize(const char *buf, const int64_t buf_len, int64_t &pos) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(buf_len <= 0 || pos < 0 || pos >= buf_len) || OB_ISNULL(buf)) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid argument", K(ret), KP(buf), K(buf_len), K(pos)); + } else if (OB_FAIL(phy_addr_.deserialize(buf, buf_len, pos))) { + STORAGE_LOG(WARN, "fail to de-serialize physical address", K(ret), K(phy_addr_)); + } + return ret; +} + +template +int64_t ObMetaPointer::get_serialize_size() const +{ + return phy_addr_.get_serialize_size(); +} + +template +int ObMetaPointer::deserialize( + common::ObArenaAllocator &allocator, + const char *buf, + const int64_t buf_len, + T *t) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + if (OB_UNLIKELY(buf_len <= 0) || OB_ISNULL(buf) || OB_ISNULL(t)) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid argument", K(ret), KP(buf), K(buf_len), KP(t)); + } else if (OB_FAIL(set_attr_for_obj(t))) { + STORAGE_LOG(WARN, "fail to set attr for obj", K(ret)); + } else if (OB_FAIL(t->load_deserialize(allocator, buf, buf_len, pos))) { + STORAGE_LOG(WARN, "fail to de-serialize T", 
K(ret), KP(buf), K(buf_len), KP(t)); + } + return ret; +} + +template +int ObMetaPointer::deserialize( + const char *buf, + const int64_t buf_len, + T *t) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + if (OB_UNLIKELY(buf_len <= 0) || OB_ISNULL(buf) || OB_ISNULL(t)) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid argument", K(ret), KP(buf), K(buf_len), KP(t)); + } else if (OB_FAIL(set_attr_for_obj(t))) { + STORAGE_LOG(WARN, "fail to set attr for obj", K(ret)); + } else if (OB_FAIL(t->deserialize(buf, buf_len, pos))) { + STORAGE_LOG(WARN, "fail to de-serialize T", K(ret), KP(buf), K(buf_len), KP(t)); + } + return ret; +} + +template +void ObMetaPointer::reset_obj() +{ + if (nullptr != obj_.ptr_) { + if (OB_UNLIKELY(nullptr == obj_.pool_ && nullptr == obj_.allocator_)) { + STORAGE_LOG_RET(ERROR, common::OB_ERR_UNEXPECTED, "object pool is nullptr", K_(obj)); + ob_abort(); + } else { + const int64_t ref_cnt = obj_.ptr_->dec_ref(); + if (0 == ref_cnt) { + if (nullptr != obj_.pool_) { + obj_.pool_->free_obj(obj_.ptr_); + } else { + obj_.ptr_->~T(); + obj_.allocator_->free(obj_.ptr_); + } + } else if (OB_UNLIKELY(ref_cnt < 0)) { + STORAGE_LOG_RET(ERROR, common::OB_ERR_UNEXPECTED, "obj ref cnt may be leaked", K(ref_cnt), KPC(this)); + } + // The pool ptr on tablet pointer cann't be reset nullptr here. Otherwise, you will + // encounter the following bug when the tablet is deleted from the map. + // + // Bug timeline: + // - Thread 1 load tablet from meta. + // - Thread 2 remove tablet from map. + // - Thread 1 fail to hook loaded tablet into pointer. + // - Thread 1 rolls back and releases the tablet and encounters an error. 
+ obj_.ptr_ = nullptr; + obj_.allocator_ = nullptr; + } + } +} + +template +void ObMetaPointer::reset() +{ + reset_obj(); + phy_addr_.reset(); +} + +template +ObMetaPointer &ObMetaPointer::operator = (const ObMetaPointer &other) +{ + if (this != &other) { + reset(); + phy_addr_ = other.phy_addr_; + obj_.pool_ = other.obj_.pool_; + if (nullptr != other.obj_.ptr_) { + if (nullptr == other.obj_.pool_) { + STORAGE_LOG_RET(ERROR, common::OB_ERR_UNEXPECTED, "object pool is nullptr", K(other)); + ob_abort(); + } else { + obj_.ptr_ = other.obj_.ptr_; + other.obj_.ptr_->inc_ref(); + } + } + } + + return *this; +} + +} // end namespace storage +} // end namespace oceanbase + +#endif /* OCEANBASE_STORAGE_OB_META_POINTER_H_ */ diff --git a/src/storage/meta_mem/ob_storage_meta_cache.cpp b/src/storage/meta_mem/ob_storage_meta_cache.cpp index d91a36441a..5080ee915a 100644 --- a/src/storage/meta_mem/ob_storage_meta_cache.cpp +++ b/src/storage/meta_mem/ob_storage_meta_cache.cpp @@ -44,20 +44,6 @@ ObStorageMetaKey::ObStorageMetaKey(const uint64_t tenant_id, const ObMetaDiskAdd { } -ObStorageMetaKey::ObStorageMetaKey( - const uint64_t tenant_id, - const blocksstable::MacroBlockId &block_id, - const int64_t offset, - const int64_t size) - : tenant_id_(tenant_id) -{ - int ret = OB_SUCCESS; - if (OB_FAIL(phy_addr_.set_block_addr(block_id, offset, size))) { - LOG_ERROR("fail to set block address", K(ret), K(block_id), K(offset), K(size)); - } - abort_unless(OB_SUCCESS == ret); -} - ObStorageMetaKey::~ObStorageMetaKey() { } @@ -657,78 +643,47 @@ void ObStorageMetaCache::destory() common::ObKVCache::destroy(); } -ObStorageMetaCache::ObStorageMetaIOCallback::ObStorageMetaIOCallback() - : meta_type_(ObStorageMetaValue::MetaType::MAX), - offset_(0), - buf_size_(0), - data_buf_(nullptr), - handle_(), - allocator_(nullptr), - key_(), - tablet_(nullptr), - arena_allocator_(nullptr) +ObStorageMetaCache::ObStorageMetaIOCallback::ObStorageMetaIOCallback( + common::ObIAllocator *io_allocator, + 
const ObStorageMetaValue::MetaType type, + const ObStorageMetaKey &key, + ObStorageMetaValueHandle &handle, + const ObTablet *tablet, + common::ObSafeArenaAllocator *arena_allocator) + : ObSharedBlockIOCallback(io_allocator, key.get_meta_addr()), + meta_type_(type), + key_(key), + handle_(handle), + tablet_(tablet), + arena_allocator_(arena_allocator) { static_assert(sizeof(*this) <= CALLBACK_BUF_SIZE, "IOCallback buf size not enough"); } ObStorageMetaCache::ObStorageMetaIOCallback::~ObStorageMetaIOCallback() { - if (nullptr != allocator_ && NULL != data_buf_) { - allocator_->free(data_buf_); - data_buf_ = nullptr; - } meta_type_ = ObStorageMetaValue::MetaType::MAX; - offset_ = 0; - buf_size_ = 0; handle_.reset(); - allocator_ = nullptr; } -int ObStorageMetaCache::ObStorageMetaIOCallback::alloc_data_buf(const char *io_data_buffer, const int64_t data_size) -{ - int ret = alloc_and_copy_data(io_data_buffer, data_size, allocator_, data_buf_); - return ret; -} - -int ObStorageMetaCache::ObStorageMetaIOCallback::inner_process(const char *data_buffer, const int64_t size) +int ObStorageMetaCache::ObStorageMetaIOCallback::do_process(const char *buf, const int64_t buf_len) { // TODO: callback need to deal with block-crossed shared blocks, // in which scene we only store the first blocks' addr int ret = OB_SUCCESS; - ObTimeGuard time_guard("StorageMeta_Callback_Process", 100000); //100ms if (OB_UNLIKELY(!is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid storage meta cache callback", K(ret), K_(handle)); - } else if (OB_UNLIKELY(size <= 0 || data_buffer == nullptr)) { + } else if (OB_UNLIKELY(buf_len <= 0 || buf == nullptr)) { ret = OB_INVALID_DATA; - LOG_WARN("invalid data buffer size", K(ret), K(size), KP(data_buffer)); - } else if (OB_FAIL(alloc_data_buf(data_buffer, size))) { - LOG_WARN("Fail to allocate memory, ", K(ret), K(size)); - } else if (FALSE_IT(time_guard.click("alloc_data_buf"))) { - } else { - char *buf = nullptr; - int64_t buf_len = 0; - if 
(OB_FAIL(ObSharedBlockReadHandle::parse_data(data_buf_, size, buf, buf_len))) { - LOG_WARN("fail to parse data by shared block handle", K(ret), KP(data_buf_)); - } else if (FALSE_IT(time_guard.click("parse_data"))) { - } else if (OB_UNLIKELY(nullptr != arena_allocator_)) { // bypass cache processor - if (OB_FAIL(ObStorageMetaValue::bypass_processor[meta_type_](meta_type_, *arena_allocator_, - handle_, buf, buf_len))) { - LOG_WARN("fail to process io buf", K(ret), K(meta_type_), KP(buf), K(buf_len)); - } - } else if (OB_FAIL(ObStorageMetaValue::processor[meta_type_](handle_, key_, buf, buf_len, tablet_))) { + LOG_WARN("invalid data buffer size", K(ret), K(buf_len), KP(buf)); + } else if (OB_UNLIKELY(nullptr != arena_allocator_)) { // bypass cache processor + if (OB_FAIL(ObStorageMetaValue::bypass_processor[meta_type_](meta_type_, *arena_allocator_, + handle_, buf, buf_len))) { LOG_WARN("fail to process io buf", K(ret), K(meta_type_), KP(buf), K(buf_len)); } - if (nullptr != arena_allocator_) { - time_guard.click("bypass_process"); - } else { - time_guard.click("cache_process"); - } - } - - if (OB_FAIL(ret) && NULL != allocator_ && NULL != data_buf_) { - allocator_->free(data_buf_); - data_buf_ = NULL; + } else if (OB_FAIL(ObStorageMetaValue::processor[meta_type_](handle_, key_, buf, buf_len, tablet_))) { + LOG_WARN("fail to process io buf", K(ret), K(meta_type_), KP(buf), K(buf_len)); } return ret; } @@ -738,18 +693,10 @@ int64_t ObStorageMetaCache::ObStorageMetaIOCallback::size() const return sizeof(*this); } -const char *ObStorageMetaCache::ObStorageMetaIOCallback::get_data() -{ - return data_buf_; -} bool ObStorageMetaCache::ObStorageMetaIOCallback::is_valid() const { - return key_.is_valid() - && handle_.is_valid() - && nullptr != allocator_ - && offset_ >= 0 - && buf_size_ > 0; + return ObSharedBlockIOCallback::is_valid() && key_.is_valid() && handle_.is_valid(); } int ObStorageMetaCache::get_meta( @@ -853,15 +800,12 @@ int ObStorageMetaCache::prefetch( ret = 
OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(WARN, "allocate callback memory failed", K(ret)); } else { - callback = new (buf) ObStorageMetaIOCallback; - //fill callback - callback->meta_type_ = type; - callback->offset_ = key.get_meta_addr().offset(); - callback->buf_size_ = key.get_meta_addr().size(); - callback->handle_ = meta_handle.cache_handle_; - callback->allocator_ = &(io_allocator); - callback->tablet_= tablet; - callback->key_ = key; + callback = new (buf) ObStorageMetaIOCallback(&io_allocator, + type, + key, + meta_handle.cache_handle_, + tablet, + nullptr/*bypass_cache if nullptr*/); if (OB_FAIL(read_io(key.get_meta_addr(), *callback, meta_handle))) { LOG_WARN("fail to read storage meta from io", K(ret), K(key), K(meta_handle)); } @@ -894,16 +838,12 @@ int ObStorageMetaCache::get_meta_and_bypass_cache( ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(WARN, "allocate callback memory failed", K(ret)); } else { - callback = new (buf) ObStorageMetaIOCallback; - //fill callback - callback->meta_type_ = type; - callback->offset_ = key.get_meta_addr().offset(); - callback->buf_size_ = key.get_meta_addr().size(); - callback->handle_ = handle.cache_handle_; - callback->allocator_ = &(io_allocator); - callback->tablet_= nullptr;/*tablet*/ - callback->key_ = key; - callback->arena_allocator_ = &allocator;/*bypass_cache*/ + callback = new (buf) ObStorageMetaIOCallback(&io_allocator, + type, + key, + handle.cache_handle_, + nullptr/*tablet*/, + &allocator/*bypass_cache if nullptr*/); if (OB_FAIL(read_io(key.get_meta_addr(), *callback, handle))) { LOG_WARN("fail to read storage meta from io", K(ret), K(key), K(handle)); } diff --git a/src/storage/meta_mem/ob_storage_meta_cache.h b/src/storage/meta_mem/ob_storage_meta_cache.h index a74ecf7662..66e32dad3e 100644 --- a/src/storage/meta_mem/ob_storage_meta_cache.h +++ b/src/storage/meta_mem/ob_storage_meta_cache.h @@ -48,11 +48,6 @@ public: ObStorageMetaKey( const uint64_t tenant_id, const ObMetaDiskAddr &phy_addr); - 
ObStorageMetaKey( - const uint64_t tenant_id, - const blocksstable::MacroBlockId &block_id, - const int64_t offset, - const int64_t size); virtual ~ObStorageMetaKey(); virtual bool operator ==(const ObIKVCacheKey &other) const override; virtual uint64_t get_tenant_id() const override; @@ -250,29 +245,32 @@ public: common::ObSafeArenaAllocator &allocator, common::ObIArray &meta_handles); private: - class ObStorageMetaIOCallback : public common::ObIOCallback + class ObStorageMetaIOCallback : public ObSharedBlockIOCallback { public: - ObStorageMetaIOCallback(); + ObStorageMetaIOCallback( + common::ObIAllocator *io_allocator, + const ObStorageMetaValue::MetaType type, + const ObStorageMetaKey &key, + ObStorageMetaValueHandle &handle, + const ObTablet *tablet, + common::ObSafeArenaAllocator *arena_allocator); virtual ~ObStorageMetaIOCallback(); - virtual int alloc_data_buf(const char *io_data_buffer, const int64_t data_size) override; - virtual int inner_process(const char *data_buffer, const int64_t size) override; + virtual int do_process(const char *data_buffer, const int64_t size) override; virtual int64_t size() const override; - virtual const char *get_data() override; - virtual ObIAllocator *get_allocator() override { return allocator_; } bool is_valid() const; - TO_STRING_KV("callback_type:", "ObStorageMetaIOCallback", K_(offset), K_(buf_size), KP_(data_buf), K_(key), KP_(tablet), KP_(arena_allocator)); + + INHERIT_TO_STRING_KV("ObSharedBlockIOCallback", ObSharedBlockIOCallback, + K_(key), KP_(tablet), KP_(arena_allocator)); + private: DISALLOW_COPY_AND_ASSIGN(ObStorageMetaIOCallback); + private: friend class ObStorageMetaCache; ObStorageMetaValue::MetaType meta_type_; - int64_t offset_; // offset in block. - int64_t buf_size_; // read size in block. 
- char *data_buf_; // actual data buffer - ObStorageMetaValueHandle handle_; - common::ObIAllocator *allocator_; ObStorageMetaKey key_; + ObStorageMetaValueHandle handle_; const ObTablet *tablet_; common::ObSafeArenaAllocator *arena_allocator_; }; diff --git a/src/storage/meta_mem/ob_tablet_pointer.cpp b/src/storage/meta_mem/ob_tablet_pointer.cpp index bc2721af1b..98967dbbbc 100644 --- a/src/storage/meta_mem/ob_tablet_pointer.cpp +++ b/src/storage/meta_mem/ob_tablet_pointer.cpp @@ -124,6 +124,7 @@ void ObTabletPointer::reset_obj() } int ObTabletPointer::read_from_disk( + const bool is_full_load, common::ObArenaAllocator &allocator, char *&r_buf, int64_t &r_len, @@ -136,12 +137,21 @@ int ObTabletPointer::read_from_disk( if (OB_ISNULL(ckpt_slog_hanlder)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "slog handler is nullptr", K(ret), KP(ckpt_slog_hanlder)); - } else if (OB_FAIL(ckpt_slog_hanlder->read_from_disk(phy_addr_, allocator, r_buf, r_len))) { - if (OB_SEARCH_NOT_FOUND != ret) { - STORAGE_LOG(WARN, "fail to read from addr", K(ret), K(phy_addr_)); - } } else { - addr = phy_addr_; + ObMetaDiskAddr real_load_addr = phy_addr_; + if (!is_full_load && addr.is_raw_block()) { + if (phy_addr_.size() > ObTabletCommon::MAX_TABLET_FIRST_LEVEL_META_SIZE) { + real_load_addr.set_size(ObTabletCommon::MAX_TABLET_FIRST_LEVEL_META_SIZE); + } + } + // read via real_load_addr so the size cap applied above (non-full load of a raw block) is honored + if (OB_FAIL(ckpt_slog_hanlder->read_from_disk(real_load_addr, allocator, r_buf, r_len))) { + if (OB_SEARCH_NOT_FOUND != ret) { + STORAGE_LOG(WARN, "fail to read from addr", K(ret), K(phy_addr_)); + } + } else { + addr = phy_addr_; + } } return ret; } diff --git a/src/storage/meta_mem/ob_tablet_pointer.h b/src/storage/meta_mem/ob_tablet_pointer.h index 64fae4243a..aee7435771 100644 --- a/src/storage/meta_mem/ob_tablet_pointer.h +++ b/src/storage/meta_mem/ob_tablet_pointer.h @@ -59,7 +59,8 @@ public: // load and dump interface int acquire_obj(ObTablet *&t); - int read_from_disk(common::ObArenaAllocator &allocator, char *&r_buf, int64_t &r_len,
ObMetaDiskAddr &addr); + int read_from_disk(const bool is_full_load, + common::ObArenaAllocator &allocator, char *&r_buf, int64_t &r_len, ObMetaDiskAddr &addr); int deserialize( common::ObArenaAllocator &allocator, const char *buf, diff --git a/src/storage/meta_mem/ob_tablet_pointer_map.cpp b/src/storage/meta_mem/ob_tablet_pointer_map.cpp index 9246e29a71..8da34c584e 100644 --- a/src/storage/meta_mem/ob_tablet_pointer_map.cpp +++ b/src/storage/meta_mem/ob_tablet_pointer_map.cpp @@ -328,7 +328,7 @@ int ObTabletPointerMap::load_meta_obj( } else { STORAGE_LOG(INFO, "the tablet has been deleted", K(ret), K(key)); } - } else if (OB_FAIL(meta_pointer->read_from_disk(arena_allocator, buf, buf_len, load_addr))) { + } else if (OB_FAIL(meta_pointer->read_from_disk(true/*is_full_load*/, arena_allocator, buf, buf_len, load_addr))) { STORAGE_LOG(WARN, "fail to read from disk", K(ret), KPC(meta_pointer)); } else { t->tablet_addr_ = load_addr; @@ -373,7 +373,7 @@ int ObTabletPointerMap::load_meta_obj( } else { STORAGE_LOG(INFO, "the tablet has been deleted", K(ret), K(key)); } - } else if (OB_FAIL(meta_pointer->read_from_disk(arena_allocator, buf, buf_len, load_addr))) { + } else if (OB_FAIL(meta_pointer->read_from_disk(false/*is_full_load*/, arena_allocator, buf, buf_len, load_addr))) { STORAGE_LOG(WARN, "fail to read from disk", K(ret), KPC(meta_pointer)); } else { t->tablet_addr_ = load_addr; diff --git a/src/storage/ob_disk_usage_reporter.cpp b/src/storage/ob_disk_usage_reporter.cpp index 9661bb0de7..137d999cbb 100644 --- a/src/storage/ob_disk_usage_reporter.cpp +++ b/src/storage/ob_disk_usage_reporter.cpp @@ -173,42 +173,59 @@ int ObDiskUsageReportTask::report_tenant_disk_usage(const char *svr_ip, return ret; } + int ObDiskUsageReportTask::count_tenant_data(const uint64_t tenant_id) { int ret = OB_SUCCESS; - ObTenantMetaMemMgr *t3m = MTL(ObTenantMetaMemMgr*); - ObArenaAllocator iter_allocator("DiskReport", OB_MALLOC_NORMAL_BLOCK_SIZE, tenant_id); - ObTenantTabletIterator 
tablet_iter(*t3m, iter_allocator); - ObTabletHandle tablet_handle; - ObDiskUsageReportKey report_key; - + common::ObSArray block_list; + ObDiskUsageReportKey meta_key; + ObDiskUsageReportKey data_key; + int64_t meta_size = 0; int64_t data_size = 0; - int64_t sstable_size = 0; - while (OB_SUCC(ret) && OB_SUCC(tablet_iter.get_next_tablet(tablet_handle))) { - if (OB_UNLIKELY(!tablet_handle.is_valid())) { - ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(WARN, "unexpected invalid tablet", K(ret), K(tablet_handle)); - } else if (tablet_handle.get_obj()->is_empty_shell()) { - // skip empty shell - } else if (OB_FAIL(tablet_handle.get_obj()->get_sstables_size(sstable_size, true /*ignore shared block*/))) { - STORAGE_LOG(WARN, "failed to get new tablet's disk usage", K(ret), K(sstable_size)); - } else { - data_size += sstable_size; - } - sstable_size = 0; - tablet_handle.reset(); - iter_allocator.reuse(); - } - if (OB_ITER_END == ret || OB_SUCCESS == ret) { - ret = OB_SUCCESS; - data_size += MTL(ObSharedMacroBlockMgr*)->get_shared_block_cnt() * OB_DEFAULT_MACRO_BLOCK_SIZE; - report_key.tenant_id_ = tenant_id; - report_key.file_type_ = ObDiskReportFileType::OB_DISK_REPORT_TENANT_DATA; - if (OB_FAIL(result_map_.set_refactored(report_key, data_size, 1))) { - STORAGE_LOG(WARN, "failed to set result_map_", K(ret), K(report_key), K(data_size)); - } - } + int64_t tmp_meta_size = 0; + int64_t tmp_data_size = 0; + if (OB_FAIL(MTL(ObTenantCheckpointSlogHandler*)->get_meta_block_list(block_list))) { + STORAGE_LOG(WARN, "failed to get tenant's meta block list", K(ret)); + } else { + ObTenantMetaMemMgr *t3m = MTL(ObTenantMetaMemMgr*); + ObArenaAllocator iter_allocator("DiskReport", OB_MALLOC_NORMAL_BLOCK_SIZE, tenant_id); + ObTenantTabletIterator tablet_iter(*t3m, iter_allocator); + ObTabletHandle tablet_handle; + while (OB_SUCC(ret) && OB_SUCC(tablet_iter.get_next_tablet(tablet_handle))) { + if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, 
"unexpected invalid tablet", K(ret), K(tablet_handle)); + } else if (tablet_handle.get_obj()->is_empty_shell()) { + // skip empty shell + } else if (OB_FAIL(tablet_handle.get_obj()->get_tablet_size( + true /*ignore shared block*/, tmp_meta_size, tmp_data_size))) { + STORAGE_LOG(WARN, "failed to get tablet's meta and data size", K(ret)); + } else { + meta_size += tmp_meta_size; + data_size += tmp_data_size; + } + tablet_handle.reset(); + iter_allocator.reuse(); + tmp_meta_size = 0; + tmp_data_size = 0; + } + + if (OB_ITER_END == ret || OB_SUCCESS == ret) { + ret = OB_SUCCESS; + data_size += MTL(ObSharedMacroBlockMgr*)->get_shared_block_cnt() * OB_DEFAULT_MACRO_BLOCK_SIZE; + meta_size += block_list.count() * OB_DEFAULT_MACRO_BLOCK_SIZE; + meta_key.tenant_id_ = tenant_id; + meta_key.file_type_ = ObDiskReportFileType::OB_DISK_REPORT_TENANT_META_DATA; + data_key.tenant_id_ = tenant_id; + data_key.file_type_ = ObDiskReportFileType::OB_DISK_REPORT_TENANT_DATA; + if (OB_FAIL(result_map_.set_refactored(meta_key, meta_size, 1 /* whether allowed to override */))) { + STORAGE_LOG(WARN, "failed to insert meta info result_map_", K(ret), K(meta_key), K(meta_size)); + } else if (OB_FAIL(result_map_.set_refactored(data_key, data_size, 1 /* whether allowed to override */))) { + STORAGE_LOG(WARN, "failed to insert data info result_map_", K(ret), K(data_key), K(data_size)); + } + } + } return ret; } @@ -231,8 +248,6 @@ int ObDiskUsageReportTask::count_tenant() STORAGE_LOG(WARN, "failed to count tenant's slog", K(ret)); } else if (OB_FAIL(count_tenant_clog(tenant_id))) { STORAGE_LOG(WARN, "failed to count tenant's clog", K(ret)); - } else if (OB_FAIL(count_tenant_meta(tenant_id))) { - STORAGE_LOG(WARN, "failed to count tenant's meta", K(ret)); } else if (OB_FAIL(count_tenant_data(tenant_id))) { STORAGE_LOG(WARN, "failed to count tenant's data", K(ret)); } @@ -305,27 +320,6 @@ int ObDiskUsageReportTask::count_tenant_clog(const uint64_t tenant_id) return ret; } -int 
ObDiskUsageReportTask::count_tenant_meta(const uint64_t tenant_id) -{ - int ret = OB_SUCCESS; - ObTenantCheckpointSlogHandler *ckp_handler = nullptr; - common::ObSArray block_list; - ObDiskUsageReportKey report_key; - if (OB_ISNULL(ckp_handler = MTL(ObTenantCheckpointSlogHandler*))) { - ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(WARN, "checkpoint slog handler is null", K(ret), KP(ckp_handler)); - } else if (OB_FAIL(ckp_handler->get_meta_block_list(block_list))) { - STORAGE_LOG(WARN, "failed to get tenant's meta block list", K(ret)); - } else { - report_key.tenant_id_ = tenant_id; - report_key.file_type_ = ObDiskReportFileType::OB_DISK_REPORT_TENANT_META_DATA; - if (OB_FAIL(result_map_.set_refactored(report_key, block_list.count() * common::OB_DEFAULT_MACRO_BLOCK_SIZE, 1))) { - STORAGE_LOG(WARN, "failed to set result_map_", K(ret), K(report_key), K(block_list.count())); - } - } - return ret; -} - int ObDiskUsageReportTask::count_server_slog() { int ret = OB_SUCCESS; diff --git a/src/storage/ob_disk_usage_reporter.h b/src/storage/ob_disk_usage_reporter.h index 24980b4fc8..52ba701691 100644 --- a/src/storage/ob_disk_usage_reporter.h +++ b/src/storage/ob_disk_usage_reporter.h @@ -97,7 +97,6 @@ private: int count_tenant(); int count_tenant_slog(const uint64_t tenant_id); int count_tenant_clog(const uint64_t tenant_id); - int count_tenant_meta(const uint64_t tenant_id); int count_tenant_data(const uint64_t tenant_id); int count_server_slog(); int count_server_clog(); diff --git a/src/storage/slog_ckpt/ob_linked_macro_block_reader.cpp b/src/storage/slog_ckpt/ob_linked_macro_block_reader.cpp index a855caa543..cc2f4ab0b2 100644 --- a/src/storage/slog_ckpt/ob_linked_macro_block_reader.cpp +++ b/src/storage/slog_ckpt/ob_linked_macro_block_reader.cpp @@ -413,7 +413,7 @@ int ObLinkedMacroBlockItemReader::parse_item( } else { offset = buf_pos_; } - if (OB_FAIL(addr.set_block_addr(buf_block_id_, offset, size))) { + if (OB_FAIL(addr.set_block_addr(buf_block_id_, offset, size, 
ObMetaDiskAddr::DiskType::BLOCK))) { LOG_WARN("fail to set block address", K(ret), K(buf_block_id_), K(offset), K(size)); } else { buf_pos_ += sizeof(ObLinkedMacroBlockItemHeader); diff --git a/src/storage/slog_ckpt/ob_linked_macro_block_writer.cpp b/src/storage/slog_ckpt/ob_linked_macro_block_writer.cpp index 9e9a31103f..15f50998d3 100644 --- a/src/storage/slog_ckpt/ob_linked_macro_block_writer.cpp +++ b/src/storage/slog_ckpt/ob_linked_macro_block_writer.cpp @@ -301,7 +301,7 @@ int ObLinkedMacroBlockItemWriter::set_pre_block_inflight_items_addr( for (int64_t idx = first_inflight_item_idx_; OB_SUCC(ret) && idx < first_inflight_item_idx_ + pre_block_inflight_items_cnt_; idx++) { ObMetaDiskAddr addr; - if (OB_FAIL(addr.set_block_addr(pre_block_id, offset, item_size_arr_.at(idx)))) { + if (OB_FAIL(addr.set_block_addr(pre_block_id, offset, item_size_arr_.at(idx), ObMetaDiskAddr::DiskType::BLOCK))) { LOG_WARN("fail to push back address", K(ret), K(addr)); } else if (OB_FAIL(item_disk_addr_arr_.push_back(addr))) { LOG_WARN("fail to push back address", K(ret), K(addr)); diff --git a/src/storage/slog_ckpt/ob_tablet_replay_create_handler.cpp b/src/storage/slog_ckpt/ob_tablet_replay_create_handler.cpp new file mode 100644 index 0000000000..111aecbed7 --- /dev/null +++ b/src/storage/slog_ckpt/ob_tablet_replay_create_handler.cpp @@ -0,0 +1,562 @@ + +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE + +#include "ob_tablet_replay_create_handler.h" +#include "share/rc/ob_tenant_base.h" +#include "observer/omt/ob_tenant.h" +#include "share/scn.h" +#include "storage/tx_storage/ob_ls_handle.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "storage/blocksstable/ob_macro_block_id.h" +#include "storage/high_availability/ob_tablet_transfer_info.h" +#include "storage/high_availability/ob_storage_ha_struct.h" +#include "storage/ls/ob_ls_tablet_service.h" +#include "storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.h" +#include "storage/blockstore/ob_shared_block_reader_writer.h" + +namespace oceanbase +{ +using namespace share; +using namespace blocksstable; + +namespace storage +{ + +int ObTabletReplayCreateTask::init( + const int64_t task_idx, + const ObTabletReplayCreateTask::Type type, + share::ObTenantBase *tenant_base, + ObTabletReplayCreateHandler *handler) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("task has been inited", K(ret), KPC(this)); + } else { + idx_ = task_idx; + type_ = type; + tenant_base_ = tenant_base; + handler_ = handler; + handler->inc_inflight_task_cnt(); + is_inited_ = true; + } + return ret; +} + +void ObTabletReplayCreateTask::destroy() +{ + if (IS_INIT) { + handler_->dec_inflight_task_cnt(); + idx_ = -1; + type_ = Type::MAX; + tenant_base_ = nullptr; + handler_ = nullptr; + replay_item_range_arr_.reset(); + tablet_cnt_ = 0; + is_inited_ = false; + } +} +int ObTabletReplayCreateTask::execute() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("task not init", K(ret), KPC(this)); + } else { + ObTenantSwitchGuard guard(tenant_base_); + if (Type::DISCRETE == type_ && + OB_FAIL(handler_->replay_discrete_tablets(replay_item_range_arr_))) { + LOG_WARN("fail to execute replay_discrete_tablets", K(ret), KPC(this)); + } else if (Type::AGGREGATE == type_ && + 
OB_FAIL(handler_->replay_aggregate_tablets(replay_item_range_arr_))) { + } else { + FLOG_INFO("successfully execute replay create tablet task", KPC(this)); + handler_->inc_finished_tablet_cnt(tablet_cnt_); + } + } + if (OB_FAIL(ret)) { + handler_->set_errcode(ret); + } + return ret; +} + +int ObTabletReplayCreateTask::add_item_range(const ObTabletReplayItemRange &range, bool &is_enough) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("task not init", K(ret)); + } else if (OB_FAIL(replay_item_range_arr_.push_back(range))) { + LOG_WARN("fail to push_back", K(ret), K(*this)); + } else { + is_enough = false; + tablet_cnt_ += range.second - range.first; + if (AGGREGATE == type_) { + if (replay_item_range_arr_.count() >= MAX_AGGREGATE_BLOCK_CNT_PER_TASK) { + is_enough = true; + } + } else { + if (tablet_cnt_ >= MAX_DISCRETE_TABLET_CNT_PER_TASK) { + is_enough = true; + } + } + } + return ret; +} + +// ObMetaDiskAddr order: FILE < BLOCK < RAW_BLOCK +// the FILE disk type is only for empty shell tablet +bool ObTabletReplayItem::operator<(const ObTabletReplayItem &r) const +{ + bool ret = false; + if (addr_.type() < r.addr_.type()) { + ret = true; + } else if (addr_.type() == r.addr_.type()) { + if (addr_.is_block()) { + if (addr_.block_id() < r.addr_.block_id()) { + ret = true; + } else { + // addrs in same block are no need to sort between themself by offset + } + } else { + // FILE addrs are no need to sort between themself + } + } + return ret; +} + +//============================= ObTabletReplayCreateHandler ==============================// +ObTabletReplayCreateHandler::ObTabletReplayCreateHandler() + : is_inited_(false), + allocator_("TabletReplay", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), + task_idx_(0), + inflight_task_cnt_(0), + finished_tablet_cnt_(0), + errcode_(OB_SUCCESS), + total_tablet_item_arr_(nullptr), + total_tablet_cnt_(0), + aggrgate_task_(nullptr), + discrete_task_(nullptr) +{ +} + +int ObTabletReplayCreateHandler::init( + 
const common::hash::ObHashMap &tablet_item_map) +{ + int ret = OB_SUCCESS; + int64_t cost_time_us = 0; + const int64_t start_time = ObTimeUtility::current_time(); + total_tablet_cnt_ = tablet_item_map.size(); + common::hash::ObHashMap::const_iterator iter = tablet_item_map.begin(); + + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("ObTabletReplayCreateHandler has inited", K(ret)); + } else if (0 == total_tablet_cnt_) { + // do nothing + } else if (OB_ISNULL(total_tablet_item_arr_ = + static_cast(allocator_.alloc(total_tablet_cnt_ * sizeof(ObTabletReplayItem))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc tablet_addr_arr", K(ret), K(total_tablet_cnt_)); + } else { + int64_t i = 0; + for ( ; iter != tablet_item_map.end(); iter++, i++) { + total_tablet_item_arr_[i] = ObTabletReplayItem(iter->first, iter->second); + } + if (i != total_tablet_cnt_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet count mismatch", K(ret), K(i), K(total_tablet_cnt_)); + } else { + std::sort(total_tablet_item_arr_, total_tablet_item_arr_ + total_tablet_cnt_); + } + } + + if (OB_SUCC(ret)) { + cost_time_us = ObTimeUtility::current_time() - start_time; + FLOG_INFO("finish init ObTabletReplayCreateHandler", K(ret), K(total_tablet_cnt_), K(cost_time_us)); + is_inited_ = true; + } + + return ret; +} + +int ObTabletReplayCreateHandler::concurrent_replay() +{ +// for version <= 4.1 or FILE type addr, only support discrete replay +#define ADD_ITEM_RANGE_TO_TASK(start_item_idx, end_item_idx, only_support_discrete) \ + ObTabletReplayItemRange range(start_item_idx, end_item_idx); \ + if (only_support_discrete) { \ + if (OB_FAIL(add_item_range_to_task_( \ + ObTabletReplayCreateTask::DISCRETE, range, discrete_task_))) { \ + LOG_WARN("fail to add_item_range_to_task_", K(ret), KPC(discrete_task_)); \ + } \ + } else if (is_suitable_to_aggregate_(tablet_cnt_in_block, valid_size_in_block)) { \ + if (OB_FAIL(add_item_range_to_task_( \ + 
ObTabletReplayCreateTask::AGGREGATE, range, aggrgate_task_))) { \ + LOG_WARN("fail to add_item_range_to_task_", K(ret), KPC(aggrgate_task_)); \ + } \ + } else { \ + if (OB_FAIL(add_item_range_to_task_( \ + ObTabletReplayCreateTask::DISCRETE, range, discrete_task_))) { \ + LOG_WARN("fail to add_item_range_to_task_", K(ret), KPC(discrete_task_)); \ + } \ + } + +#define ADD_LAST_TASK(task) \ + if (OB_SUCC(ret)) { \ + if (OB_NOT_NULL(task) && OB_FAIL(add_task_(task))) { \ + LOG_WARN("fail to add last task", K(ret), KPC(task), K(inflight_task_cnt_)); \ + task->~ObTabletReplayCreateTask(); \ + SERVER_STARTUP_TASK_HANDLER.get_task_allocator().free(task); \ + task = nullptr; \ + } \ + } + + int ret = OB_SUCCESS; + const int64_t start_time = ObTimeUtility::current_time(); + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTabletReplayCreateHandler not inited", K(ret)); + } else if (0 == total_tablet_cnt_) { + // do nothing + } else { + int64_t tablet_cnt_in_block = 0; + int64_t valid_size_in_block = 0; + const bool is_old_version = static_cast(MTL_CTX())->get_super_block().is_old_version(); + + // <1> ObMetaDiskAddr order is FILE < BLOCK < RAW_BLOCK, so handle FILE type addrs firstly + int64_t i = 0; + while (i < total_tablet_cnt_ && total_tablet_item_arr_[i].addr_.is_file()) { + i++; + } + if (i > 0) { // addrs of the file type is expected not much, so only use one task here + ADD_ITEM_RANGE_TO_TASK(0, i, true); + } + + // <2> handle block addr + MacroBlockId pre_block_id = total_tablet_item_arr_[i].addr_.block_id(); + MacroBlockId curr_block_id; + for ( ; OB_SUCC(ret) && i < total_tablet_cnt_; i++ ) { + curr_block_id = total_tablet_item_arr_[i].addr_.block_id(); + if (pre_block_id == curr_block_id) { + tablet_cnt_in_block ++; + valid_size_in_block += upper_align(total_tablet_item_arr_[i].addr_.size(), 4096); + } else { + ADD_ITEM_RANGE_TO_TASK(i - tablet_cnt_in_block, i, is_old_version); // [start_item_idx, end_item_idx) + pre_block_id = curr_block_id; + 
tablet_cnt_in_block = 1; + valid_size_in_block = upper_align(total_tablet_item_arr_[i].addr_.size(), 4096); + } + } + if (OB_SUCC(ret)) { // handle last range + ADD_ITEM_RANGE_TO_TASK(total_tablet_cnt_ - tablet_cnt_in_block, total_tablet_cnt_, is_old_version); + } + // handle last task + ADD_LAST_TASK(aggrgate_task_); + ADD_LAST_TASK(discrete_task_); +#undef ADD_ITEM_RANGE_TO_TASK +#undef ADD_LAST_TASK + + // <3> waiting all task finish even if failure has occurred + while (ATOMIC_LOAD(&inflight_task_cnt_) != 0) { + LOG_INFO("waiting all inflight replay create tablet task finish", K(inflight_task_cnt_)); + ob_usleep(20 * 1000); // 20ms + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(ATOMIC_LOAD(&errcode_))) { + LOG_WARN("ObReplayCreateTabletTask has failed", K(ret)); + } else if (ATOMIC_LOAD(&finished_tablet_cnt_) != total_tablet_cnt_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("finished replay tablet cnt mismatch", K(ret), K_(finished_tablet_cnt), K(total_tablet_cnt_)); + } + } + } + + int64_t cost_time_us = ObTimeUtility::current_time() - start_time; + FLOG_INFO("finish concurrently repaly tablets", K(ret), K(total_tablet_cnt_), K(cost_time_us)); + return ret; +} + +int ObTabletReplayCreateHandler::add_item_range_to_task_( + const ObTabletReplayCreateTask::Type type, const ObTabletReplayItemRange &range, ObTabletReplayCreateTask *&task) +{ + int ret = OB_SUCCESS; + + if (nullptr == task) { + if (OB_ISNULL(task = reinterpret_cast( + SERVER_STARTUP_TASK_HANDLER.get_task_allocator().alloc(sizeof(ObTabletReplayCreateTask))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc task buf", K(ret)); + } else if (FALSE_IT(task = new(task) ObTabletReplayCreateTask())) { + } else if (OB_FAIL(task->init(task_idx_++, type, MTL_CTX(), this))) { + LOG_WARN("fail to init ObTabletReplayCreateTask", K(ret)); + } + } + + if (OB_SUCC(ret)) { + bool is_enough = false; + if (OB_FAIL(task->add_item_range(range, is_enough))) { + LOG_WARN("fail to add tablet item range", K(ret), 
K(range.first), K(range.second), KPC(task)); + } else if (is_enough) { // tablet count of this task is enough and will create a new task at next round + if (OB_FAIL(add_task_(task))) { + LOG_WARN("fail to add replay tablet task", K(ret), KPC(task), K(inflight_task_cnt_)); + } else { + task = nullptr; + } + } + } + + if (OB_FAIL(ret) && OB_NOT_NULL(task)) { + task->~ObTabletReplayCreateTask(); + SERVER_STARTUP_TASK_HANDLER.get_task_allocator().free(task); + task = nullptr; + } + return ret; +} + +int ObTabletReplayCreateHandler::add_task_(ObTabletReplayCreateTask *task) +{ + int ret = OB_SUCCESS; + bool need_retry = false; + FLOG_INFO("add replay tablet task", KPC(task), K(inflight_task_cnt_)); + do { + need_retry = false; + if (OB_FAIL(ATOMIC_LOAD(&errcode_))) { + LOG_WARN("someone ObTabletReplayCreateTask has failed", K(ret), K(inflight_task_cnt_)); + } else if (OB_FAIL(SERVER_STARTUP_TASK_HANDLER.push_task(task))) { + if (OB_EAGAIN == ret) { + LOG_INFO("task queue is full, wait and retry", KPC(task), K(inflight_task_cnt_)); + need_retry = true; + ob_usleep(20 * 1000); // 20ms + } else { + LOG_WARN("fail to push task", K(ret), KPC(task), K(inflight_task_cnt_)); + } + } + } while(OB_FAIL(ret) && need_retry); + + return ret; +} + +int ObTabletReplayCreateHandler::get_tablet_svr_( + const ObLSID &ls_id, + ObLSTabletService *&ls_tablet_svr, + ObLSHandle &ls_handle) +{ + int ret = OB_SUCCESS; + ObLS *ls = nullptr; + if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("fail to get ls handle", K(ret), K(ls_id)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls is null", K(ret), K(ls_id)); + } else if (OB_ISNULL(ls_tablet_svr = ls->get_tablet_svr())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet service is null", K(ret), K(ls_id)); + } + return ret; +} + +int ObTabletReplayCreateHandler::replay_discrete_tablets(const ObIArray &range_arr) +{ + int ret = OB_SUCCESS; + char *buf = 
nullptr; + int64_t buf_len = 0; + ObTabletTransferInfo tablet_transfer_info; + ObArenaAllocator io_allocator("DiscreteRep", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTabletReplayCreateHandler not inited", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < range_arr.count(); i++) { + for (int64_t idx = range_arr.at(i).first; OB_SUCC(ret) && idx < range_arr.at(i).second; idx++) { + io_allocator.reuse(); + const ObTabletMapKey &key = total_tablet_item_arr_[idx].key_; + const ObMetaDiskAddr &addr = total_tablet_item_arr_[idx].addr_; + ObLSTabletService *ls_tablet_svr = nullptr; + ObLSHandle ls_handle; + tablet_transfer_info.reset(); + if (OB_FAIL(ATOMIC_LOAD(&errcode_))) { + LOG_WARN("replay create has already failed", K(ret)); + } else { + // io maybe timeout, so need retry + int64_t max_retry_time = 5; + do { + if (OB_FAIL(MTL(ObTenantCheckpointSlogHandler*)->read_from_disk(addr, io_allocator, buf, buf_len))) { + LOG_WARN("fail to read from disk", K(ret), K(addr), KP(buf), K(buf_len)); + } else if (OB_FAIL(get_tablet_svr_(key.ls_id_, ls_tablet_svr, ls_handle))) { + LOG_WARN("fail to get ls tablet service", K(ret)); + } else if (OB_FAIL(ls_tablet_svr->replay_create_tablet(addr, buf, buf_len, key.tablet_id_, tablet_transfer_info))) { + LOG_WARN("fail to create tablet for replay", K(ret), K(key), K(addr)); + } + } while (OB_FAIL(ret) && OB_TIMEOUT == ret && max_retry_time-- > 0); + + if (OB_SUCC(ret)) { + if (tablet_transfer_info.has_transfer_table() && + OB_FAIL(record_ls_transfer_info_(ls_handle, key.tablet_id_, tablet_transfer_info))) { + LOG_WARN("fail to record_ls_transfer_info", K(ret), K(key), K(tablet_transfer_info)); + } + } + } + } + } + return ret; +} + +int ObTabletReplayCreateHandler::replay_aggregate_tablets(const ObIArray &range_arr) +{ + int ret = OB_SUCCESS; + char *buf = nullptr; + int64_t buf_len = 0; + ObTabletTransferInfo tablet_transfer_info; + ObArenaAllocator io_allocator("AggregateRep", 
OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + char *io_buf = nullptr; + const int64_t io_buf_size = OB_SERVER_BLOCK_MGR.get_macro_block_size(); + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTabletReplayCreateHandler not inited", K(ret)); + } else if (OB_ISNULL(io_buf = + reinterpret_cast(io_allocator.alloc(io_buf_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc macro read info buffer", K(ret), K(io_buf_size)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < range_arr.count(); i++) { + ObMacroBlockHandle macro_handle; + ObMacroBlockReadInfo read_info; + read_info.offset_ = 0; + read_info.buf_ = io_buf; + read_info.size_ = io_buf_size; + read_info.io_timeout_ms_ = 20000; // 20s + read_info.io_desc_.set_mode(ObIOMode::READ); + read_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_COMPACT_READ); + read_info.io_desc_.set_group_id(ObIOModule::SHARED_BLOCK_RW_IO); + read_info.macro_block_id_ = total_tablet_item_arr_[range_arr.at(i).first].addr_.block_id(); + if (OB_FAIL(ObBlockManager::read_block(read_info, macro_handle))) { + LOG_WARN("fail to read block", K(ret), K(read_info)); + } + for (int64_t idx = range_arr.at(i).first; OB_SUCC(ret) && idx < range_arr.at(i).second; idx++) { + const ObTabletMapKey &key = total_tablet_item_arr_[idx].key_; + const ObMetaDiskAddr &addr = total_tablet_item_arr_[idx].addr_; + ObLSTabletService *ls_tablet_svr = nullptr; + ObLSHandle ls_handle; + tablet_transfer_info.reset(); + if (OB_FAIL(ATOMIC_LOAD(&errcode_))) { + LOG_WARN("replay create has already failed", K(ret)); + } else if (OB_FAIL(get_tablet_svr_(key.ls_id_, ls_tablet_svr, ls_handle))) { + LOG_WARN("fail to get ls tablet service", K(ret)); + } else if (OB_FAIL(ObSharedBlockReaderWriter::parse_data_from_macro_block(macro_handle, addr, buf, buf_len))) { + LOG_WARN("fail to parse_data_from_macro_block", K(ret), K(macro_handle), K(addr), K(i), K(idx)); + } else if (OB_FAIL(ls_tablet_svr->replay_create_tablet(addr, buf, buf_len, key.tablet_id_, 
tablet_transfer_info))) { + LOG_WARN("fail to create tablet for replay", K(ret), K(key), K(addr)); + } else if (tablet_transfer_info.has_transfer_table() && + OB_FAIL(record_ls_transfer_info_(ls_handle, key.tablet_id_, tablet_transfer_info))) { + LOG_WARN("fail to record_ls_transfer_info", K(ret), K(key), K(tablet_transfer_info)); + } + } + } + + return ret; +} + +int ObTabletReplayCreateHandler::check_is_need_record_transfer_info_( + const share::ObLSID &src_ls_id, + const share::SCN &transfer_start_scn, + bool &is_need) +{ + int ret = OB_SUCCESS; + ObLSService* ls_srv = nullptr; + ObLSHandle src_ls_handle; + ObLS *src_ls = NULL; + is_need = false; + if (!src_ls_id.is_valid() || !transfer_start_scn.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("src_ls_id or transfer_start_scn is invalid", K(ret), K(src_ls_id), K(transfer_start_scn)); + } else if (OB_ISNULL(ls_srv = MTL(ObLSService*))) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("ls srv should not be NULL", K(ret), KP(ls_srv)); + } else if (OB_FAIL(ls_srv->get_ls(src_ls_id, src_ls_handle, ObLSGetMod::STORAGE_MOD))) { + if (OB_LS_NOT_EXIST == ret) { + is_need = false; + LOG_WARN("source ls is not exist", KR(ret), K(src_ls_id)); + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get ls", KR(ret), K(src_ls_id)); + } + } else if (OB_ISNULL(src_ls = src_ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls is NULL", KR(ret), K(src_ls_id)); + } else if (src_ls->get_ls_meta().get_clog_checkpoint_scn() < transfer_start_scn) { + is_need = true; + LOG_INFO("src ls max decided scn is smaller than transfer start scn, need wait clog replay", K(ret), + K(src_ls_id), K(transfer_start_scn), "ls_meta", src_ls->get_ls_meta()); + } + return ret; +} + +int ObTabletReplayCreateHandler::record_ls_transfer_info_( + const ObLSHandle &ls_handle, + const ObTabletID &tablet_id, + const ObTabletTransferInfo &tablet_transfer_info) +{ + int ret = OB_SUCCESS; + storage::ObLS *ls = NULL; + bool is_need = false; + 
ObMigrationStatus current_migration_status = ObMigrationStatus::OB_MIGRATION_STATUS_MAX; + ObMigrationStatus new_migration_status = ObMigrationStatus::OB_MIGRATION_STATUS_MAX; + ObLSRestoreStatus ls_restore_status(ObLSRestoreStatus::LS_RESTORE_STATUS_MAX); + if (!ls_handle.is_valid() || !tablet_transfer_info.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(ls_handle), K(tablet_transfer_info)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("log stream not exist", K(ret)); + } else if (OB_FAIL(ls->get_migration_status(current_migration_status))) { + LOG_WARN("failed to get ls migration status", K(ret)); + } else if (OB_FAIL(ObMigrationStatusHelper::trans_reboot_status(current_migration_status, new_migration_status))) { + LOG_WARN("failed to trans fail status", K(ret), "ls_id", ls->get_ls_id(), + K(current_migration_status), K(new_migration_status)); + } else if (ObMigrationStatus::OB_MIGRATION_STATUS_NONE != new_migration_status) { + LOG_INFO("The log stream does not need to record transfer_info", "ls_id", ls->get_ls_id(), K(current_migration_status), K(new_migration_status)); + } else if (OB_FAIL(ls->get_restore_status(ls_restore_status))) { + LOG_WARN("failed to get ls restore status", K(ret), KPC(ls)); + } else if (ls_restore_status.is_in_restore_and_before_quick_restore()) { + LOG_INFO("the log stream in restore and before quick restore, no need to record transfer info", "ls_id", ls->get_ls_id(), K(ls_restore_status)); + } else if (!tablet_transfer_info.has_transfer_table()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet should have transfer table", K(ret), "ls_id", ls->get_ls_id(), K(tablet_id), K(tablet_transfer_info)); + } else if (ls->get_ls_startup_transfer_info().is_valid()) { + if (ls->get_ls_startup_transfer_info().ls_id_ != tablet_transfer_info.ls_id_ + || ls->get_ls_startup_transfer_info().transfer_start_scn_ != tablet_transfer_info.transfer_start_scn_) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("The transfer_info of different tablet records on the same ls is different", K(ret), "ls_id", ls->get_ls_id(), + K(tablet_id), K(tablet_transfer_info), "ls_startup_transfer_info", ls->get_ls_startup_transfer_info()); + } + } else if (OB_FAIL(check_is_need_record_transfer_info_(tablet_transfer_info.ls_id_, + tablet_transfer_info.transfer_start_scn_, is_need))) { + LOG_WARN("failed to check is need record ls", K(ret), "ls_id", ls->get_ls_id(), K(tablet_id), K(tablet_transfer_info)); + } else if (!is_need) { + // do nothing + } else if (OB_FAIL(ls->get_ls_startup_transfer_info().init(tablet_transfer_info.ls_id_, + tablet_transfer_info.transfer_start_scn_))) { + LOG_WARN("failed to init ls transfer info", K(ret), "ls_id", ls->get_ls_id(), K(tablet_id), K(tablet_transfer_info)); + } + return ret; +} + +} // namespace storage +} // namespace oceanbase \ No newline at end of file diff --git a/src/storage/slog_ckpt/ob_tablet_replay_create_handler.h b/src/storage/slog_ckpt/ob_tablet_replay_create_handler.h new file mode 100644 index 0000000000..67592e4144 --- /dev/null +++ b/src/storage/slog_ckpt/ob_tablet_replay_create_handler.h @@ -0,0 +1,160 @@ + +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_STORAGE_SLOG_CKPT_TBALET_REPLAY_CREATE_HANDLER_H +#define OB_STORAGE_SLOG_CKPT_TBALET_REPLAY_CREATE_HANDLER_H + +#include "storage/meta_mem/ob_tablet_map_key.h" +#include "observer/ob_server_startup_task_handler.h" +#include "storage/meta_mem/ob_meta_obj_struct.h" + + +namespace oceanbase +{ + +namespace share +{ +class ObTenantBase; +class SCN; +} + +namespace storage +{ +class ObLSHandle; +class ObTabletReplayCreateHandler; +class ObTabletTransferInfo; +class ObLSTabletService; + +struct ObTabletReplayItem +{ +public: + ObTabletReplayItem(const ObTabletMapKey &key, const ObMetaDiskAddr &addr) + : key_(key), addr_(addr) {} + ObTabletReplayItem() + : key_(), addr_() {} + ~ObTabletReplayItem() {} + bool operator<(const ObTabletReplayItem &r) const; + + ObTabletMapKey key_; + ObMetaDiskAddr addr_; +}; + +using ObTabletReplayItemRange = std::pair; // record the start_item_idx and end_item_idx in total_tablet_item_arr + +class ObTabletReplayCreateTask : public observer::ObServerStartupTask +{ +public: + enum Type + { + DISCRETE = 0, + AGGREGATE = 1, + MAX = 2, + }; + ObTabletReplayCreateTask() + : is_inited_(false), + idx_(-1), + type_(Type::MAX), + replay_item_range_arr_(), + tablet_cnt_(0), + tenant_base_(nullptr), + handler_(nullptr) {} + + virtual ~ObTabletReplayCreateTask() + { + destroy(); + } + int init(const int64_t task_idx, const ObTabletReplayCreateTask::Type type, + share::ObTenantBase *tenant_base, ObTabletReplayCreateHandler *handler); + + int execute() override; + int add_item_range(const ObTabletReplayItemRange &range, bool &is_enough); + + VIRTUAL_TO_STRING_KV(K_(idx), K_(type), KP(this), KP_(tenant_base), + "range_count", replay_item_range_arr_.count(), K_(tablet_cnt)); + + +private: + static const int64_t MAX_DISCRETE_TABLET_CNT_PER_TASK = 200; + static const int64_t MAX_AGGREGATE_BLOCK_CNT_PER_TASK = 3; + void destroy(); + +private: + bool is_inited_; + int64_t idx_; + Type type_; + common::ObSEArray replay_item_range_arr_; + 
int64_t tablet_cnt_; + share::ObTenantBase *tenant_base_; + ObTabletReplayCreateHandler *handler_; +}; + + +//============================= ObTabletReplayCreateHandler ==============================// + +class ObTabletReplayCreateHandler +{ +public: + ObTabletReplayCreateHandler(); + ~ObTabletReplayCreateHandler() {} + + int init(const common::hash::ObHashMap &tablet_item_map); + int concurrent_replay(); + + int replay_discrete_tablets(const ObIArray &range_arr); + int replay_aggregate_tablets(const ObIArray &range_arr); + + void inc_inflight_task_cnt() { ATOMIC_INC(&inflight_task_cnt_); } + void dec_inflight_task_cnt() { ATOMIC_DEC(&inflight_task_cnt_); } + void inc_finished_tablet_cnt(const int64_t cnt) { (void)ATOMIC_FAA(&finished_tablet_cnt_, cnt); } + void set_errcode(const int errcode) { ATOMIC_STORE(&errcode_, errcode); }; + +private: + static bool is_suitable_to_aggregate_(const int64_t tablet_cnt_in_block, const int64_t valid_size_in_block) + { + return tablet_cnt_in_block > AGGREGATE_CNT_THRESHOLD && valid_size_in_block > AGGREGATE_SIZE_THRESHOLD; + } + int add_item_range_to_task_(const ObTabletReplayCreateTask::Type type, + const ObTabletReplayItemRange &range, ObTabletReplayCreateTask *&task); + int add_task_(ObTabletReplayCreateTask *task); + int get_tablet_svr_(const share::ObLSID &ls_id, ObLSTabletService *&ls_tablet_svr, ObLSHandle &ls_handle); + int record_ls_transfer_info_( + const ObLSHandle &ls_handle, + const ObTabletID &tablet_id, + const ObTabletTransferInfo &tablet_transfer_info); + int check_is_need_record_transfer_info_( + const share::ObLSID &src_ls_id, + const share::SCN &transfer_start_scn, + bool &is_need); + + +private: + static const int64_t AGGREGATE_CNT_THRESHOLD = 16; + static const int64_t AGGREGATE_SIZE_THRESHOLD = 256 << 10; // 256K + + bool is_inited_; + ObArenaAllocator allocator_; + int64_t task_idx_; + int64_t inflight_task_cnt_; + int64_t finished_tablet_cnt_; + int errcode_; + ObTabletReplayItem *total_tablet_item_arr_; + 
int64_t total_tablet_cnt_; + ObTabletReplayCreateTask *aggrgate_task_; + ObTabletReplayCreateTask *discrete_task_; +}; + + +} // namespace storage +} // namespace oceanbase + +#endif \ No newline at end of file diff --git a/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.cpp b/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.cpp index c6b68bb868..cf15226f2a 100644 --- a/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.cpp +++ b/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.cpp @@ -16,6 +16,7 @@ #include "storage/slog_ckpt/ob_tenant_storage_checkpoint_reader.h" #include "storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.h" #include "storage/slog_ckpt/ob_server_checkpoint_slog_handler.h" +#include "storage/slog_ckpt/ob_tablet_replay_create_handler.h" #include "storage/meta_mem/ob_meta_obj_struct.h" #include "storage/ob_super_block_struct.h" #include "storage/slog/ob_storage_log_reader.h" @@ -156,83 +157,12 @@ void ObTenantCheckpointSlogHandler::ObWriteCheckpointTask::runTimerTask() } } -int ObTenantCheckpointSlogHandler::ObReplayCreateTabletTask::init( - const int64_t task_idx, ObTenantBase *tenant_base, ObTenantCheckpointSlogHandler *handler) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(is_inited_)) { - ret = OB_INIT_TWICE; - LOG_WARN("task has been inited", K(ret), KPC(this)); - } else { - idx_ = task_idx; - tenant_base_ = tenant_base; - tablet_addr_arr_.reset(); - tnt_ckpt_slog_handler_ = handler; - handler->inc_inflight_replay_tablet_task_cnt(); - is_inited_ = true; - } - return ret; -} - -void ObTenantCheckpointSlogHandler::ObReplayCreateTabletTask::destroy() -{ - if (IS_INIT) { - tnt_ckpt_slog_handler_->dec_inflight_replay_tablet_task_cnt(); - idx_ = -1; - tenant_base_ = nullptr; - tnt_ckpt_slog_handler_ = nullptr; - tablet_addr_arr_.reset(); - is_inited_ = false; - } -} -int ObTenantCheckpointSlogHandler::ObReplayCreateTabletTask::execute() -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("task 
not init", K(ret), KPC(this)); - } else { - ObTenantSwitchGuard guard(tenant_base_); - if (OB_UNLIKELY(MTL(ObTenantCheckpointSlogHandler*) != tnt_ckpt_slog_handler_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected ObTenantCheckpointSlogHandler", K(ret), KPC(this)); - } else if (OB_FAIL(tnt_ckpt_slog_handler_->replay_create_tablets_per_task(tablet_addr_arr_))) { - LOG_WARN("fail to execute replay_create_tablets_per_task", K(ret), KPC(this)); - } else { - FLOG_INFO("successfully execute replay create tablet task", KPC(this)); - } - } - if (OB_FAIL(ret)) { - tnt_ckpt_slog_handler_->set_replay_create_tablet_errcode(ret); - } - return ret; -} - -int ObTenantCheckpointSlogHandler::ObReplayCreateTabletTask::add_tablet_addr( - const ObTabletMapKey &tablet_key, const ObMetaDiskAddr &tablet_addr, bool &is_enough) -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("task not init", K(ret)); - } else if (OB_FAIL(tablet_addr_arr_.push_back(std::make_pair(tablet_key, tablet_addr)))) { - LOG_WARN("fail to push_back", K(ret), K(*this)); - } else if (tablet_addr_arr_.count() >= TABLET_NUM_PER_TASK) { - is_enough = true; - } else { - is_enough = false; - } - return ret; -} ObTenantCheckpointSlogHandler::ObTenantCheckpointSlogHandler() : is_inited_(false), is_writing_checkpoint_(false), last_ckpt_time_(0), last_frozen_version_(0), - inflight_replay_tablet_task_cnt_(0), - finished_replay_tablet_cnt_(0), - replay_create_tablet_errcode_(OB_SUCCESS), lock_(common::ObLatchIds::SLOG_CKPT_LOCK), slog_ckpt_lock_(common::ObLatchIds::SLOG_CKPT_LOCK), tablet_key_set_(), @@ -243,7 +173,8 @@ ObTenantCheckpointSlogHandler::ObTenantCheckpointSlogHandler() tg_id_(-1), write_ckpt_task_(this), replay_tablet_disk_addr_map_(), - shared_block_rwriter_() + shared_block_rwriter_(), + shared_block_raw_rwriter_() { } @@ -268,6 +199,8 @@ int ObTenantCheckpointSlogHandler::init() } else if (OB_FAIL(TG_CREATE_TENANT(lib::TGDefIDs::WriteCkpt, tg_id_))) { LOG_WARN("fail to tg 
create tenant", K(ret)); } else if (OB_FAIL(shared_block_rwriter_.init())) { + LOG_WARN("fail to init shared block reader ", K(ret)); + } else if (OB_FAIL(shared_block_raw_rwriter_.init())) { LOG_WARN("fail to init linked block manager", K(ret)); } else { is_inited_ = true; @@ -330,6 +263,7 @@ void ObTenantCheckpointSlogHandler::destroy() tg_id_ = -1; replay_tablet_disk_addr_map_.destroy(); shared_block_rwriter_.reset(); + shared_block_raw_rwriter_.reset(); tablet_key_set_.destroy(); ckpt_cursor_.reset(); is_copying_tablets_ = false; @@ -594,7 +528,7 @@ int ObTenantCheckpointSlogHandler::read_from_share_blk( ObSharedBlockReadInfo read_info; read_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_DATA_READ); read_info.addr_ = addr; - if (OB_FAIL(shared_block_rwriter_.async_read(read_info, read_handle))) { + if (OB_FAIL(ObSharedBlockReaderWriter::async_read(read_info, read_handle))) { LOG_WARN("fail to read tablet from macro block", K(ret), K(read_info)); } else if (OB_FAIL(read_handle.wait())) { LOG_WARN("fail to wait for read handle", K(ret)); @@ -613,7 +547,7 @@ int ObTenantCheckpointSlogHandler::read_from_disk( int ret = OB_SUCCESS; char *read_buf = nullptr; const int64_t read_buf_len = addr.size(); - const ObTenantSuperBlock super_block = static_cast(share::ObTenantEnv::get_tenant())->get_super_block(); + const ObTenantSuperBlock super_block = static_cast(MTL_CTX())->get_super_block(); if (OB_UNLIKELY(!super_block.is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("super block is invalid", K(ret), K(super_block)); @@ -636,225 +570,14 @@ int ObTenantCheckpointSlogHandler::read_from_disk( return ret; } -int ObTenantCheckpointSlogHandler::check_is_need_record_transfer_info( - const share::ObLSID &src_ls_id, - const share::SCN &transfer_start_scn, - bool &is_need) -{ - int ret = OB_SUCCESS; - ObLSService* ls_srv = nullptr; - ObLSHandle src_ls_handle; - ObLS *src_ls = NULL; - is_need = false; - if (!src_ls_id.is_valid() || !transfer_start_scn.is_valid()) { - ret = 
OB_INVALID_ARGUMENT; - LOG_WARN("src_ls_id or transfer_start_scn is invalid", K(ret), K(src_ls_id), K(transfer_start_scn)); - } else if (OB_ISNULL(ls_srv = MTL(ObLSService*))) { - ret = OB_ERR_UNEXPECTED; - LOG_ERROR("ls srv should not be NULL", K(ret), KP(ls_srv)); - } else if (OB_FAIL(ls_srv->get_ls(src_ls_id, src_ls_handle, ObLSGetMod::STORAGE_MOD))) { - if (OB_LS_NOT_EXIST == ret) { - is_need = false; - LOG_WARN("source ls is not exist", KR(ret), K(src_ls_id)); - ret = OB_SUCCESS; - } else { - LOG_WARN("failed to get ls", KR(ret), K(src_ls_id)); - } - } else if (OB_ISNULL(src_ls = src_ls_handle.get_ls())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("ls is NULL", KR(ret), K(src_ls_id)); - } else if (src_ls->get_ls_meta().get_clog_checkpoint_scn() < transfer_start_scn) { - is_need = true; - LOG_INFO("src ls max decided scn is smaller than transfer start scn, need wait clog replay", K(ret), - K(src_ls_id), K(transfer_start_scn), "ls_meta", src_ls->get_ls_meta()); - } - return ret; -} - -int ObTenantCheckpointSlogHandler::record_ls_transfer_info( - const ObLSHandle &ls_handle, - const ObTabletID &tablet_id, - const ObTabletTransferInfo &tablet_transfer_info) -{ - int ret = OB_SUCCESS; - storage::ObLS *ls = NULL; - bool is_need = false; - ObMigrationStatus current_migration_status = ObMigrationStatus::OB_MIGRATION_STATUS_MAX; - ObMigrationStatus new_migration_status = ObMigrationStatus::OB_MIGRATION_STATUS_MAX; - ObLSRestoreStatus ls_restore_status(ObLSRestoreStatus::LS_RESTORE_STATUS_MAX); - if (!ls_handle.is_valid() || !tablet_transfer_info.is_valid()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(ls_handle), K(tablet_transfer_info)); - } else if (OB_ISNULL(ls = ls_handle.get_ls())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("log stream not exist", K(ret)); - } else if (OB_FAIL(ls->get_migration_status(current_migration_status))) { - LOG_WARN("failed to get ls migration status", K(ret)); - } else if 
(OB_FAIL(ObMigrationStatusHelper::trans_reboot_status(current_migration_status, new_migration_status))) { - LOG_WARN("failed to trans fail status", K(ret), "ls_id", ls->get_ls_id(), - K(current_migration_status), K(new_migration_status)); - } else if (ObMigrationStatus::OB_MIGRATION_STATUS_NONE != new_migration_status) { - LOG_INFO("The log stream does not need to record transfer_info", "ls_id", ls->get_ls_id(), K(current_migration_status), K(new_migration_status)); - } else if (OB_FAIL(ls->get_restore_status(ls_restore_status))) { - LOG_WARN("failed to get ls restore status", K(ret), KPC(ls)); - } else if (ls_restore_status.is_in_restore_and_before_quick_restore()) { - LOG_INFO("the log stream in restore and before quick restore, no need to record transfer info", "ls_id", ls->get_ls_id(), K(ls_restore_status)); - } else if (!tablet_transfer_info.has_transfer_table()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("tablet should have transfer table", K(ret), "ls_id", ls->get_ls_id(), K(tablet_id), K(tablet_transfer_info)); - } else if (ls->get_ls_startup_transfer_info().is_valid()) { - if (ls->get_ls_startup_transfer_info().ls_id_ != tablet_transfer_info.ls_id_ - || ls->get_ls_startup_transfer_info().transfer_start_scn_ != tablet_transfer_info.transfer_start_scn_) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("The transfer_info of different tablet records on the same ls is different", K(ret), "ls_id", ls->get_ls_id(), - K(tablet_id), K(tablet_transfer_info), "ls_startup_transfer_info", ls->get_ls_startup_transfer_info()); - } - } else if (OB_FAIL(check_is_need_record_transfer_info(tablet_transfer_info.ls_id_, - tablet_transfer_info.transfer_start_scn_, is_need))) { - LOG_WARN("failed to check is need record ls", K(ret), "ls_id", ls->get_ls_id(), K(tablet_id), K(tablet_transfer_info)); - } else if (!is_need) { - // do nothing - } else if (OB_FAIL(ls->get_ls_startup_transfer_info().init(tablet_transfer_info.ls_id_, - tablet_transfer_info.transfer_start_scn_))) { - 
LOG_WARN("failed to init ls transfer info", K(ret), "ls_id", ls->get_ls_id(), K(tablet_id), K(tablet_transfer_info)); - } - return ret; -} int ObTenantCheckpointSlogHandler::concurrent_replay_load_tablets() { int ret = OB_SUCCESS; - const int64_t start_time = ObTimeUtility::current_time(); - const int64_t total_tablet_cnt = replay_tablet_disk_addr_map_.size(); - int64_t cost_time_us = 0; - ReplayTabletDiskAddrMap::iterator iter = replay_tablet_disk_addr_map_.begin(); - ObReplayCreateTabletTask *task = nullptr; - int64_t task_idx = 0; - while (OB_SUCC(ret) && iter != replay_tablet_disk_addr_map_.end()) { - if (nullptr == task) { - if (OB_ISNULL(task = reinterpret_cast( - SERVER_STARTUP_TASK_HANDLER.get_task_allocator().alloc(sizeof(ObReplayCreateTabletTask))))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to alloc task buf", K(ret)); - } else if (FALSE_IT(task = new(task) ObReplayCreateTabletTask())) { - } else if (OB_FAIL(task->init(task_idx++, share::ObTenantEnv::get_tenant(), this))) { - LOG_WARN("fail to init ObReplayCreateTabletTask", K(ret), KPC(task)); - } - } - if (OB_SUCC(ret)) { - bool is_enough = false; - if (OB_FAIL(task->add_tablet_addr(iter->first, iter->second, is_enough))) { - LOG_WARN("fail to add tablet", K(ret), K(iter->first), K(iter->second), KPC(task)); - } else if (is_enough) { // tablet count of this task is enough and will create a new task at next round - if (OB_FAIL(add_replay_create_tablet_task(task))) { - LOG_WARN("fail to add replay tablet task", K(ret), KPC(task), K(inflight_replay_tablet_task_cnt_)); - } else { - task = nullptr; - ++iter; - } - } else { - ++iter; - } - } - - if (OB_FAIL(ret) && OB_NOT_NULL(task)) { - task->~ObReplayCreateTabletTask(); - SERVER_STARTUP_TASK_HANDLER.get_task_allocator().free(task); - task = nullptr; - } - } - - if (OB_SUCC(ret)) { // handle the last task - if (OB_NOT_NULL(task) && OB_FAIL(add_replay_create_tablet_task(task))) { - LOG_WARN("fail to add last replay tablet task", K(ret), 
KPC(task), K(inflight_replay_tablet_task_cnt_)); - task->~ObReplayCreateTabletTask(); - SERVER_STARTUP_TASK_HANDLER.get_task_allocator().free(task); - task = nullptr; - } - } - // waiting all task finish even if failure has occurred - while (ATOMIC_LOAD(&inflight_replay_tablet_task_cnt_) != 0) { - LOG_INFO("waiting replay create tablet task finish", K(inflight_replay_tablet_task_cnt_)); - ob_usleep(10 * 1000); // 10ms - } - if (OB_SUCC(ret)) { - if (OB_FAIL(ATOMIC_LOAD(&replay_create_tablet_errcode_))) { - LOG_WARN("ObReplayCreateTabletTask has failed", K(ret)); - } else if (ATOMIC_LOAD(&finished_replay_tablet_cnt_) != total_tablet_cnt) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("finished replay tablet cnt mismatch", K(ret), K_(finished_replay_tablet_cnt), K(total_tablet_cnt)); - } - } - - cost_time_us = ObTimeUtility::current_time() - start_time; - FLOG_INFO("finish concurrently repaly load tablets", K(ret), K(total_tablet_cnt), K(cost_time_us)); - - return ret; -} - -int ObTenantCheckpointSlogHandler::add_replay_create_tablet_task(ObReplayCreateTabletTask *task) -{ - int ret = OB_SUCCESS; - bool need_retry = false; - FLOG_INFO("add replay tablet task", KPC(task), K(inflight_replay_tablet_task_cnt_)); - do { - need_retry = false; - if (OB_FAIL(ATOMIC_LOAD(&replay_create_tablet_errcode_))) { - LOG_WARN("ObReplayCreateTabletTask has failed", K(ret), K(inflight_replay_tablet_task_cnt_)); - } else if (OB_FAIL(SERVER_STARTUP_TASK_HANDLER.push_task(task))) { - if (OB_EAGAIN == ret) { - LOG_INFO("task queue is full, wait and retry", KPC(task), K(inflight_replay_tablet_task_cnt_)); - need_retry = true; - ob_usleep(10 * 1000); // 10ms - } else { - LOG_WARN("fail to push task", K(ret), KPC(task), K(inflight_replay_tablet_task_cnt_)); - } - } - } while(OB_FAIL(ret) && need_retry); - - return ret; -} - -int ObTenantCheckpointSlogHandler::replay_create_tablets_per_task( - const ObIArray> &tablet_addr_arr) -{ - int ret = OB_SUCCESS; - char *buf = nullptr; - int64_t buf_len = 0; - 
ObTabletTransferInfo tablet_transfer_info; - - for (int64_t i = 0; OB_SUCC(ret) && i < tablet_addr_arr.count(); i++) { - ObArenaAllocator allocator(common::ObMemAttr(MTL_ID(), "ReplayTablet")); - const ObTabletMapKey &key = tablet_addr_arr.at(i).first; - const ObMetaDiskAddr &addr = tablet_addr_arr.at(i).second; - ObLSTabletService *ls_tablet_svr = nullptr; - ObLSHandle ls_handle; - tablet_transfer_info.reset(); - if (OB_FAIL(ATOMIC_LOAD(&replay_create_tablet_errcode_))) { - LOG_WARN("replay create has already failed", K(ret)); - } else { - // io maybe timeout, so need retry - int64_t max_retry_time = 5; - do { - if (OB_FAIL(read_from_disk(addr, allocator, buf, buf_len))) { - LOG_WARN("fail to read from disk", K(ret), K(addr), KP(buf), K(buf_len)); - } else if (OB_FAIL(get_tablet_svr(key.ls_id_, ls_tablet_svr, ls_handle))) { - LOG_WARN("fail to get ls tablet service", K(ret)); - } else if (OB_FAIL(ls_tablet_svr->replay_create_tablet(addr, buf, buf_len, key.tablet_id_, tablet_transfer_info))) { - LOG_WARN("fail to create tablet for replay", K(ret), K(key), K(addr)); - } - } while (OB_FAIL(ret) && OB_TIMEOUT == ret && max_retry_time-- > 0); - - if (OB_SUCC(ret)) { - if (tablet_transfer_info.has_transfer_table() && - OB_FAIL(record_ls_transfer_info(ls_handle, key.tablet_id_, tablet_transfer_info))) { - LOG_WARN("fail to record_ls_transfer_info", K(ret), K(key), K(tablet_transfer_info)); - } - } - } - } - - if (OB_SUCC(ret)) { - inc_finished_replay_tablet_cnt(tablet_addr_arr.count()); + ObTabletReplayCreateHandler handler; + if (OB_FAIL(handler.init(replay_tablet_disk_addr_map_))) { + LOG_WARN("fail to init ObTabletReplayCreateHandler", K(ret)); + } else if (OB_FAIL(handler.concurrent_replay())) { + LOG_WARN("fail to concurrent replay tablets", K(ret)); } return ret; } @@ -1461,25 +1184,6 @@ int ObTenantCheckpointSlogHandler::inner_replay_empty_shell_tablet(const ObRedoM return ret; } -int ObTenantCheckpointSlogHandler::get_tablet_svr( - const ObLSID &ls_id, - 
ObLSTabletService *&ls_tablet_svr, - ObLSHandle &ls_handle) -{ - int ret = OB_SUCCESS; - ObLS *ls = nullptr; - if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { - LOG_WARN("fail to get ls handle", K(ret), K(ls_id)); - } else if (OB_ISNULL(ls = ls_handle.get_ls())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("ls is null", K(ret), K(ls_id)); - } else if (OB_ISNULL(ls_tablet_svr = ls->get_tablet_svr())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("tablet service is null", K(ret), K(ls_id)); - } - return ret; -} - int ObTenantCheckpointSlogHandler::parse( const int32_t cmd, const char *buf, const int64_t len, FILE *stream) { diff --git a/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.h b/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.h index 4fa8ea4443..4ad829a260 100644 --- a/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.h +++ b/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.h @@ -22,8 +22,6 @@ #include "storage/blockstore/ob_shared_block_reader_writer.h" #include "storage/ls/ob_ls_meta.h" #include "storage/tx/ob_dup_table_base.h" -#include "storage/high_availability/ob_tablet_transfer_info.h" -#include "observer/ob_server_startup_task_handler.h" namespace oceanbase { @@ -39,8 +37,6 @@ struct ObMetaDiskAddr; class ObTenantStorageCheckpointWriter; class ObRedoModuleReplayParam; class ObStorageLogger; -class ObLSTabletService; -class ObLSHandle; struct ObLSCkptMember final { @@ -90,36 +86,7 @@ public: ObTenantCheckpointSlogHandler *handler_; }; - class ObReplayCreateTabletTask : public observer::ObServerStartupTask - { - public: - ObReplayCreateTabletTask() - : is_inited_(false), - idx_(-1), - tenant_base_(nullptr), - tnt_ckpt_slog_handler_(nullptr) {} - virtual ~ObReplayCreateTabletTask() - { - destroy(); - } - int init(const int64_t task_idx, ObTenantBase *tenant_base, ObTenantCheckpointSlogHandler *handler); - int execute() override; - int add_tablet_addr(const ObTabletMapKey &tablet_key, const 
ObMetaDiskAddr &tablet_addr, bool &is_enough); - - VIRTUAL_TO_STRING_KV(K_(idx), KP(this), KP_(tenant_base), "tablet_count", tablet_addr_arr_.count()); - - private: - static const int64_t TABLET_NUM_PER_TASK = 200; - void destroy(); - - private: - bool is_inited_; - int64_t idx_; - ObTenantBase *tenant_base_; - ObTenantCheckpointSlogHandler *tnt_ckpt_slog_handler_; - common::ObSEArray, TABLET_NUM_PER_TASK> tablet_addr_arr_; - }; ObTenantCheckpointSlogHandler(); ~ObTenantCheckpointSlogHandler() = default; @@ -139,6 +106,7 @@ public: int get_meta_block_list(common::ObIArray &block_list); ObSharedBlockReaderWriter &get_shared_block_reader_writer() { return shared_block_rwriter_; } + ObSharedBlockReaderWriter &get_shared_block_raw_reader_writer() { return shared_block_raw_rwriter_; } // only used by MACRO common::TCRWLock &get_slog_ckpt_lock() { return slog_ckpt_lock_; } virtual int replay(const ObRedoModuleReplayParam ¶m) override; @@ -150,16 +118,9 @@ public: char *&buf, int64_t &buf_len); - void inc_inflight_replay_tablet_task_cnt() { ATOMIC_INC(&inflight_replay_tablet_task_cnt_); } - void dec_inflight_replay_tablet_task_cnt() { ATOMIC_DEC(&inflight_replay_tablet_task_cnt_); } - void inc_finished_replay_tablet_cnt(const int64_t cnt) { (void)ATOMIC_FAA(&finished_replay_tablet_cnt_, cnt); } - void set_replay_create_tablet_errcode(const int errcode) - { - ATOMIC_STORE(&replay_create_tablet_errcode_, errcode); - }; - int replay_create_tablets_per_task(const common::ObIArray> &tablet_addr_arr); - private: + int read_from_share_blk(const ObMetaDiskAddr &addr, common::ObArenaAllocator &allocator, char *&buf, int64_t &buf_len); + int concurrent_replay_load_tablets(); int get_cur_cursor(); void clean_copy_status(); virtual int parse(const int32_t cmd, const char *buf, const int64_t len, FILE *stream) override; @@ -176,7 +137,6 @@ private: const bool is_replay_old, ObTenantStorageCheckpointWriter &ckpt_writer); int replay_dup_table_ls_meta(const 
transaction::ObDupTableLSCheckpoint::ObLSDupTableMeta &dup_ls_meta); int replay_tenant_slog(const common::ObLogCursor &start_point); - int concurrent_replay_load_tablets(); int inner_replay_update_ls_slog(const ObRedoModuleReplayParam ¶m); int inner_replay_create_ls_slog(const ObRedoModuleReplayParam ¶m); int inner_replay_create_ls_commit_slog(const ObRedoModuleReplayParam ¶m); @@ -189,9 +149,7 @@ private: int inner_replay_gts_record(const ObRedoModuleReplayParam ¶m); int inner_replay_gti_record(const ObRedoModuleReplayParam ¶m); int inner_replay_das_record(const ObRedoModuleReplayParam ¶m); - int get_tablet_svr(const share::ObLSID &ls_id, ObLSTabletService *&ls_tablet_svr, ObLSHandle &ls_handle); int read_from_disk_addr(const ObMetaDiskAddr &phy_addr, char *buf, const int64_t buf_len, char *&r_buf, int64_t &r_len); - int read_from_share_blk(const ObMetaDiskAddr &addr, common::ObArenaAllocator &allocator, char *&buf, int64_t &buf_len); int read_from_ckpt(const ObMetaDiskAddr &phy_addr, char *buf, const int64_t buf_len, int64_t &r_len); int read_from_slog(const ObMetaDiskAddr &phy_addr, char *buf, const int64_t buf_len, int64_t &pos); int read_empty_shell_file(const ObMetaDiskAddr &phy_addr, common::ObArenaAllocator &allocator, char *&buf, int64_t &buf_len); @@ -206,15 +164,6 @@ private: const char *buf, const int64_t buf_len, bool allow_override /* allow to overwrite the map's element or not */); - int record_ls_transfer_info( - const ObLSHandle &ls_handle, - const ObTabletID &tablet_id, - const ObTabletTransferInfo &tablet_transfer_info); - int check_is_need_record_transfer_info( - const share::ObLSID &src_ls_id, - const share::SCN &transfer_start_scn, - bool &is_need); - int add_replay_create_tablet_task(ObReplayCreateTabletTask *task); private: const static int64_t BUCKET_NUM = 109; @@ -224,9 +173,6 @@ private: bool is_writing_checkpoint_; int64_t last_ckpt_time_; int64_t last_frozen_version_; - int64_t inflight_replay_tablet_task_cnt_; - int64_t 
finished_replay_tablet_cnt_; - int replay_create_tablet_errcode_; common::TCRWLock lock_; // protect block_handle common::TCRWLock slog_ckpt_lock_; // protect is_copying_tablets_ common::hash::ObHashSet tablet_key_set_; @@ -239,6 +185,7 @@ private: ObWriteCheckpointTask write_ckpt_task_; ReplayTabletDiskAddrMap replay_tablet_disk_addr_map_; ObSharedBlockReaderWriter shared_block_rwriter_; + ObSharedBlockReaderWriter shared_block_raw_rwriter_; }; } // end namespace storage diff --git a/src/storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.cpp b/src/storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.cpp index ff9b7f56c2..78b5df901c 100644 --- a/src/storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.cpp +++ b/src/storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.cpp @@ -417,7 +417,6 @@ int ObTenantStorageCheckpointWriter::do_rollback(const ObMetaDiskAddr &load_addr int64_t pos = 0; read_info.addr_ = load_addr; read_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_DATA_READ); - do { allocator.reuse(); ObSharedBlockReadHandle block_handle(allocator); @@ -453,8 +452,13 @@ int ObTenantStorageCheckpointWriter::get_tablet_with_addr( char *buf = nullptr; int64_t pos = 0; read_info.addr_ = addr_info.new_addr_; + // only need load first-level meta + if (addr_info.new_addr_.is_raw_block()) { + if (addr_info.new_addr_.size() > ObTabletCommon::MAX_TABLET_FIRST_LEVEL_META_SIZE) { + read_info.addr_.set_size(ObTabletCommon::MAX_TABLET_FIRST_LEVEL_META_SIZE); + } + } read_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_DATA_READ); - do { ObArenaAllocator allocator("SlogCkptWriter", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); ObSharedBlockReadHandle block_handle(allocator); diff --git a/src/storage/tablet/ob_tablet.cpp b/src/storage/tablet/ob_tablet.cpp index bfbe213da3..f924cd492f 100644 --- a/src/storage/tablet/ob_tablet.cpp +++ b/src/storage/tablet/ob_tablet.cpp @@ -77,7 +77,10 @@ #include "storage/ob_storage_schema_util.h" #include 
"storage/compaction/ob_medium_list_checker.h" #include "storage/memtable/ob_row_conflict_handler.h" +#include "storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.h" #include "storage/tablet/ob_tablet_binding_info.h" +#include "storage/tablet/ob_tablet_persister.h" +#include "src/storage/slog_ckpt/ob_linked_macro_block_writer.h" namespace oceanbase { @@ -120,7 +123,6 @@ namespace storage } \ } while (false) \ - ObTableStoreCache::ObTableStoreCache() : last_major_snapshot_version_(0), major_table_cnt_(0), @@ -205,7 +207,7 @@ int ObTableStoreCache::init( ObTablet::ObTablet() - : version_(TABLET_VERSION_V2), + : version_(ObTabletBlockHeader::TABLET_VERSION_V3), length_(0), wash_score_(INT64_MIN), mds_data_(), @@ -215,6 +217,7 @@ ObTablet::ObTablet() rowkey_read_info_(nullptr), table_store_addr_(), storage_schema_addr_(), + macro_info_addr_(), memtable_count_(0), ddl_kvs_(nullptr), ddl_kv_count_(0), @@ -233,7 +236,7 @@ ObTablet::ObTablet() table_store_cache_() { #if defined(__x86_64__) && !defined(ENABLE_OBJ_LEAK_CHECK) - static_assert(sizeof(ObTablet) + sizeof(ObRowkeyReadInfo) == 1616, "The size of ObTablet will affect the meta memory manager, and the necessity of adding new fields needs to be considered."); + static_assert(sizeof(ObTablet) + sizeof(ObRowkeyReadInfo) == 1696, "The size of ObTablet will affect the meta memory manager, and the necessity of adding new fields needs to be considered."); #endif MEMSET(memtables_, 0x0, sizeof(memtables_)); } @@ -251,6 +254,7 @@ void ObTablet::reset() reset_ddl_memtables(); storage_schema_addr_.reset(); table_store_addr_.reset(); + macro_info_addr_.reset(); wash_score_ = INT64_MIN; tablet_meta_.reset(); table_store_cache_.reset(); @@ -268,7 +272,7 @@ void ObTablet::reset() ddl_data_cache_.reset(); next_tablet_guard_.reset(); // allocator_ = nullptr; can't reset allocator_ which would be used when gc tablet - version_ = TABLET_VERSION_V2; + version_ = ObTabletBlockHeader::TABLET_VERSION_V3; length_ = 0; next_tablet_ = 
nullptr; hold_ref_cnt_ = false; @@ -298,6 +302,7 @@ int ObTablet::init_for_first_time_creation( } ObITable **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; + ObLinkedMacroBlockItemWriter linked_writer; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; @@ -358,7 +363,9 @@ int ObTablet::init_for_first_time_creation( LOG_WARN("failed to init mds data", K(ret)); } else if (is_ls_inner_tablet() && OB_FAIL(mds_data_.init_with_tablet_status(allocator, ObTabletStatus::NORMAL, ObTabletMdsUserDataType::CREATE_TABLET))) { LOG_WARN("failed to init mds data for ls inner tablet", K(ret)); - } else if (FALSE_IT(set_mem_addr())) { + } else if (OB_FAIL(init_aggregated_info(allocator, linked_writer))) { + LOG_WARN("fail to init aggregated info", K(ret)); + } else if (FALSE_IT(set_initial_addr())) { } else if (OB_FAIL(inner_inc_macro_ref_cnt())) { LOG_WARN("failed to increase macro ref cnt", K(ret)); } else { @@ -390,6 +397,7 @@ int ObTablet::init_for_merge( int64_t finish_medium_scn = 0; ObITable **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; + ObLinkedMacroBlockItemWriter linked_writer; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; @@ -460,7 +468,9 @@ int ObTablet::init_for_merge( LOG_WARN("failed to check medium list", K(ret), K(param), K(old_tablet)); } else if (OB_FAIL(check_sstable_column_checksum())) { LOG_WARN("failed to check sstable column checksum", K(ret), KPC(this)); - } else if (FALSE_IT(set_mem_addr())) { + } else if (OB_FAIL(init_aggregated_info(allocator, linked_writer))) { + LOG_WARN("fail to init aggregated info", K(ret)); + } else if (FALSE_IT(set_initial_addr())) { } else if (OB_FAIL(inner_inc_macro_ref_cnt())) { LOG_WARN("failed to increase macro ref cnt", K(ret)); } else if (OB_UNLIKELY(old_tablet.is_row_store() != param.storage_schema_->is_row_store())) { @@ -509,6 +519,7 @@ int ObTablet::init_for_mds_table_dump( int64_t finish_medium_scn = 0; ObITable **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; + ObLinkedMacroBlockItemWriter 
linked_writer; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; @@ -552,7 +563,9 @@ int ObTablet::init_for_mds_table_dump( LOG_WARN("failed to check medium list", K(ret), KPC(this)); } else if (CLICK_FAIL(check_sstable_column_checksum())) { LOG_WARN("failed to check sstable column checksum", K(ret), KPC(this)); - } else if (FALSE_IT(set_mem_addr())) { + } else if (OB_FAIL(init_aggregated_info(allocator, linked_writer))) { + LOG_WARN("fail to init aggregated info", K(ret)); + } else if (FALSE_IT(set_initial_addr())) { } else if (CLICK_FAIL(inner_inc_macro_ref_cnt())) { LOG_WARN("failed to increase macro ref cnt", K(ret)); } else { @@ -579,6 +592,7 @@ int ObTablet::init_with_migrate_param( allocator_ = &allocator; ObITable **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; + ObLinkedMacroBlockItemWriter linked_writer; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; @@ -618,6 +632,7 @@ int ObTablet::init_with_migrate_param( } else { table_store_addr_.addr_.set_none_addr(); storage_schema_addr_.addr_.set_none_addr(); + macro_info_addr_.addr_.set_none_addr(); is_inited_ = true; LOG_INFO("succeeded to init empty shell tablet", K(ret), K(param), KPC(this)); } @@ -644,7 +659,9 @@ int ObTablet::init_with_migrate_param( LOG_WARN("failed to check medium list", K(ret), K(param)); } else if (OB_FAIL(check_sstable_column_checksum())) { LOG_WARN("failed to check sstable column checksum", K(ret), KPC(this)); - } else if (FALSE_IT(set_mem_addr())) { + } else if (OB_FAIL(init_aggregated_info(allocator, linked_writer))) { + LOG_WARN("fail to init aggregated info", K(ret)); + } else if (FALSE_IT(set_initial_addr())) { } else if (OB_FAIL(inner_inc_macro_ref_cnt())) { LOG_WARN("failed to increase macro ref cnt", K(ret)); } else { @@ -676,6 +693,7 @@ int ObTablet::init_for_defragment( allocator_ = &allocator; ObITable **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; + ObLinkedMacroBlockItemWriter linked_writer; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; @@ -720,7 
+738,9 @@ int ObTablet::init_for_defragment( LOG_WARN("failed to check medium list", K(ret), KPC(this)); } else if (OB_FAIL(check_sstable_column_checksum())) { LOG_WARN("failed to check sstable column checksum", K(ret), KPC(this)); - } else if (FALSE_IT(set_mem_addr())) { + } else if (OB_FAIL(init_aggregated_info(allocator, linked_writer))) { + LOG_WARN("fail to init aggregated info", K(ret)); + } else if (FALSE_IT(set_initial_addr())) { } else if (OB_FAIL(inner_inc_macro_ref_cnt())) { LOG_WARN("failed to increase macro ref cnt", K(ret)); } else if (OB_UNLIKELY(old_tablet.is_row_store() != old_storage_schema->is_row_store())) { @@ -786,6 +806,7 @@ int ObTablet::init_for_sstable_replace( int64_t finish_medium_scn = 0; ObITable **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; + ObLinkedMacroBlockItemWriter linked_writer; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; @@ -844,7 +865,9 @@ int ObTablet::init_for_sstable_replace( table_store_addr_.get_ptr()->get_minor_sstables(), storage_schema->is_row_store()))) { LOG_WARN("failed to init table store cache", K(ret), KPC(this)); - } else if (FALSE_IT(set_mem_addr())) { + } else if (OB_FAIL(init_aggregated_info(allocator, linked_writer))) { + LOG_WARN("fail to init aggregated info", K(ret)); + } else if (FALSE_IT(set_initial_addr())) { } else if (OB_FAIL(inner_inc_macro_ref_cnt())) { LOG_WARN("failed to increase macro ref cnt", K(ret)); } else { @@ -1043,6 +1066,7 @@ int ObTablet::init_with_update_medium_info( const ObTabletTableStore *old_table_store = nullptr; ObStorageSchema *old_storage_schema = nullptr; const ObTabletMdsData &old_mds_data = old_tablet.mds_data_; + ObLinkedMacroBlockItemWriter linked_writer; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; @@ -1082,7 +1106,9 @@ int ObTablet::init_with_update_medium_info( table_store_addr_.get_ptr()->get_minor_sstables(), old_tablet.table_store_cache_.is_row_store_))) { LOG_WARN("failed to init table store cache", K(ret), KPC(this)); - } else if 
(FALSE_IT(set_mem_addr())) { + } else if (OB_FAIL(init_aggregated_info(allocator, linked_writer))) { + LOG_WARN("fail to init aggregated info", K(ret)); + } else if (FALSE_IT(set_initial_addr())) { } else if (OB_FAIL(inner_inc_macro_ref_cnt())) { LOG_WARN("failed to increase macro ref cnt", K(ret)); } else { @@ -1118,6 +1144,7 @@ int ObTablet::init_with_new_snapshot_version( const ObTabletTableStore *old_table_store = nullptr; ObStorageSchema *old_storage_schema = nullptr; const ObTabletMdsData &old_mds_data = old_tablet.mds_data_; + ObLinkedMacroBlockItemWriter linked_writer; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; @@ -1156,7 +1183,9 @@ int ObTablet::init_with_new_snapshot_version( table_store_addr_.get_ptr()->get_minor_sstables(), old_tablet.table_store_cache_.is_row_store_))) { LOG_WARN("failed to init table store cache", K(ret), KPC(this)); - } else if (FALSE_IT(set_mem_addr())) { + } else if (OB_FAIL(init_aggregated_info(allocator, linked_writer))) { + LOG_WARN("fail to init aggregated info", K(ret)); + } else if (FALSE_IT(set_initial_addr())) { } else if (OB_FAIL(inner_inc_macro_ref_cnt())) { // full tablet only increases data macro blocks' ref cnt LOG_WARN("failed to increase sstables ref cnt", K(ret)); } else { @@ -1215,6 +1244,7 @@ int ObTablet::init_empty_shell( } else { table_store_addr_.addr_.set_none_addr(); storage_schema_addr_.addr_.set_none_addr(); + macro_info_addr_.addr_.set_none_addr(); tablet_meta_.clog_checkpoint_scn_ = user_data.delete_commit_scn_ > tablet_meta_.clog_checkpoint_scn_ ? 
user_data.delete_commit_scn_ : tablet_meta_.clog_checkpoint_scn_; tablet_meta_.mds_checkpoint_scn_ = user_data.delete_commit_scn_; @@ -1279,11 +1309,11 @@ int ObTablet::check_sstable_column_checksum() const return ret; } -int ObTablet::serialize(char *buf, const int64_t len, int64_t &pos) const +int ObTablet::serialize(char *buf, const int64_t len, int64_t &pos, const ObSArray &meta_arr) const { int ret = OB_SUCCESS; - int64_t new_pos = pos; - const int64_t length = get_self_size(); + ObTabletBlockHeader block_header; + const int64_t total_length = get_serialize_size(meta_arr); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not inited", K(ret), K_(is_inited)); @@ -1295,10 +1325,62 @@ int ObTablet::serialize(char *buf, const int64_t len, int64_t &pos) const } else if (OB_UNLIKELY(!is_valid() && !is_empty_shell())) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("tablet is invalid", K(ret), K(*this)); - } else if (TABLET_VERSION_V2 != version_) { + } else if (ObTabletBlockHeader::TABLET_VERSION_V3 != version_) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("invalid version", K(ret), K_(version)); - } else if (OB_UNLIKELY(length > len - pos)) { + } else if (OB_FAIL(block_header.init(meta_arr.count()))) { + LOG_WARN("fail to init tablet block header", K(ret), K(meta_arr)); + } else if (OB_UNLIKELY(1 < meta_arr.count())) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("shouldn't have more than one inline meta", K(meta_arr.count())); + } else { + const int64_t header_size = block_header.get_serialize_size(); + const int64_t self_size = get_self_serialize_size(); + int64_t payload_pos = pos + header_size; + int64_t header_pos = pos; + if (OB_FAIL(self_serialize(buf, len, payload_pos))) { + LOG_WARN("fail to serialize itself", K(ret), K(len), K(payload_pos), KPC(this)); + } else { + block_header.length_ = self_size; + block_header.checksum_ = ob_crc64(buf + (pos + header_size), self_size); + const ObTabletMacroInfo *macro_info = nullptr; + for (int64_t i = 0; OB_SUCC(ret) && i < 
meta_arr.count(); i++) { + if (OB_ISNULL(meta_arr[i].obj_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("obj is nullptr", K(ret), K(meta_arr[i].meta_type_)); + } else if (OB_UNLIKELY(ObSecondaryMetaType::TABLET_MACRO_INFO != meta_arr[i].meta_type_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("only support tablet macro info", K(ret), K(meta_arr[i].meta_type_)); + } else if (FALSE_IT(macro_info = reinterpret_cast(meta_arr[i].obj_))) { + } else if (OB_FAIL(macro_info->serialize(buf, len, payload_pos))) { + LOG_WARN("fail to serialize tablet macro info", K(ret), KPC(macro_info)); + } else if (OB_FAIL(block_header.push_inline_meta(ObInlineSecondaryMetaDesc(meta_arr[i].meta_type_, macro_info->get_serialize_size())))) { + LOG_WARN("fail to push inline meta", K(ret), K(meta_arr[i].meta_type_), KPC(macro_info)); + } + } + } + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_UNLIKELY(payload_pos - pos != total_length)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet's length doesn't match calculated length", K(ret), K(payload_pos), K(pos), K(total_length)); + } else if (OB_FAIL(block_header.serialize(buf, len, header_pos))) { + LOG_WARN("fail to serialize block header", K(ret), K(len), K(header_pos), K(block_header)); + } else if (OB_UNLIKELY(header_pos - pos != header_size)) { + LOG_WARN("block header's length doesn't match calculated length", K(ret), K(header_pos), K(pos), K(header_pos), K(block_header)); + } else { + pos = payload_pos; + } + } + return ret; +} + +int ObTablet::self_serialize(char *buf, const int64_t len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + int64_t new_pos = pos; + const int64_t length = get_self_serialize_size(); + if (OB_UNLIKELY(length > len - pos)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("buffer's length is not enough", K(ret), K(length), K(len - new_pos)); } else if (OB_FAIL(serialization::encode_i32(buf, len, new_pos, version_))) { @@ -1315,11 +1397,11 @@ int ObTablet::serialize(char *buf, const int64_t len, int64_t &pos) const LOG_WARN("fail to 
serialize rowkey read info", K(ret), KPC(rowkey_read_info_)); } else if (new_pos - pos < length && OB_FAIL(mds_data_.serialize(buf, len, new_pos))) { LOG_WARN("failed to serialize mds data", K(ret), K(len), K(new_pos)); + } else if (new_pos - pos < length && OB_FAIL(macro_info_addr_.addr_.serialize(buf, len, new_pos))) { + LOG_WARN("failed to serialize macro info addr", K(ret), K(len), K(new_pos), K(macro_info_addr_)); } else if (OB_UNLIKELY(length != new_pos - pos)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet's length doesn't match standard length", K(ret), K(new_pos), K(pos), K(length), KPC(this)); - } else if (tablet_meta_.has_next_tablet_ && OB_FAIL(next_tablet_guard_.get_obj()->serialize(buf, len, new_pos))) { - LOG_WARN("failed to serialize next tablet", K(ret), K(len), K(new_pos)); } else { pos = new_pos; } @@ -1334,6 +1416,7 @@ int ObTablet::rollback_ref_cnt( { int ret = OB_SUCCESS; int64_t new_pos = pos; + int32_t version = 0; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("cannot deserialize inited tablet meta", K(ret), K_(is_inited)); @@ -1343,26 +1426,23 @@ int ObTablet::rollback_ref_cnt( || OB_UNLIKELY(len <= pos)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret), K(buf), K(len), K(pos)); - } else if (OB_FAIL(serialization::decode_i32(buf, len, new_pos, (int32_t *)&version_))) { - LOG_WARN("failed to deserialize tablet meta's version", K(ret), K(len), K(new_pos)); - } else if (OB_FAIL(serialization::decode_i32(buf, len, new_pos, (int32_t *)&length_))) { - LOG_WARN("failed to deserialize tablet meta's length", K(ret), K(len), K(new_pos)); - } else if (OB_UNLIKELY(length_ > len - pos)) { + } else if (OB_FAIL(get_tablet_version(buf + pos, len - pos, version))) { + LOG_WARN("fail to get tablet version", K(ret)); + } else if (ObTabletBlockHeader::TABLET_VERSION_V1 == version || ObTabletBlockHeader::TABLET_VERSION_V2 == version) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("buffer's length is not enough", K(ret), K(length_), K(len - 
new_pos)); + LOG_WARN("unexpected tablet version", K(ret)); } else { do { - if (OB_FAIL(load_deserialize_v2(allocator, buf, len, pos, new_pos, false))) { - LOG_WARN("fail to load deserialize tablet v2", K(ret), K(length_), K(len - new_pos), KPC(this)); + if (ObTabletBlockHeader::TABLET_VERSION_V3 == version) { + if (OB_FAIL(load_deserialize_v3(allocator, buf, len, new_pos, false/*pull memtable*/))) { + LOG_WARN("fail to load deserialize tablet v3", K(ret), KPC(this)); + } } } while (ignore_ret(ret)); } if (OB_FAIL(ret)) { // do nothing - } else if (OB_UNLIKELY(length_ != new_pos - pos)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("tablet's length doesn't match standard length", K(ret), K(new_pos), K(pos), K_(length)); } else if (tablet_meta_.has_next_tablet_) { ObTablet next_tablet; next_tablet.set_tablet_addr(tablet_addr_); @@ -1379,6 +1459,50 @@ int ObTablet::rollback_ref_cnt( return ret; } +int ObTablet::deserialize_for_replay( + common::ObArenaAllocator &allocator, + const char *buf, + const int64_t len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + int32_t version = 0; + int64_t new_pos = pos; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("cannot deserialize inited tablet meta", K(ret), K_(is_inited)); + } else if (OB_ISNULL(buf) + || OB_UNLIKELY(len <= 0) + || OB_UNLIKELY(pos < 0) + || OB_UNLIKELY(len <= pos)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), K(buf), K(len), K(pos)); + } else if (OB_FAIL(get_tablet_version(buf + pos, len - pos, version))) { + LOG_WARN("fail to get tablet version", K(ret)); + } else if ((ObTabletBlockHeader::TABLET_VERSION_V1 == version || ObTabletBlockHeader::TABLET_VERSION_V2 == version)) { + if (OB_FAIL(deserialize(allocator, buf, len, new_pos))) { + LOG_WARN("fail to deserialize", K(ret)); + } else { + pos = new_pos; + } + } else if (ObTabletBlockHeader::TABLET_VERSION_V3 == version) { + if (OB_FAIL(load_deserialize_v3(allocator, buf, len, new_pos, false/*pull memtable*/))) { + 
LOG_WARN("fail to deserialize with id array", K(ret)); + } else if (OB_FAIL(inner_inc_macro_ref_cnt())) { + LOG_WARN("failed to increase macro ref cnt", K(ret)); + } else { + pos = new_pos; + is_inited_ = true; + LOG_INFO("succeed to deserialize tablet for replay", K(ret), KPC(this)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected version", K(ret)); + } + return ret; +} + +// deserialize to a full tablet int ObTablet::deserialize( common::ObArenaAllocator &allocator, const char *buf, @@ -1402,6 +1526,7 @@ int ObTablet::load_deserialize( { int ret = OB_SUCCESS; int64_t new_pos = pos; + int32_t version = 0; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("cannot deserialize inited tablet meta", K(ret), K_(is_inited)); @@ -1411,20 +1536,17 @@ int ObTablet::load_deserialize( || OB_UNLIKELY(len <= pos)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret), K(buf), K(len), K(pos)); - } else if (OB_FAIL(serialization::decode_i32(buf, len, new_pos, (int32_t *)&version_))) { - LOG_WARN("failed to deserialize tablet meta's version", K(ret), K(len), K(new_pos)); - } else if (OB_FAIL(serialization::decode_i32(buf, len, new_pos, (int32_t *)&length_))) { - LOG_WARN("failed to deserialize tablet meta's length", K(ret), K(len), K(new_pos)); - } else if (OB_UNLIKELY(length_ > len - pos)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("buffer's length is not enough", K(ret), K(length_), K(len - new_pos)); - } else if (TABLET_VERSION_V2 == version_ && OB_FAIL(load_deserialize_v2(allocator, buf, len, pos, new_pos))) { - LOG_WARN("failed to load deserialize v2", K(ret), K(length_), K(len - new_pos), KPC(this)); - } else if (TABLET_VERSION == version_ && OB_FAIL(load_deserialize_v1(allocator, buf, len, pos, new_pos))) { - LOG_WARN("failed to load deserialize v1", K(ret), K(length_), K(len - new_pos), KPC(this)); - } else if (OB_UNLIKELY(length_ != new_pos - pos)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("tablet's length doesn't match standard length", 
K(ret), K(new_pos), K(pos), K_(length)); + } else if (OB_FAIL(get_tablet_version(buf + pos, len - pos, version))) { + LOG_WARN("fail to get tablet version", K(ret)); + } else if (ObTabletBlockHeader::TABLET_VERSION_V1 == version && + OB_FAIL(load_deserialize_v1(allocator, buf, len, new_pos))) { + LOG_WARN("failed to load deserialize v1", K(ret), KPC(this)); + } else if (ObTabletBlockHeader::TABLET_VERSION_V2 == version && + OB_FAIL(load_deserialize_v2(allocator, buf, len, new_pos, true/*prepare_memtable*/))) { + LOG_WARN("failed to load deserialize v2", K(ret), K(pos), KPC(this)); + } else if (ObTabletBlockHeader::TABLET_VERSION_V3 == version && + OB_FAIL(load_deserialize_v3(allocator, buf, len, new_pos, true/*prepare_memtable*/))) { + LOG_WARN("failed to load deserialize v3", K(ret), K(pos), KPC(this)); } else if (tablet_meta_.has_next_tablet_) { const ObTabletMapKey key(tablet_meta_.ls_id_, tablet_meta_.tablet_id_); if (OB_FAIL(ObTabletCreateDeleteHelper::acquire_tmp_tablet(key, allocator, next_tablet_guard_))) { @@ -1437,6 +1559,7 @@ int ObTablet::load_deserialize( LOG_WARN("failed to deserialize next tablet", K(ret), K(len), K(new_pos)); } } + if (OB_SUCC(ret)) { pos = new_pos; } else if (OB_UNLIKELY(!is_inited_)) { @@ -1448,10 +1571,11 @@ int ObTablet::load_deserialize( int ObTablet::deserialize_post_work(common::ObArenaAllocator &allocator) { int ret = OB_SUCCESS; + ObLinkedMacroBlockItemWriter linked_writer; // it needs to hold ref cnt until inner_inc_macro_ref_cnt is called if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("cannot deserialize inited tablet meta", K(ret), K_(is_inited)); - } else if (TABLET_VERSION_V2 == version_) { + } else if (ObTabletBlockHeader::TABLET_VERSION_V2 == version_ || ObTabletBlockHeader::TABLET_VERSION_V3 == version_) { if (!table_store_addr_.addr_.is_none()) { IO_AND_DESERIALIZE(allocator, table_store_addr_.addr_, table_store_addr_.ptr_, *this); if 
(FAILEDx(table_store_addr_.ptr_->batch_cache_sstable_meta(allocator, INT64_MAX))) {// cache all @@ -1487,7 +1611,6 @@ int ObTablet::deserialize_post_work(common::ObArenaAllocator &allocator) } } if (OB_SUCC(ret)) { - version_ = TABLET_VERSION_V2; is_inited_ = true; LOG_INFO("succeed to load deserialize tablet", K(ret), KPC(this)); } @@ -1502,8 +1625,7 @@ int ObTablet::load_deserialize_v1( common::ObArenaAllocator &allocator, const char *buf, const int64_t len, - const int64_t pos, - int64_t &new_pos) + int64_t &pos) { int ret = OB_SUCCESS; ObTabletAutoincSeq auto_inc_seq; @@ -1512,8 +1634,14 @@ int ObTablet::load_deserialize_v1( ObMediumCompactionInfoList info_list; ObITable **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; + int64_t new_pos = pos; + macro_info_addr_.addr_.set_none_addr(); - if (OB_FAIL(ObTabletObjLoadHelper::alloc_and_new(allocator, table_store_addr_.ptr_))) { + if (OB_FAIL(serialization::decode_i32(buf, len, new_pos, (int32_t *)&version_))) { + LOG_WARN("failed to deserialize tablet meta's version", K(ret), K(len), K(new_pos)); + } else if (OB_FAIL(serialization::decode_i32(buf, len, new_pos, (int32_t *)&length_))) { + LOG_WARN("failed to deserialize tablet meta's length", K(ret), K(len), K(new_pos)); + } else if (OB_FAIL(ObTabletObjLoadHelper::alloc_and_new(allocator, table_store_addr_.ptr_))) { LOG_WARN("fail to allocate and new table store", K(ret)); } else if (OB_FAIL(ObTabletObjLoadHelper::alloc_and_new(allocator, storage_schema_addr_.ptr_))) { LOG_WARN("fail to allocate and new storage schema", K(ret)); @@ -1545,7 +1673,7 @@ int ObTablet::load_deserialize_v1( if (FAILEDx(build_read_info(allocator))) { LOG_WARN("failed to build read info", K(ret)); } else if (OB_FAIL(pull_memtables(allocator, ddl_kvs_addr, ddl_kv_count))) { - LOG_WARN("fail to pull memtable", K(ret), K(len), K(new_pos)); + LOG_WARN("fail to pull memtable", K(ret), K(len), K(pos)); } else if (OB_FAIL(table_store_cache_.init(table_store_addr_.ptr_->get_major_sstables(), 
table_store_addr_.ptr_->get_minor_sstables(), true /*is_row_store*/))) { @@ -1553,8 +1681,9 @@ int ObTablet::load_deserialize_v1( } else { ddl_kvs_ = ddl_kvs_addr; ddl_kv_count_ = ddl_kv_count; - set_mem_addr(); + set_initial_addr(); mds_data_.set_mem_addr(); + pos = new_pos; } return ret; } @@ -1660,14 +1789,19 @@ int ObTablet::load_deserialize_v2( common::ObArenaAllocator &allocator, const char *buf, const int64_t len, - const int64_t pos, - int64_t &new_pos, + int64_t &pos, const bool prepare_memtable) { int ret = OB_SUCCESS; ObITable **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; - if (new_pos - pos < length_ && OB_FAIL(tablet_meta_.deserialize(buf, len, new_pos))) { + int64_t new_pos = pos; + macro_info_addr_.addr_.set_none_addr(); + if (OB_FAIL(serialization::decode_i32(buf, len, new_pos, (int32_t *)&version_))) { + LOG_WARN("failed to deserialize tablet meta's version", K(ret), K(len), K(new_pos)); + } else if (OB_FAIL(serialization::decode_i32(buf, len, new_pos, (int32_t *)&length_))) { + LOG_WARN("failed to deserialize tablet meta's length", K(ret), K(len), K(new_pos)); + } else if (new_pos - pos < length_ && OB_FAIL(tablet_meta_.deserialize(buf, len, new_pos))) { LOG_WARN("failed to deserialize tablet meta", K(ret), K(len), K(new_pos)); } else if (new_pos - pos < length_ && OB_FAIL(table_store_addr_.addr_.deserialize(buf, len, new_pos))) { LOG_WARN("failed to deserialize table store addr", K(ret), K(len), K(new_pos)); @@ -1681,10 +1815,75 @@ int ObTablet::load_deserialize_v2( } else if (new_pos - pos < length_ && OB_FAIL(mds_data_.deserialize(buf, len, new_pos))) { LOG_WARN("failed to deserialize mds data", K(ret), K(len), K(new_pos)); } else if (prepare_memtable && OB_FAIL(pull_memtables(allocator, ddl_kvs_addr, ddl_kv_count))) { - LOG_WARN("fail to pull memtable", K(ret), K(len), K(new_pos)); - } else { + LOG_WARN("fail to pull memtable", K(ret), K(len), K(pos)); + } + + if (OB_SUCC(ret)) { ddl_kvs_ = ddl_kvs_addr; ddl_kv_count_ = ddl_kv_count; + 
pos = new_pos; + } + + return ret; +} + +// tablet_V3 = OBTabletBlockHeader + tablet_V2 + inline_meta +int ObTablet::load_deserialize_v3( + common::ObArenaAllocator &allocator, + const char *buf, + const int64_t len, + int64_t &pos, + const bool prepare_memtable) +{ + int ret = OB_SUCCESS; + ObITable **ddl_kvs_addr = nullptr; + int64_t ddl_kv_count = 0; + ObTabletBlockHeader header; + int64_t new_pos = 0; + int32_t crc = 0; + + if (OB_FAIL(header.deserialize(buf, len, pos))) { + LOG_WARN("fail to deserialize tablet block header", K(ret)); + } else if (FALSE_IT(new_pos = pos)) { + } else if (FALSE_IT(crc = ob_crc64(buf + new_pos, header.length_))) { + } else if (OB_UNLIKELY(header.checksum_ != crc)) { + ret = OB_CHECKSUM_ERROR; + LOG_WARN("tablet's checksum doesn't match", K(ret), K(header), K(crc)); + } else if (OB_UNLIKELY(1 < header.inline_meta_count_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("shouldn't have more than one inline meta", K(ret), K(header)); + } else if (OB_FAIL(load_deserialize_v2(allocator, buf, len, new_pos, prepare_memtable))) { + LOG_WARN("fail to load_deserialize_v2", K(ret)); + } else if (new_pos - pos < length_ && OB_FAIL(macro_info_addr_.addr_.deserialize(buf, len, new_pos))) { + LOG_WARN("fail to deserialize macro info addr", K(ret), K(len), K(new_pos)); + } else if (OB_UNLIKELY(new_pos - pos >= len && 0 != header.inline_meta_count_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("buf is not enough to deserialize macro info", K(ret), K(new_pos), K(pos), K(length_)); + } else if (1 == header.inline_meta_count_) { + int64_t offset = 0; + int64_t size = 0; + MacroBlockId macro_id; + int64_t secondary_meta_size = header.desc_array_[0].length_; + if (OB_UNLIKELY(ObSecondaryMetaType::TABLET_MACRO_INFO != header.desc_array_[0].type_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("only support tablet macro info", K(ret), K(header.desc_array_[0])); + } else if (OB_FAIL(deserialize_macro_info(allocator, buf, len, new_pos, macro_info_addr_.ptr_))) { + 
LOG_WARN("fail to deserialize macro info", K(ret), K(len), K(new_pos)); + } else if (OB_UNLIKELY(!tablet_addr_.is_valid() || !tablet_addr_.is_block())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet addr is invalid", K(ret), K(tablet_addr_)); + } else if (OB_FAIL(tablet_addr_.get_block_addr(macro_id, offset, size))) { + LOG_WARN("fail to get block addr", K(ret), K(tablet_addr_)); + } else if (OB_FAIL(macro_info_addr_.addr_.set_block_addr( + macro_id, + offset + (size - secondary_meta_size), + secondary_meta_size, + ObMetaDiskAddr::DiskType::RAW_BLOCK))) { + LOG_WARN("fail to set tablet macro info's addr", K(ret), K(tablet_addr_), K(secondary_meta_size)); + } + } + if (OB_SUCC(ret)) { + pos = new_pos; } return ret; } @@ -1695,7 +1894,6 @@ int ObTablet::deserialize( int64_t &pos) { int ret = OB_SUCCESS; - int64_t new_pos = pos; char* tablet_buf = reinterpret_cast(this); ObMetaObjBufferHeader &buf_header = ObMetaObjBufferHelper::get_buffer_header(tablet_buf); int64_t remain = buf_header.buf_len_ - sizeof(ObTablet); @@ -1703,6 +1901,10 @@ int ObTablet::deserialize( ObArenaAllocator allocator; ObITable **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; + ObTabletBlockHeader header; + int32_t version = 0; + macro_info_addr_.addr_.set_none_addr(); + if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("cannot deserialize inited tablet meta", K(ret), K_(is_inited)); @@ -1712,17 +1914,20 @@ int ObTablet::deserialize( || OB_UNLIKELY(len <= pos)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret), K(buf), K(len), K(pos)); - } else if (OB_FAIL(serialization::decode_i32(buf, len, new_pos, (int32_t *)&version_))) { - LOG_WARN("failed to deserialize tablet meta's version", K(ret), K(len), K(new_pos)); - } else if (OB_FAIL(serialization::decode_i32(buf, len, new_pos, (int32_t *)&length_))) { - LOG_WARN("failed to deserialize tablet meta's length", K(ret), K(len), K(new_pos)); - } else if (TABLET_VERSION_V2 != version_) { + } else if 
(OB_FAIL(get_tablet_version(buf + pos, len - pos, version))) { + LOG_WARN("fail to get tablet version", K(ret)); + } else if (ObTabletBlockHeader::TABLET_VERSION_V2 != version && ObTabletBlockHeader::TABLET_VERSION_V3 != version) { ret = OB_NOT_SUPPORTED; LOG_WARN("invalid version", K(ret), K_(version)); + } else if (ObTabletBlockHeader::TABLET_VERSION_V3 == version && OB_FAIL(header.deserialize(buf, len, pos))) { + LOG_WARN("fail to deserialize ObTabletBlockHeader for tablet v3", K(ret)); } else { - if (OB_UNLIKELY(length_ > len - pos)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("buffer's length is not enough", K(ret), K(length_), K(len - new_pos)); + int64_t new_pos = pos; + int64_t rowkey_info_copy_size = 0; + if (OB_FAIL(serialization::decode_i32(buf, len, new_pos, (int32_t *)&version_))) { + LOG_WARN("failed to deserialize tablet meta's version", K(ret), K(len), K(new_pos)); + } else if (OB_FAIL(serialization::decode_i32(buf, len, new_pos, (int32_t *)&length_))) { + LOG_WARN("failed to deserialize tablet meta's length", K(ret), K(len), K(new_pos)); } else if (new_pos - pos < length_ && OB_FAIL(tablet_meta_.deserialize(buf, len, new_pos))) { LOG_WARN("failed to deserialize tablet meta", K(ret), K(len), K(new_pos)); } else if (OB_FAIL(pull_memtables(allocator, ddl_kvs_addr, ddl_kv_count))) { @@ -1738,9 +1943,9 @@ int ObTablet::deserialize( LOG_WARN("fail to deserialize rowkey read info", K(ret), K(len), K(new_pos)); } else if (OB_FAIL(rowkey_read_info->deserialize(allocator, buf, len, new_pos))) { LOG_WARN("fail to deserialize rowkey read info", K(ret), K(len), K(new_pos)); - } else if (remain < rowkey_read_info->get_deep_copy_size()) { + } else if (remain < (rowkey_info_copy_size = rowkey_read_info->get_deep_copy_size())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("tablet memory buffer not enough for rowkey read info", K(ret), K(remain), K(rowkey_read_info->get_deep_copy_size())); + LOG_WARN("tablet memory buffer not enough for rowkey read info", K(ret), K(remain), 
K(rowkey_info_copy_size)); } else if (OB_FAIL(rowkey_read_info->deep_copy( tablet_buf + start_pos, remain, rowkey_read_info_))) { LOG_WARN("fail to deep copy rowkey read info to tablet", K(ret), KPC(rowkey_read_info), K(remain)); @@ -1748,8 +1953,8 @@ int ObTablet::deserialize( ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected nullptr for rowkey read info deep copy", K(ret)); } else { - remain -= rowkey_read_info_->get_deep_copy_size(); - start_pos += rowkey_read_info_->get_deep_copy_size(); + remain -= rowkey_info_copy_size; + start_pos += rowkey_info_copy_size; } } @@ -1783,8 +1988,13 @@ int ObTablet::deserialize( if (OB_SUCC(ret)) { if (new_pos - pos < length_ && OB_FAIL(mds_data_.deserialize(buf, len, new_pos))) { LOG_WARN("failed to deserialize mds data", K(ret), K(len), K(new_pos)); + } else if (ObTabletBlockHeader::TABLET_VERSION_V3 == version + && new_pos - pos < length_ + && OB_FAIL(macro_info_addr_.addr_.deserialize(buf, len, new_pos))) { + LOG_WARN("fail to deserialize macro info addr", K(ret), K(len), K(new_pos)); } } + if (OB_SUCC(ret)) { ObTabletTableStore *table_store = nullptr; if (table_store_addr_.addr_.is_none()) { @@ -1855,6 +2065,50 @@ int ObTablet::deserialize( } } + if (OB_SUCC(ret) && ObTabletBlockHeader::TABLET_VERSION_V3 == version) { + if (OB_UNLIKELY(1 < header.inline_meta_count_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("shouldn't have more than one inline meta", K(ret), K(header)); + } else if (1 == header.inline_meta_count_) { + int64_t offset = 0; + int64_t size = 0; + MacroBlockId macro_id; + int64_t secondary_meta_size = header.desc_array_[0].length_; + if (OB_UNLIKELY(ObSecondaryMetaType::TABLET_MACRO_INFO != header.desc_array_[0].type_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("only support tablet macro info", K(ret), K(header.desc_array_[0])); + } else if (OB_UNLIKELY(!tablet_addr_.is_valid() || !tablet_addr_.is_block())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet addr is invalid", K(ret), K(tablet_addr_)); + } else if 
(OB_FAIL(tablet_addr_.get_block_addr(macro_id, offset, size))) { + LOG_WARN("fail to get block addr", K(ret), K(tablet_addr_)); + } else if (OB_FAIL(macro_info_addr_.addr_.set_block_addr( + macro_id, + offset + (size - secondary_meta_size), + secondary_meta_size, + ObMetaDiskAddr::DiskType::RAW_BLOCK))) { + LOG_WARN("fail to set tablet macro info's addr", K(ret), K(tablet_addr_), K(secondary_meta_size)); + } else if (new_pos + secondary_meta_size <= len) { + ObTabletMacroInfo *tablet_macro_info = nullptr; + int64_t macro_info_size = 0; + if (OB_FAIL(deserialize_macro_info(allocator, buf, len, new_pos, tablet_macro_info))) { + LOG_WARN("fail to deserialize macro info", K(ret), K(len), K(new_pos)); + } else if (FALSE_IT(macro_info_size = tablet_macro_info->get_deep_copy_size())) { + } else if (remain >= macro_info_size) { + if (OB_FAIL(tablet_macro_info->deep_copy(tablet_buf + start_pos, remain, macro_info_addr_.ptr_))) { + LOG_WARN("fail to deep copy tablet macro info", K(ret), K(start_pos), K(remain), KPC(tablet_macro_info)); + } else if (OB_ISNULL(macro_info_addr_.ptr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("macro info is nullptr", K(ret), K(macro_info_addr_)); + } else { + remain -= macro_info_size; + start_pos += macro_info_size; + } + } + } + } + } + if (OB_SUCC(ret) && tablet_meta_.has_next_tablet_) { ObTabletHandle next_tablet_handle; const ObTabletMapKey key(tablet_meta_.ls_id_, tablet_meta_.tablet_id_); @@ -1876,7 +2130,7 @@ int ObTablet::deserialize( is_inited_ = true; // must succeed if hold_ref_cnt_ has been set to true hold_ref_cnt_ = true; - LOG_INFO("succeed to load deserialize tablet", K(ret), KPC(this)); + LOG_INFO("succeed to load deserialize tablet", K(ret), KPC(this), K(header)); } } @@ -1886,7 +2140,28 @@ int ObTablet::deserialize( return ret; } -int ObTablet::get_tablet_meta_ids(ObIArray &meta_ids) const +int ObTablet::deserialize_macro_info( + common::ObArenaAllocator &allocator, + const char *buf, + const int64_t len, + int64_t &pos, + 
ObTabletMacroInfo *&tablet_macro_info) +{ + int ret = OB_SUCCESS; + void *macro_info_buf = nullptr; + if (OB_ISNULL(macro_info_buf = allocator.alloc(sizeof(ObTabletMacroInfo)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate buf for tablet macro info", K(ret)); + } else if (FALSE_IT(tablet_macro_info = new (macro_info_buf) ObTabletMacroInfo)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate buf for tablet macro info", K(ret)); + } else if (OB_FAIL(tablet_macro_info->deserialize(allocator, buf, len, pos))) { + LOG_WARN("fail to deserialize tablet macro info", K(ret), K(len), K(pos)); + } + return ret; +} + +int ObTablet::get_tablet_first_second_level_meta_ids(ObIArray &meta_ids) const { int ret = OB_SUCCESS; const ObMetaDiskAddr &tablet_status_uncommitted_kv_addr = mds_data_.tablet_status_.uncommitted_kv_.addr_; @@ -1924,14 +2199,83 @@ int ObTablet::get_tablet_meta_ids(ObIArray &meta_ids) const int ObTablet::parse_meta_addr(const ObMetaDiskAddr &addr, ObIArray &meta_ids) { int ret = OB_SUCCESS; - MacroBlockId macro_id; if (addr.is_block()) { if (OB_UNLIKELY(!addr.is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet_status_uncommitted_kv_addr is invalid", K(ret), K(addr)); - } else if (FALSE_IT(macro_id = addr.block_id())) { - } else if (OB_FAIL(meta_ids.push_back(macro_id))) { - LOG_WARN("fail to push back macro id", K(ret), K(macro_id)); + } else { + const MacroBlockId macro_id = addr.block_id(); + bool found = false; + for (int64_t i = 0; !found && i < meta_ids.count(); i++) { + if (macro_id == meta_ids.at(i)) { + found = true; + } + } + if (!found && OB_FAIL(meta_ids.push_back(macro_id))) { + LOG_WARN("fail to push back macro id", K(ret), K(macro_id)); + } + } + } + return ret; +} + +int ObTablet::get_all_macro_ids( + ObIArray &meta_block_arr, + ObIArray &data_block_arr, + ObIArray &shared_meta_block_arr, + ObIArray &shared_data_block_arr) const +{ + int ret = OB_SUCCESS; + ObArenaAllocator allocator; + ObTabletMacroInfo 
*tablet_macro_info = nullptr; + bool in_memory = false; + if (is_empty_shell()) { + // no macro info + } else if (OB_FAIL(load_macro_info(allocator, tablet_macro_info, in_memory))) { + LOG_WARN("fail to fetch tablet macro info", K(ret)); + } else if (OB_FAIL(tablet_macro_info->get_all_macro_ids( + meta_block_arr, + data_block_arr, + shared_meta_block_arr, + shared_data_block_arr))) { + LOG_WARN("fail to get tablet's macro ids", K(ret)); + } + if (OB_NOT_NULL(tablet_macro_info) && !in_memory) { + tablet_macro_info->~ObTabletMacroInfo(); + allocator.free(tablet_macro_info); + } + return ret; +} + +int ObTablet::load_macro_info(ObArenaAllocator &allocator, ObTabletMacroInfo *&tablet_macro_info, bool &in_memory) const +{ + int ret = OB_SUCCESS; + in_memory = false; + if (OB_UNLIKELY(!macro_info_addr_.is_valid() || macro_info_addr_.is_none_object())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("macro info addr is invalid", K(ret), K_(macro_info_addr)); + } else if (macro_info_addr_.is_memory_object()) { + if (OB_ISNULL(macro_info_addr_.ptr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet macro info ptr is null", K(ret), K_(macro_info_addr)); + } else { + tablet_macro_info = macro_info_addr_.ptr_; + in_memory = true; + } + } else { + char *buf = nullptr; + int64_t buf_len = 0; + int64_t pos = 0; + void *macro_info_buf = nullptr; + if (OB_FAIL(MTL(ObTenantCheckpointSlogHandler*)->read_from_disk( + macro_info_addr_.addr_, allocator, buf, buf_len))) { + LOG_WARN("fail to read shared block", K(ret), K_(macro_info_addr)); + } else if (OB_ISNULL(macro_info_buf = allocator.alloc(sizeof(ObTabletMacroInfo)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory for tablet macro info", K(ret)); + } else if (FALSE_IT(tablet_macro_info = new (macro_info_buf) ObTabletMacroInfo)) { + } else if (OB_FAIL(tablet_macro_info->deserialize(allocator, buf, buf_len, pos))) { + LOG_WARN("fail to deserialize tablet macro info", K(ret), K(buf_len), K(pos)); } } return ret; @@ 
-1953,7 +2297,118 @@ int ObTablet::inc_macro_ref_cnt() return ret; } +int ObTablet::inc_macro_ref_with_macro_info(const ObTabletMacroInfo &tablet_macro_info) +{ + int ret = OB_SUCCESS; + bool inc_tablet_ref = false; + bool inc_other_ref = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("tablet hasn't been inited", K(ret), K(is_inited_)); + } else if (OB_FAIL(inc_addr_ref_cnt(tablet_addr_, inc_tablet_ref))) { + LOG_WARN("fail to increase macro blocks' ref cnt for 4k tablet", K(ret), K(tablet_addr_)); + } else if (OB_FAIL(tablet_macro_info.inc_macro_ref(inc_other_ref))) { + LOG_WARN("fail to inc macro ref", K(ret)); + } + if (OB_FAIL(ret)) { + if (inc_tablet_ref) { + dec_addr_ref_cnt(tablet_addr_); + } + if (inc_other_ref) { + tablet_macro_info.dec_macro_ref(); + } + } + if (OB_SUCC(ret)) { + hold_ref_cnt_ = true; + } + + // for issue track + const ObTabletComplexAddr &tablet_status_uncommitted_kv_addr = mds_data_.tablet_status_.uncommitted_kv_; + const ObTabletComplexAddr &tablet_status_committed_kv_addr = mds_data_.tablet_status_.committed_kv_; + const ObTabletComplexAddr &aux_tablet_info_uncommitted_kv_addr = mds_data_.aux_tablet_info_.uncommitted_kv_; + const ObTabletComplexAddr &aux_tablet_info_committed_kv_addr = mds_data_.aux_tablet_info_.committed_kv_; + const ObTabletComplexAddr &medium_info_list_addr = mds_data_.medium_info_list_; + const ObTabletComplexAddr &auto_inc_seq_addr = mds_data_.auto_inc_seq_; + FLOG_INFO("the tablet that inner increases ref cnt is", + K(ret), K(hold_ref_cnt_), K(tablet_meta_.ls_id_), K(tablet_meta_.tablet_id_), K(table_store_addr_.addr_), + K(auto_inc_seq_addr.addr_), K(storage_schema_addr_.addr_), K(medium_info_list_addr.addr_), + K(tablet_status_uncommitted_kv_addr.addr_), K(tablet_status_committed_kv_addr.addr_), + K(aux_tablet_info_uncommitted_kv_addr.addr_), K(aux_tablet_info_committed_kv_addr.addr_), + K(tablet_addr_), KP(this), K(lbt())); + return ret; +} + int ObTablet::inner_inc_macro_ref_cnt() 
+{ + int ret = OB_SUCCESS; + if (OB_FAIL(check_meta_addr())) { + LOG_WARN("fail to check meta addrs", K(ret)); + } else if (OB_UNLIKELY(ObTabletBlockHeader::TABLET_VERSION_V3 == version_ && !is_empty_shell() && macro_info_addr_.is_none_object())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("v3 normal tablet's macro_info_addr_ shouldn't be none object", K(ret), KPC(this)); + } else if (macro_info_addr_.is_none_object()) { + if (OB_FAIL(inc_ref_without_aggregated_info())) { + LOG_WARN("fail to inc macro ref without fetch", K(ret)); + } + } else { + if (OB_FAIL(inc_ref_with_aggregated_info())) { + LOG_WARN("fail to inc macro ref with fetch", K(ret)); + } + } + if (OB_SUCC(ret)) { + hold_ref_cnt_ = true; + } + + // for issue track + const ObTabletComplexAddr &tablet_status_uncommitted_kv_addr = mds_data_.tablet_status_.uncommitted_kv_; + const ObTabletComplexAddr &tablet_status_committed_kv_addr = mds_data_.tablet_status_.committed_kv_; + const ObTabletComplexAddr &aux_tablet_info_uncommitted_kv_addr = mds_data_.aux_tablet_info_.uncommitted_kv_; + const ObTabletComplexAddr &aux_tablet_info_committed_kv_addr = mds_data_.aux_tablet_info_.committed_kv_; + const ObTabletComplexAddr &medium_info_list_addr = mds_data_.medium_info_list_; + const ObTabletComplexAddr &auto_inc_seq_addr = mds_data_.auto_inc_seq_; + FLOG_INFO("the tablet that inner increases ref cnt is", + K(ret), K(hold_ref_cnt_), K(tablet_meta_.ls_id_), K(tablet_meta_.tablet_id_), K(table_store_addr_.addr_), + K(auto_inc_seq_addr.addr_), K(storage_schema_addr_.addr_), K(medium_info_list_addr.addr_), + K(tablet_status_uncommitted_kv_addr.addr_), K(tablet_status_committed_kv_addr.addr_), + K(aux_tablet_info_uncommitted_kv_addr.addr_), K(aux_tablet_info_committed_kv_addr.addr_), + K(tablet_addr_), KP(this), K(lbt())); + + return ret; +} + +int ObTablet::inc_ref_with_aggregated_info() +{ + int ret = OB_SUCCESS; + bool inc_tablet_ref = false; + bool inc_other_ref = false; + bool in_memory = false; + ObArenaAllocator 
allocator("IncMacroRef", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObTabletMacroInfo *tablet_macro_info = nullptr; + if (OB_FAIL(load_macro_info(allocator, tablet_macro_info, in_memory))) { + LOG_WARN("fail to fetch tablet macro info", K(ret)); + } else if (OB_FAIL(inc_addr_ref_cnt(tablet_addr_, inc_tablet_ref))) { + LOG_WARN("fail to increase macro blocks' ref cnt for 4k tablet", K(ret), K(tablet_addr_)); + } else if (OB_FAIL(tablet_macro_info->inc_macro_ref(inc_other_ref))) { + LOG_WARN("fail to increase other macro ref cnt", K(ret)); + } + if (OB_FAIL(ret)) { + if (inc_tablet_ref) { + dec_addr_ref_cnt(tablet_addr_); + } + if (inc_other_ref) { + tablet_macro_info->dec_macro_ref(); + } + } + if (OB_NOT_NULL(tablet_macro_info) && !in_memory) { + tablet_macro_info->~ObTabletMacroInfo(); + allocator.free(tablet_macro_info); + } + + return ret; +} + + +int ObTablet::inc_ref_without_aggregated_info() { int ret = OB_SUCCESS; bool inc_table_store_ref = false; @@ -1975,9 +2430,7 @@ int ObTablet::inner_inc_macro_ref_cnt() const ObTabletComplexAddr &medium_info_list_addr = mds_data_.medium_info_list_; const ObTabletComplexAddr &auto_inc_seq_addr = mds_data_.auto_inc_seq_; - if (OB_FAIL(check_meta_addr())) { - LOG_WARN("fail to check meta addrs", K(ret)); - } else if (OB_FAIL(inc_linked_block_ref_cnt(medium_info_list_addr.addr_, inc_medium_info_list_ref))) { + if (OB_FAIL(inc_linked_block_ref_cnt(medium_info_list_addr.addr_, inc_medium_info_list_ref))) { LOG_WARN("fail to increase macro blocks' ref cnt for medium info list", K(ret), K(medium_info_list_addr)); } else if (OB_FAIL(inc_addr_ref_cnt(table_store_addr_.addr_, inc_table_store_ref))) { LOG_WARN("fail to increase macro blocks' ref cnt for table store", K(ret), K(table_store_addr_.addr_)); @@ -1997,8 +2450,6 @@ int ObTablet::inner_inc_macro_ref_cnt() LOG_WARN("fail to increase macro blocks' ref cnt for 4k tablet", K(ret), K(tablet_addr_)); } else if (OB_FAIL(inc_table_store_ref_cnt(inc_table_store_member_ref))) { 
LOG_WARN("fail to increase macro blocks' ref cnt for sstable meta", K(ret)); - } else { - hold_ref_cnt_ = true; } FLOG_INFO("the tablet that inner increases ref cnt is", K(ret), K(is_inited_), K(tablet_meta_.ls_id_), K(tablet_meta_.tablet_id_), K(table_store_addr_.addr_), @@ -2045,13 +2496,13 @@ int ObTablet::inner_inc_macro_ref_cnt() void ObTablet::dec_macro_ref_cnt() { int ret = OB_SUCCESS; + // for issue track const ObTabletComplexAddr &tablet_status_uncommitted_kv_addr = mds_data_.tablet_status_.uncommitted_kv_; const ObTabletComplexAddr &tablet_status_committed_kv_addr = mds_data_.tablet_status_.committed_kv_; const ObTabletComplexAddr &aux_tablet_info_uncommitted_kv_addr = mds_data_.aux_tablet_info_.uncommitted_kv_; const ObTabletComplexAddr &aux_tablet_info_committed_kv_addr = mds_data_.aux_tablet_info_.committed_kv_; const ObTabletComplexAddr &medium_info_list_addr = mds_data_.medium_info_list_; const ObTabletComplexAddr &auto_inc_seq_addr = mds_data_.auto_inc_seq_; - // We don't need to recursively decrease macro ref cnt, since we will push both them to gc queue if (OB_UNLIKELY(!hold_ref_cnt_)) { FLOG_INFO("tablet doesn't hold ref cnt, no need to dec ref cnt", K(is_inited_), K(tablet_meta_.ls_id_), K(tablet_meta_.tablet_id_), K(table_store_addr_.addr_.is_valid()), @@ -2060,25 +2511,69 @@ void ObTablet::dec_macro_ref_cnt() K(aux_tablet_info_uncommitted_kv_addr.addr_), K(aux_tablet_info_committed_kv_addr.addr_), K(tablet_addr_), KP(this), K(lbt())); } else if (OB_FAIL(check_meta_addr())) { - LOG_WARN("fail to check meta addrs", K(ret)); + LOG_ERROR("fail to check meta addrs", K(ret)); + } else if (OB_UNLIKELY(ObTabletBlockHeader::TABLET_VERSION_V3 == version_ && !is_empty_shell() && macro_info_addr_.is_none_object())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("v3 normal tablet's macro_info_addr_ shouldn't be none object", K(ret), KPC(this)); + } else if (macro_info_addr_.is_none_object()) { + dec_ref_without_aggregated_info(); + } else { + 
dec_ref_with_aggregated_info(); + } + FLOG_INFO("the tablet that decreases ref cnt is", + K(is_inited_), K(tablet_meta_.ls_id_), K(tablet_meta_.tablet_id_), K(table_store_addr_.addr_), + K(auto_inc_seq_addr.addr_), K(storage_schema_addr_.addr_), K(medium_info_list_addr.addr_), + K(tablet_status_uncommitted_kv_addr.addr_), K(tablet_status_committed_kv_addr.addr_), + K(aux_tablet_info_uncommitted_kv_addr.addr_), K(aux_tablet_info_committed_kv_addr.addr_), + K(tablet_addr_), KP(this), K(lbt())); +} + +void ObTablet::dec_ref_without_aggregated_info() +{ + int ret = OB_SUCCESS; + const ObTabletComplexAddr &tablet_status_uncommitted_kv_addr = mds_data_.tablet_status_.uncommitted_kv_; + const ObTabletComplexAddr &tablet_status_committed_kv_addr = mds_data_.tablet_status_.committed_kv_; + const ObTabletComplexAddr &aux_tablet_info_uncommitted_kv_addr = mds_data_.aux_tablet_info_.uncommitted_kv_; + const ObTabletComplexAddr &aux_tablet_info_committed_kv_addr = mds_data_.aux_tablet_info_.committed_kv_; + const ObTabletComplexAddr &medium_info_list_addr = mds_data_.medium_info_list_; + const ObTabletComplexAddr &auto_inc_seq_addr = mds_data_.auto_inc_seq_; + // 1. We don't need to recursively decrease macro ref cnt, since we will push both them to gc queue + // 2. 
the order can't be changed, must be sstable blocks' ref cnt -> tablet meta blocks' ref cnt + dec_linked_block_ref_cnt(medium_info_list_addr.addr_); + dec_table_store_ref_cnt(); + dec_addr_ref_cnt(table_store_addr_.addr_); + dec_addr_ref_cnt(storage_schema_addr_.addr_); + dec_addr_ref_cnt(tablet_status_uncommitted_kv_addr.addr_); + dec_addr_ref_cnt(tablet_status_committed_kv_addr.addr_); + dec_addr_ref_cnt(aux_tablet_info_uncommitted_kv_addr.addr_); + dec_addr_ref_cnt(aux_tablet_info_committed_kv_addr.addr_); + dec_addr_ref_cnt(auto_inc_seq_addr.addr_); + dec_addr_ref_cnt(tablet_addr_); +} + +void ObTablet::dec_ref_with_aggregated_info() +{ + int ret = OB_SUCCESS; + ObArenaAllocator allocator("DecMacroRef", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObTabletMacroInfo *tablet_macro_info = nullptr; + bool in_memory = false; + do { + if (nullptr != tablet_macro_info) { + tablet_macro_info->reset(); + tablet_macro_info = nullptr; + } + allocator.reuse(); + ret = load_macro_info(allocator, tablet_macro_info, in_memory); + } while (ignore_ret(ret)); + if (OB_FAIL(ret)) { + LOG_ERROR("fail to fetch macro info, macro blocks may leak", K(ret)); } else { - FLOG_INFO("the tablet that decreases ref cnt is", - K(is_inited_), K(tablet_meta_.ls_id_), K(tablet_meta_.tablet_id_), K(table_store_addr_.addr_), - K(auto_inc_seq_addr.addr_), K(storage_schema_addr_.addr_), K(medium_info_list_addr.addr_), - K(tablet_status_uncommitted_kv_addr.addr_), K(tablet_status_committed_kv_addr.addr_), - K(aux_tablet_info_uncommitted_kv_addr.addr_), K(aux_tablet_info_committed_kv_addr.addr_), - K(tablet_addr_), KP(this), K(lbt())); - // the order can't be changed, must be sstable blocks' ref cnt -> tablet meta blocks' ref cnt - dec_linked_block_ref_cnt(medium_info_list_addr.addr_); - dec_table_store_ref_cnt(); - dec_addr_ref_cnt(table_store_addr_.addr_); - dec_addr_ref_cnt(storage_schema_addr_.addr_); - dec_addr_ref_cnt(tablet_status_uncommitted_kv_addr.addr_); - 
dec_addr_ref_cnt(tablet_status_committed_kv_addr.addr_); - dec_addr_ref_cnt(aux_tablet_info_uncommitted_kv_addr.addr_); - dec_addr_ref_cnt(aux_tablet_info_committed_kv_addr.addr_); - dec_addr_ref_cnt(auto_inc_seq_addr.addr_); dec_addr_ref_cnt(tablet_addr_); + tablet_macro_info->dec_macro_ref(); + } + if (OB_NOT_NULL(tablet_macro_info) && !in_memory) { + tablet_macro_info->~ObTabletMacroInfo(); + allocator.free(tablet_macro_info); } } @@ -2234,12 +2729,13 @@ bool ObTablet::ignore_ret(const int ret) return OB_ALLOCATE_MEMORY_FAILED == ret || OB_TIMEOUT == ret || OB_DISK_HUNG == ret; } -void ObTablet::set_mem_addr() +void ObTablet::set_initial_addr() { if (!table_store_addr_.addr_.is_none() && !storage_schema_addr_.addr_.is_none()) { table_store_addr_.addr_.set_mem_addr(0, sizeof(ObTabletTableStore)); storage_schema_addr_.addr_.set_mem_addr(0, sizeof(ObStorageSchema)); } + macro_info_addr_.addr_.set_mem_addr(0, sizeof(ObTabletMacroInfo)); tablet_addr_.set_mem_addr(0, sizeof(ObTablet)); } @@ -2297,18 +2793,37 @@ int ObTablet::get_snapshot_version(SCN &scn) const return ret; } -int64_t ObTablet::get_serialize_size() const +int64_t ObTablet::get_serialize_size(const ObSArray &meta_arr) const { - int64_t size = get_self_size(); - if (tablet_meta_.has_next_tablet_) { - size += next_tablet_guard_.get_obj()->get_serialize_size(); + ObTabletBlockHeader header; + int64_t size = 0; + int ret = OB_SUCCESS; + if (OB_FAIL(header.init(meta_arr.count()))) { + LOG_WARN("fail to init tablet block header", K(ret), K(meta_arr)); + size = -1; + } else { + size += header.get_serialize_size(); + size += get_self_serialize_size(); + for (int64_t i = 0; OB_SUCC(ret) && i < meta_arr.count(); i++) { + if (OB_ISNULL(meta_arr[i].obj_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("obj is nullptr", K(ret), K(meta_arr[i].meta_type_)); + size = -1; + } else if (OB_UNLIKELY(ObSecondaryMetaType::TABLET_MACRO_INFO != meta_arr[i].meta_type_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("only support tablet macro 
info", K(ret), K(meta_arr[i].meta_type_)); + size = -1; + } else { + size += reinterpret_cast(meta_arr[i].obj_)->get_serialize_size(); + } + } } return size; } -int64_t ObTablet::get_self_size() const +int64_t ObTablet::get_self_serialize_size() const { - int64_t size =0; + int64_t size = 0; size += serialization::encoded_length_i32(version_); size += serialization::encoded_length_i32(length_); size += tablet_meta_.get_serialize_size(); @@ -2316,6 +2831,7 @@ int64_t ObTablet::get_self_size() const size += table_store_addr_.addr_.get_serialize_size(); size += is_empty_shell() ? 0 : rowkey_read_info_->get_serialize_size(); size += mds_data_.get_serialize_size(); + size += macro_info_addr_.addr_.get_serialize_size(); return size; } @@ -2335,6 +2851,25 @@ void ObTablet::set_tablet_addr(const ObMetaDiskAddr &tablet_addr) } } +int ObTablet::set_macro_info_addr( + const blocksstable::MacroBlockId ¯o_id, + const int64_t offset, + const int64_t size, + const ObMetaDiskAddr::DiskType block_type) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("tablet hasn't been inited", K(ret)); + } else if (OB_UNLIKELY(!macro_id.is_valid() || 0 > offset || 0 >= size)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), K(macro_id), K(offset), K(size)); + } else if (OB_FAIL(macro_info_addr_.addr_.set_block_addr(macro_id, offset, size, block_type))) { + LOG_WARN("fail to set block addr", K(ret), K(macro_id), K(offset), K(size)); + } + return ret; +} + void ObTablet::trim_tablet_list() { tablet_meta_.has_next_tablet_ = false; @@ -2355,7 +2890,7 @@ int ObTablet::deserialize_id( LOG_WARN("fail to deserialize tablet meta's version", K(ret), K(len), K(pos)); } else if (OB_FAIL(serialization::decode_i32(buf, len, pos, (int32_t *)&length))) { LOG_WARN("fail to deserialize tablet meta's length", K(ret), K(len), K(pos)); - } else if (TABLET_VERSION == version || TABLET_VERSION_V2 == version) { + } else if (ObTabletBlockHeader::TABLET_VERSION_V1 == version 
|| ObTabletBlockHeader::TABLET_VERSION_V2 == version) { if (OB_FAIL(ObTabletMeta::deserialize_id(buf, len, pos, ls_id, tablet_id))) { LOG_WARN("fail to deserialize ls_id and tablet_id from tablet meta", K(ret), K(len)); } @@ -2438,6 +2973,19 @@ int ObTablet::get_max_column_cnt_on_schema_recorder(int64_t &max_column_cnt) return ret; } +int ObTablet::get_tablet_version(const char *buf, const int64_t len, int32_t &version) +{ + int ret = OB_SUCCESS; + int64_t tmp_pos = 0; + if (OB_ISNULL(buf) || len < static_cast<int64_t>(sizeof(int32_t))) { // compare as signed so that a negative len is rejected + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet version length is unexpected", K(ret), K(len)); + } else if (OB_FAIL(serialization::decode_i32(buf, len, tmp_pos, (int32_t *)&version))) { // version is the first i32 of the buffer + LOG_WARN("fail to decode version", K(ret)); + } + return ret; +} + // be careful to use this max_schem_version on storage_schema int ObTablet::get_max_schema_version(int64_t &schema_version) { @@ -2982,6 +3530,29 @@ int ObTablet::get_sstables_size(int64_t &used_size, const bool ignore_shared_blo return ret; } +int ObTablet::get_tablet_size(const bool ignore_shared_block, int64_t &meta_size, int64_t &data_size) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("tablet hasn't been inited", K(ret)); + } else { + meta_size = 0; + data_size = 0; + const ObTabletSpaceUsage &space_usage = tablet_meta_.space_usage_; + if (ObTabletBlockHeader::TABLET_VERSION_V3 == version_) { + meta_size += space_usage.shared_meta_size_ + space_usage.meta_size_; + data_size += space_usage.data_size_; + if (!ignore_shared_block) { + data_size += space_usage.shared_data_size_; + } + } else if (OB_FAIL(get_sstables_size(data_size, ignore_shared_block))) { + LOG_WARN("fail to get all sstables' size", K(ret), K(ignore_shared_block)); + } + } + return ret; +} + int ObTablet::get_memtables(common::ObIArray &memtables, const bool need_active) const { common::SpinRLockGuard guard(memtables_lock_); @@ -5350,6 +5921,7 @@ int64_t ObTablet::to_string(char *buf, const int64_t
buf_len) const K_(ddl_kv_count), K_(table_store_addr), K_(storage_schema_addr), + K_(macro_info_addr), K_(next_tablet_guard), K_(pointer_hdl), KP_(next_tablet), @@ -6002,6 +6574,77 @@ int ObTablet::set_initial_state(const bool initial_state) return ret; } +int ObTablet::init_aggregated_info(common::ObArenaAllocator &allocator, ObLinkedMacroBlockItemWriter &linked_writer) +{ + int ret = OB_SUCCESS; + ObTableStoreIterator iter; + ObBlockInfoSet info_set; + ObTabletPersister::SharedMacroMap shared_macro_map; + int64_t shared_meta_size = 0; + if (OB_FAIL(info_set.init())) { + LOG_WARN("fail to init block info set", K(ret)); + } else if (OB_FAIL(shared_macro_map.create(SHARED_MACRO_BUCKET_CNT, "SharedBlkMap", "SharedBlkNode", MTL_ID()))) { + LOG_WARN("fail to create shared macro map", K(ret)); + } else if (OB_FAIL(inner_get_all_sstables(iter, true /*whether to unpack*/))) { + LOG_WARN("fail to get all sstables", K(ret)); + } else { + while (OB_SUCC(ret)) { + ObITable *table = nullptr; + ObSSTable *sstable = nullptr; + ObSSTableMetaHandle meta_handle; + if (OB_FAIL(iter.get_next(table))) { + if (OB_UNLIKELY(OB_ITER_END == ret)) { + ret = OB_SUCCESS; + break; + } else { + LOG_WARN("fail to get next table from iter", K(ret), K(iter)); + } + } else if (FALSE_IT(sstable = static_cast(table))) { + } else if (OB_ISNULL(sstable) || !sstable->is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("the sstable is null or invalid", K(ret), KPC(sstable)); + } else if (OB_FAIL(sstable->get_meta(meta_handle))) { + LOG_WARN("fail to get sstable meta handle", K(ret), KPC(sstable)); + } else if (sstable->is_small_sstable() && OB_FAIL(ObTabletPersister::copy_shared_macro_info( + meta_handle.get_sstable_meta().get_macro_info(), + shared_macro_map, + info_set.meta_block_info_set_))) { + LOG_WARN("fail to copy shared macro info", K(ret), K(meta_handle.get_sstable_meta())); + } else if (!sstable->is_small_sstable() && OB_FAIL(ObTabletPersister::copy_data_macro_ids( + 
meta_handle.get_sstable_meta().get_macro_info(), + info_set))) { + LOG_WARN("fail to copy sstable's macro ids", K(ret), K(meta_handle.get_sstable_meta())); + } + if (OB_FAIL(ret)) { + // do nothing + } else if (sstable->get_addr().is_block()) { // sstable may be null on the failure paths above, so it is only dereferenced once ret is known good + const ObMetaDiskAddr &sstable_addr = sstable->get_addr(); + if (OB_FAIL(info_set.shared_meta_block_info_set_.set_refactored(sstable_addr.block_id(), 0 /*whether to overwrite*/))) { + if (OB_HASH_EXIST != ret) { + LOG_WARN("fail to push macro id into set", K(ret), K(sstable_addr)); + } else { + ret = OB_SUCCESS; + } + } + shared_meta_size += sstable_addr.size(); + } + } + } + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(ObTabletPersister::convert_macro_info_map(shared_macro_map, info_set.shared_data_block_info_map_))) { + LOG_WARN("fail to convert macro info map", K(ret)); + } else { + ALLOC_AND_INIT(allocator, macro_info_addr_, info_set, linked_writer); + } + if (OB_SUCC(ret)) { + tablet_meta_.space_usage_.data_size_ = macro_info_addr_.ptr_->data_block_info_arr_.cnt_ * DEFAULT_MACRO_BLOCK_SIZE; + tablet_meta_.space_usage_.shared_data_size_ = macro_info_addr_.ptr_->shared_data_block_info_arr_.cnt_ * DEFAULT_MACRO_BLOCK_SIZE; + tablet_meta_.space_usage_.shared_meta_size_ = shared_meta_size; + } + return ret; +} + int ObTablet::load_medium_info_list( common::ObArenaAllocator &allocator, const ObTabletComplexAddr &complex_addr, diff --git a/src/storage/tablet/ob_tablet.h b/src/storage/tablet/ob_tablet.h index ece597cce5..dfe02d78e2 100644 --- a/src/storage/tablet/ob_tablet.h +++ b/src/storage/tablet/ob_tablet.h @@ -31,6 +31,8 @@ #include "storage/tablet/ob_tablet_table_store_flag.h" #include "storage/tablet/ob_tablet_mds_data.h" #include "storage/tablet/ob_tablet_mds_data_cache.h" +#include "storage/tablet/ob_tablet_block_aggregated_info.h" +#include "storage/tablet/ob_tablet_block_header.h" #include "storage/tx/ob_trans_define.h" #include "share/scn.h" #include "ob_i_tablet_mds_interface.h" @@ -106,7 +108,6 @@ class
ObTabletBindingMdsUserData; class ObMemtableArray; class ObCOSSTableV2; - struct ObTableStoreCache { public: @@ -237,6 +238,13 @@ public: bool is_valid() const; // refresh memtable and update tablet_addr_ and table_store_addr_ sequence, only used by slog ckpt int refresh_memtable_and_update_seq(const uint64_t seq); + // TODO(zhouxinlan.zxl): replace ObIArray with iterator + int get_all_macro_ids( + ObIArray &meta_block_arr, + ObIArray &data_block_arr, + ObIArray &shared_meta_block_arr, + ObIArray &shared_data_block_arr) const; + bool is_old_tablet() { return version_ < ObTabletBlockHeader::TABLET_VERSION_V3; } void dec_macro_ref_cnt(); int inc_macro_ref_cnt(); // these interfaces is only for tiny mode @@ -244,6 +252,7 @@ public: // fetch_$member: member may exist in memory or disk, if in memory, get it directly, if in disk, // read from disk then put into kv cache, and return kv cache handle for caller int fetch_table_store(ObTabletMemberWrapper &wrapper) const; + int load_macro_info(common::ObArenaAllocator &allocator, ObTabletMacroInfo *&tablet_macro_info, bool &in_memory) const; int load_storage_schema( common::ObIAllocator &allocator, ObStorageSchema *&storage_schema) const; @@ -261,7 +270,18 @@ public: bool is_data_complete() const; // serialize & deserialize - int serialize(char *buf, const int64_t len, int64_t &pos) const; + // TODO: change the impl of serialize and get_serialize_size after rebase + int serialize( + char *buf, + const int64_t len, + int64_t &pos, + const ObSArray &meta_arr = ObSArray()) const; + int deserialize_for_replay( + common::ObArenaAllocator &allocator, + const char *buf, + const int64_t len, + int64_t &pos); + // for normal tablet deserialize int load_deserialize( common::ObArenaAllocator &allocator, @@ -285,7 +305,7 @@ public: const char *buf, const int64_t len, int64_t &pos); - int64_t get_serialize_size() const; + int64_t get_serialize_size(const ObSArray &meta_arr = ObSArray()) const; ObMetaObjGuard &get_next_tablet_guard() { 
return next_tablet_guard_; } const ObMetaObjGuard &get_next_tablet_guard() const { return next_tablet_guard_; } void set_next_tablet_guard(const ObTabletHandle &next_tablet_guard); @@ -338,15 +358,14 @@ public: const int64_t schema_version, ObIAllocator &allocator, const int64_t timeout_ts); - + int get_tablet_first_second_level_meta_ids(ObIArray &meta_ids) const; // table operation - int get_tablet_meta_ids(ObIArray &meta_ids) const; /* When need_unpack is true, if tablet is column store type, we should flatten the co sstable, and add all cg tables to iter. Else, we should add co sstable to iter as a whole. */ int get_all_tables(ObTableStoreIterator &iter, const bool need_unpack = false) const; int get_all_sstables(ObTableStoreIterator &iter, const bool need_unpack = false) const; - int get_sstables_size(int64_t &used_size, const bool ignore_shared_block = false) const; + int get_tablet_size(const bool ignore_shared_block, int64_t &meta_size, int64_t &data_size); int get_memtables(common::ObIArray &memtables, const bool need_active = false) const; int get_ddl_memtables(common::ObIArray &ddl_memtables) const; int check_need_remove_old_table(const int64_t multi_version_start, bool &need_remove) const; @@ -562,6 +581,7 @@ public: ObTabletFullMemoryMdsData &mds_data); int64_t to_string(char *buf, const int64_t buf_len) const; int get_max_column_cnt_on_schema_recorder(int64_t &max_column_cnt); + static int get_tablet_version(const char *buf, const int64_t len, int32_t &version); protected:// for MDS use virtual bool check_is_inited_() const override final { return is_inited_; } virtual const ObTabletMdsData &get_mds_data_() const override final { return mds_data_; } @@ -572,10 +592,24 @@ protected:// for MDS use return static_cast(pointer_hdl_.get_resource_ptr()); } private: - void set_mem_addr(); + static int deserialize_macro_info( + common::ObArenaAllocator &allocator, + const char *buf, + const int64_t len, + int64_t &pos, + ObTabletMacroInfo *&tablet_macro_info); 
+ int init_aggregated_info(common::ObArenaAllocator &allocator, ObLinkedMacroBlockItemWriter &linked_writer); + int get_sstables_size(int64_t &used_size, const bool ignore_shared_block = false) const; + void set_initial_addr(); int check_meta_addr() const; static int parse_meta_addr(const ObMetaDiskAddr &addr, ObIArray &meta_ids); + void dec_ref_with_aggregated_info(); + void dec_ref_without_aggregated_info(); int inner_inc_macro_ref_cnt(); + // inc ref with existed ObTabletMacroInfo + int inc_macro_ref_with_macro_info(const ObTabletMacroInfo &tablet_macro_info); + int inc_ref_with_aggregated_info(); + int inc_ref_without_aggregated_info(); void dec_table_store_ref_cnt(); int inc_table_store_ref_cnt(bool &inc_success); static int inc_addr_ref_cnt(const ObMetaDiskAddr &addr, bool &inc_success); @@ -587,8 +621,8 @@ private: static bool ignore_ret(const int ret); int inner_check_valid(const bool ignore_ha_status = false) const; int get_min_medium_snapshot(int64_t &min_medium_snapshot) const; - - int64_t get_self_size() const; + int self_serialize(char *buf, const int64_t len, int64_t &pos) const; + int64_t get_self_serialize_size() const; int get_memtable_mgr(ObIMemtableMgr *&memtable_mgr) const; int get_tablet_memtable_mgr(ObTabletMemtableMgr *&memtable_mgr) const; int check_schema_version(const int64_t schema_version); @@ -692,13 +726,17 @@ private: const int64_t finish_medium_scn, const ObTabletMdsData &mds_data) const; int set_initial_state(const bool initial_state); + int set_macro_info_addr( + const blocksstable::MacroBlockId &macro_id, + const int64_t offset, + const int64_t size, + const ObMetaDiskAddr::DiskType block_type); int load_deserialize_v1( common::ObArenaAllocator &allocator, const char *buf, const int64_t len, - const int64_t pos, - int64_t &new_pos); + int64_t &pos); int deserialize_meta_v1( common::ObArenaAllocator &allocator, const char *buf, @@ -711,10 +749,16 @@ private: common::ObArenaAllocator &allocator, const char *buf, const int64_t len, -
const int64_t pos, - int64_t &new_pos, + int64_t &pos, const bool prepare_memtable = true /* whether to prepare memtable */); + int load_deserialize_v3( + common::ObArenaAllocator &allocator, + const char *buf, + const int64_t len, + int64_t &pos, + const bool prepare_memtable); + static int convert_to_mds_dump_kv( common::ObIAllocator &allocator, const share::ObTabletAutoincSeq &auto_inc_seq, @@ -770,8 +814,8 @@ private: // ObTabletDDLKvMgr::MAX_DDL_KV_CNT_IN_STORAGE // Array size is too large, need to shrink it if possible static const int64_t DDL_KV_ARRAY_SIZE = 64; - static const int32_t TABLET_VERSION = 1; - static const int32_t TABLET_VERSION_V2 = 2; + static const int64_t ON_DEMAND_LOAD_SIZE = 4096; //4k + static const int64_t SHARED_MACRO_BUCKET_CNT = 100; private: int32_t version_; int32_t length_; @@ -785,6 +829,7 @@ private: ObTabletComplexAddr table_store_addr_; // size: 48B, alignment: 8B // always in disk ObTabletComplexAddr storage_schema_addr_; // size: 48B, alignment: 8B + ObTabletComplexAddr macro_info_addr_; // size: 48B, alignment: 8B int64_t memtable_count_; ObITable **ddl_kvs_; int64_t ddl_kv_count_; diff --git a/src/storage/tablet/ob_tablet_block_aggregated_info.cpp b/src/storage/tablet/ob_tablet_block_aggregated_info.cpp new file mode 100644 index 0000000000..dfafaf2ddd --- /dev/null +++ b/src/storage/tablet/ob_tablet_block_aggregated_info.cpp @@ -0,0 +1,1156 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE + +#include "ob_tablet_block_aggregated_info.h" +#include "storage/ob_super_block_struct.h" +#include "share/rc/ob_tenant_base.h" +#include "storage/slog_ckpt/ob_linked_macro_block_writer.h" +#include "storage/slog_ckpt/ob_linked_macro_block_reader.h" +#include "storage/tablet/ob_tablet_block_header.h" +#include "storage/tablet/ob_tablet.h" +#include "storage/blocksstable/ob_shared_macro_block_manager.h" + +namespace oceanbase +{ +using namespace blocksstable; +namespace storage +{ +/** + * ---------------------------------------ObSharedBlockInfo---------------------------------------- + */ +int ObSharedBlockInfo::serialize(char *buf, const int64_t buf_len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len <= 0 || pos < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(buf), K(buf_len), K(pos)); + } else if (OB_FAIL(shared_macro_id_.serialize(buf, buf_len, pos))) { + LOG_WARN("fail to serialize shared macro id", K(ret), K(shared_macro_id_), KP(buf), K(buf_len), K(pos)); + } else if (OB_FAIL(serialization::encode_i64(buf, buf_len, pos, occupy_size_))) { + LOG_WARN("fail to serialize occupy size", K(ret), K(occupy_size_), KP(buf), K(buf_len), K(pos)); + } + return ret; +} + +int ObSharedBlockInfo::deserialize(const char *buf, const int64_t data_len, int64_t &pos) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(buf) || OB_UNLIKELY(pos < 0 || data_len <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(buf), K(data_len), K(pos)); + } else if (OB_FAIL(shared_macro_id_.deserialize(buf, data_len, pos))) { + LOG_WARN("fail to deserialize shared macro id", K(ret), KP(buf), K(data_len), K(pos)); + } else if (OB_FAIL(serialization::decode_i64(buf, data_len, pos, &occupy_size_))) { + LOG_WARN("fail to deserialize occupy size", K(ret), KP(buf), K(data_len), K(pos)); + } + return ret; +} + +int64_t ObSharedBlockInfo::get_serialize_size() const +{ 
+ return shared_macro_id_.get_serialize_size() + serialization::encoded_length_i64(occupy_size_); +} + +/** + * ---------------------------------------ObBlockInfoSet---------------------------------------- + */ +int ObBlockInfoSet::init( + const int64_t meta_bucket_num, + const int64_t data_bucket_num, + const int64_t shared_meta_bucket_num, + const int64_t shared_data_bucket_num) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(meta_block_info_set_.create(meta_bucket_num, "ObBlockInfoSet", "ObBlockSetNode", MTL_ID()))) { + LOG_WARN("fail to create meta block id set", K(ret), K(meta_bucket_num)); + } else if (OB_FAIL(data_block_info_set_.create(data_bucket_num, "ObBlockInfoSet", "ObBlockSetNode", MTL_ID()))) { + LOG_WARN("fail to create data block id set", K(ret), K(data_bucket_num)); + } else if (OB_FAIL(shared_meta_block_info_set_.create(shared_meta_bucket_num, "ObBlockInfoSet", "ObBlockSetNode", MTL_ID()))) { + LOG_WARN("fail to create shared meta block id set", K(ret), K(shared_meta_bucket_num)); + } else if (OB_FAIL(shared_data_block_info_map_.create(shared_data_bucket_num, "ObBlockInfoMap", "ObBlockMapNode", MTL_ID()))) { + LOG_WARN("fail to create shared data block id set", K(ret), K(shared_data_bucket_num)); + } + return ret; +} +/** + * ---------------------------------------ObBlockInfoArray---------------------------------------- + */ +template +ObBlockInfoArray::ObBlockInfoArray() + : cnt_(0), arr_(nullptr), capacity_(0), is_inited_(false) +{ +} + +template +ObBlockInfoArray::~ObBlockInfoArray() +{ + reset(); +} + +template +void ObBlockInfoArray::reset() +{ + cnt_ = 0; + capacity_ = 0; + arr_ = nullptr; + is_inited_ = false; +} + +template +int ObBlockInfoArray::init(const int64_t cnt, ObArenaAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("ObBlockInfoArray has been inited", K(ret)); + } else if (0 == cnt) { + // no macro id + arr_ = nullptr; + } else if (OB_ISNULL(arr_ =
reinterpret_cast(allocator.alloc(sizeof(T) * cnt)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory", K(ret), K(sizeof(T) * cnt)); + } + if (OB_SUCC(ret)) { + cnt_ = cnt; + capacity_ = cnt; + is_inited_ = true; + } + return ret; +} + +template +int ObBlockInfoArray::serialize(char *buf, const int64_t buf_len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObBlockInfoArray hasn't been inited", K(ret)); + } else if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len <= 0 || pos < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(buf), K(buf_len), K(pos)); + } else if (OB_FAIL(serialization::encode_i64(buf, buf_len, pos, cnt_))) { + LOG_WARN("fail to encode count", K(ret), K_(cnt)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < cnt_; i++) { + if (OB_UNLIKELY(!arr_[i].is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("macro block id is invalid", K(ret), K(i), K(arr_[i])); + } else if (OB_FAIL(arr_[i].serialize(buf, buf_len, pos))) { + LOG_WARN("fail to serialize macro block id", K(ret), K(i), KP(buf), K(buf_len), K(pos)); + } + } + return ret; +} + +template +int ObBlockInfoArray::deserialize(ObArenaAllocator &allocator, const char *buf, const int64_t data_len, int64_t &pos) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("ObBlockInfoArray has been inited", K(ret)); + } else if (OB_ISNULL(buf) || OB_UNLIKELY(pos < 0 || data_len <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(buf), K(data_len), K(pos)); + } else if (OB_FAIL(serialization::decode_i64(buf, data_len, pos, &cnt_))) { + LOG_WARN("fail to decode count", K(ret), K(data_len), K(pos)); + } else if (0 == cnt_) { + // no macro id + } else if (OB_UNLIKELY(cnt_ < 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("array count shouldn't be less than 0", K(ret), K_(cnt)); + } else { + if (OB_ISNULL(arr_ = static_cast(allocator.alloc(cnt_ * 
sizeof(T))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory for macro id array", K(ret), K_(cnt)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < cnt_; i++) { + if (OB_FAIL(arr_[i].deserialize(buf, data_len, pos))) { + LOG_WARN("fail to deserialize macro block id", K(ret), K(data_len), K(pos)); + } else if (OB_UNLIKELY(!arr_[i].is_valid())) { + ret = OB_ERR_UNEXPECTED; // an invalid element must fail deserialization instead of being silently accepted + LOG_WARN("deserialized macro id is invalid", K(ret), K(arr_[i])); + } + } + } + if (OB_FAIL(ret) && nullptr != arr_) { + allocator.free(arr_); + reset(); + } else if (OB_SUCC(ret)) { + is_inited_ = true; + capacity_ = cnt_; + } + return ret; +} + +template +int64_t ObBlockInfoArray::get_serialize_size() const +{ + T block_info; + return serialization::encoded_length_i64(cnt_) + block_info.get_serialize_size() * cnt_; +} + +template +int64_t ObBlockInfoArray::get_deep_copy_size() const +{ + return sizeof(T) * cnt_; +} + +template +int ObBlockInfoArray::deep_copy(char *buf, const int64_t buf_len, int64_t &pos, ObBlockInfoArray &dest_obj) const +{ + int ret = OB_SUCCESS; + const int64_t memory_size = get_deep_copy_size(); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObBlockInfoArray hasn't been inited", K(ret)); + } else if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len <= 0 || pos < 0 || buf_len - pos < memory_size)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(buf), K(buf_len), K(pos), K(memory_size)); + } else if (OB_NOT_NULL(arr_) && 0 != cnt_) { + dest_obj.arr_ = reinterpret_cast(buf + pos); + MEMCPY(dest_obj.arr_, arr_, sizeof(T) * cnt_); + } else { + dest_obj.arr_ = nullptr; + } + if (OB_SUCC(ret)) { + dest_obj.cnt_ = cnt_; + dest_obj.capacity_ = capacity_; + pos += memory_size; + dest_obj.is_inited_ = is_inited_; + } + return ret; +} + +/** + * ---------------------------------------ObTabletMacroInfo---------------------------------------- + */ +ObTabletMacroInfo::ObTabletMacroInfo() + : entry_block_(ObServerSuperBlock::EMPTY_LIST_ENTRY_BLOCK), +
meta_block_info_arr_(), data_block_info_arr_(), shared_meta_block_info_arr_(), + shared_data_block_info_arr_(), is_inited_(false) +{ +} + +ObTabletMacroInfo::~ObTabletMacroInfo() +{ + reset(); +} + +void ObTabletMacroInfo::reset() +{ + entry_block_ = ObServerSuperBlock::EMPTY_LIST_ENTRY_BLOCK; + meta_block_info_arr_.reset(); + data_block_info_arr_.reset(); + shared_meta_block_info_arr_.reset(); + shared_data_block_info_arr_.reset(); + is_inited_ = false; +} + +int ObTabletMacroInfo::init( + ObArenaAllocator &allocator, + ObBlockInfoSet &info_set, + ObLinkedMacroBlockItemWriter &linked_writer) +{ + int ret = OB_SUCCESS; + ObBlockInfoSet::TabletMacroSet &meta_block_info_set = info_set.meta_block_info_set_; + ObBlockInfoSet::TabletMacroSet &data_block_info_set = info_set.data_block_info_set_; + ObBlockInfoSet::TabletMacroSet &shared_meta_block_info_set = info_set.shared_meta_block_info_set_; + ObBlockInfoSet::TabletMacroMap &shared_data_block_info_map = info_set.shared_data_block_info_map_; + int64_t total_macro_cnt = meta_block_info_set.size() + + data_block_info_set.size() + + shared_meta_block_info_set.size() + + shared_data_block_info_map.size(); + + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("ObTabletMacroInfo has been inited", K(ret)); + } else if (OB_FAIL(meta_block_info_arr_.init(meta_block_info_set.size(), allocator))) { + LOG_WARN("fail to init meta block id arr", K(ret)); + } else if (OB_FAIL(data_block_info_arr_.init(data_block_info_set.size(), allocator))) { + LOG_WARN("fail to init data block id arr", K(ret)); + } else if (OB_FAIL(shared_meta_block_info_arr_.init(shared_meta_block_info_set.size(), allocator))) { + LOG_WARN("fail to init shared meta block info arr", K(ret)); + } else if (OB_FAIL(shared_data_block_info_arr_.init(shared_data_block_info_map.size(), allocator))) { + LOG_WARN("fail to init shared data block info arr", K(ret)); + } else if (OB_FAIL(construct_block_id_arr(meta_block_info_set, meta_block_info_arr_))) { + 
LOG_WARN("fail to construct meta block id arr", K(ret)); + } else if (OB_FAIL(construct_block_id_arr(data_block_info_set, data_block_info_arr_))) { + LOG_WARN("fail to construct data block id arr", K(ret)); + } else if (OB_FAIL(construct_block_id_arr(shared_meta_block_info_set, shared_meta_block_info_arr_))) { + LOG_WARN("fail to construct shared meta block id arr", K(ret)); + } else if (OB_FAIL(construct_block_info_arr(shared_data_block_info_map, shared_data_block_info_arr_))) { + LOG_WARN("fail to construct shared data block info arr", K(ret)); + } else if (ID_COUNT_THRESHOLD < total_macro_cnt && OB_FAIL(persist_macro_ids(allocator, linked_writer))) { + LOG_WARN("fail to persist macro ids", K(ret)); + } + if (OB_SUCC(ret)) { + is_inited_ = true; + } else if (!is_inited_) { + reset(); + } + return ret; +} + +int ObTabletMacroInfo::construct_block_id_arr( + ObBlockInfoSet::TabletMacroSet &id_set, + ObBlockInfoArray &block_id_arr) +{ + int ret = OB_SUCCESS; + int64_t cnt = 0; + for (ObBlockInfoSet::SetIterator iter = id_set.begin(); OB_SUCC(ret) && iter != id_set.end(); ++iter) { + const MacroBlockId &macro_id = iter->first; + if (OB_UNLIKELY(cnt >= block_id_arr.cnt_ || !macro_id.is_valid())) { // the array was pre-sized from the set, so overflow or an invalid id is unexpected + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected macro_cnt/macro_id", K(ret), K(macro_id), K(cnt), K(block_id_arr)); + } else { + block_id_arr.arr_[cnt] = macro_id; + cnt++; + } + } + return ret; +} + +int ObTabletMacroInfo::construct_block_info_arr( + ObBlockInfoSet::TabletMacroMap &block_info_map, + ObBlockInfoArray &block_info_arr) +{ + int ret = OB_SUCCESS; + int64_t cnt = 0; + for (ObBlockInfoSet::MapIterator iter = block_info_map.begin(); OB_SUCC(ret) && iter != block_info_map.end(); ++iter) { + const MacroBlockId &macro_id = iter->first; + const int64_t occupy_size = iter->second; + if (OB_UNLIKELY(cnt >= block_info_arr.cnt_ || !macro_id.is_valid() || occupy_size <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected macro_cnt/macro_id/occupy_size", K(ret), K(macro_id),
K(cnt), K(occupy_size)); + } else { + new (&block_info_arr.arr_[cnt]) ObSharedBlockInfo(macro_id, occupy_size); + cnt++; + } + } + return ret; +} + +int ObTabletMacroInfo::persist_macro_ids( + ObArenaAllocator &allocator, + ObLinkedMacroBlockItemWriter &linked_writer) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(linked_writer.init(false /*whether need addr*/))) { + LOG_WARN("fail to init linked writer", K(ret)); + } else if (OB_FAIL(do_flush_ids(ObTabletMacroType::META_BLOCK, meta_block_info_arr_, allocator, linked_writer))) { + LOG_WARN("fail to persist meta block ids", K(ret)); + } else if (OB_FAIL(do_flush_ids(ObTabletMacroType::DATA_BLOCK, data_block_info_arr_, allocator, linked_writer))) { + LOG_WARN("fail to persist data block ids", K(ret)); + } else if (OB_FAIL(do_flush_ids(ObTabletMacroType::SHARED_META_BLOCK, shared_meta_block_info_arr_, allocator, linked_writer))) { + LOG_WARN("fail to persist shared meta block ids", K(ret)); + } else if (OB_FAIL(do_flush_ids(shared_data_block_info_arr_, allocator, linked_writer))) { + LOG_WARN("fail to persist shared data block infos", K(ret)); + } else if (OB_FAIL(linked_writer.close())) { + LOG_WARN("fail to close linked writer", K(ret)); + } else if (OB_FAIL(linked_writer.get_entry_block(entry_block_))) { + LOG_WARN("fail to get entry block", K(ret)); + } + return ret; +} + +int ObTabletMacroInfo::do_flush_ids( + const ObTabletMacroType macro_type, + ObBlockInfoArray &block_id_arr, + ObArenaAllocator &allocator, + ObLinkedMacroBlockItemWriter &linked_writer) +{ + int ret = OB_SUCCESS; + MacroBlockId dummy_id; + char *buf = nullptr; + const int64_t buf_len = serialization::encoded_length_i16(static_cast(macro_type)) + + block_id_arr.get_serialize_size(); + int64_t pos = 0; + if (OB_ISNULL(buf = (char *)(allocator.alloc(buf_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory for flush buf", K(ret), K(buf_len)); + } else if (OB_FAIL(serialization::encode_i16(buf, buf_len, pos, 
static_cast(macro_type)))) { + LOG_WARN("fail to serialize macro type", K(ret), K(macro_type)); + } else if (OB_FAIL(block_id_arr.serialize(buf, buf_len, pos))) { + LOG_WARN("fail to serialize block id arr", K(ret), K(block_id_arr)); + } + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(linked_writer.write_item(buf, buf_len))) { + LOG_WARN("fail to write linked item", K(ret), KP(buf), K(buf_len)); + } + return ret; +} + +int ObTabletMacroInfo::do_flush_ids( + ObBlockInfoArray &block_info_arr, + ObArenaAllocator &allocator, + ObLinkedMacroBlockItemWriter &linked_writer) +{ + int ret = OB_SUCCESS; + ObSharedBlockInfo dummy_info; + char *buf = nullptr; + int16_t dummy_type = 0; + const int64_t buf_len = serialization::encoded_length_i16(dummy_type) + + block_info_arr.get_serialize_size(); + int64_t pos = 0; + if (OB_ISNULL(buf = (char *)(allocator.alloc(buf_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory for flush buf", K(ret), K(buf_len)); + } else if (OB_FAIL(serialization::encode_i16(buf, buf_len, pos, static_cast(ObTabletMacroType::SHARED_DATA_BLOCK)))) { + LOG_WARN("fail to serialize macro type", K(ret)); + } else if (OB_FAIL(block_info_arr.serialize(buf, buf_len, pos))) { + LOG_WARN("fail to serialize block info arr", K(ret), K(block_info_arr)); + } + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(linked_writer.write_item(buf, buf_len))) { + LOG_WARN("fail to write linked item", K(ret), KP(buf), K(buf_len)); + } + return ret; +} + +int ObTabletMacroInfo::serialize(char *buf, const int64_t buf_len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + ObSecondaryMetaHeader meta_header; + const int64_t header_size = meta_header.get_serialize_size(); + const int64_t total_size = get_serialize_size(); + int64_t meta_pos = pos + header_size; + int64_t header_pos = pos; + int64_t version = TABLET_MACRO_INFO_VERSION; + int64_t size = get_serialize_size(); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + 
LOG_WARN("ObBlockInfoArray hasn't been inited", K(ret)); + } else if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len <= 0 || buf_len - pos < total_size)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(buf), K(buf_len), K(pos)); + } else if (OB_FAIL(serialization::encode_i64(buf, buf_len, meta_pos, version))) { + LOG_WARN("fail to serialize version", K(ret), KP(buf), K(buf_len), K(version)); + } else if (OB_FAIL(serialization::encode_i64(buf, buf_len, meta_pos, size))) { + LOG_WARN("fail to serialize size", K(ret), KP(buf), K(buf_len), K(size)); + } else if (OB_FAIL(entry_block_.serialize(buf, buf_len, meta_pos))) { + LOG_WARN("fail to serialize entry block", K(ret), KP(buf), K(buf_len), K(entry_block_)); + } else if (OB_FAIL(meta_block_info_arr_.serialize(buf, buf_len, meta_pos))) { + LOG_WARN("fail to serialize meta block id arr", K(ret), KP(buf), K(buf_len)); + } else if (OB_FAIL(data_block_info_arr_.serialize(buf, buf_len, meta_pos))) { + LOG_WARN("fail to serialize data block id arr", K(ret), KP(buf), K(buf_len)); + } else if (OB_FAIL(shared_meta_block_info_arr_.serialize(buf, buf_len, meta_pos))) { + LOG_WARN("fail to serialize shared meta block id arr", K(ret), KP(buf), K(buf_len)); + } else if (OB_FAIL(shared_data_block_info_arr_.serialize(buf, buf_len, meta_pos))) { + LOG_WARN("fail to serialize shared data block id arr", K(ret), KP(buf), K(buf_len)); + } else { + meta_header.checksum_ = ob_crc64(buf + pos + header_size, total_size - header_size); + meta_header.payload_size_ = total_size - header_size; + if (OB_FAIL(meta_header.serialize(buf, buf_len, header_pos))) { + LOG_WARN("fail to serialize secondary meta header", K(ret), K(meta_header)); + } else { + pos = meta_pos; + } + } + return ret; +} + +int ObTabletMacroInfo::deserialize(ObArenaAllocator &allocator, const char *buf, const int64_t data_len, int64_t &pos) +{ + int ret = OB_SUCCESS; + ObSecondaryMetaHeader meta_header; + int32_t crc = 0; + int64_t new_pos = pos; + int64_t version 
= 0; + int64_t size = 0; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("ObTabletMacroInfo has been inited", K(ret)); + } else if (OB_ISNULL(buf) || OB_UNLIKELY(data_len <= 0 || pos < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(buf), K(data_len), K(pos)); + } else if (OB_FAIL(meta_header.deserialize(buf, data_len, new_pos))) { + LOG_WARN("fail to deserialize secondary meta header", K(ret), KP(buf), K(data_len), K(new_pos)); + } else if (FALSE_IT(crc = ob_crc64(buf + new_pos, meta_header.payload_size_))) { + } else if (OB_UNLIKELY(crc != meta_header.checksum_)) { + ret = OB_CHECKSUM_ERROR; + LOG_WARN("tablet macro info's checksum doesn't match", K(ret), K(meta_header), K(crc)); + } else if (OB_FAIL(serialization::decode_i64(buf, data_len, new_pos, &version))) { + LOG_WARN("fail to deserialize version", K(ret), KP(buf), K(data_len)); + } else if (OB_UNLIKELY(TABLET_MACRO_INFO_VERSION != version)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tablet macro info's version doesn't match", K(ret), K(version)); + } else if (OB_FAIL(serialization::decode_i64(buf, data_len, new_pos, &size))) { + LOG_WARN("fail to deserialize size", K(ret), KP(buf), K(data_len)); + } else if (new_pos - pos < size && OB_FAIL(entry_block_.deserialize(buf, data_len, new_pos))) { + LOG_WARN("fail to deserialize entry block", K(ret), KP(buf), K(data_len)); + } else if (new_pos - pos < size && OB_FAIL(meta_block_info_arr_.deserialize(allocator, buf, data_len, new_pos))) { + LOG_WARN("fail to deserialize meta block id array", K(ret), KP(buf), K(data_len)); + } else if (new_pos - pos < size && OB_FAIL(data_block_info_arr_.deserialize(allocator, buf, data_len, new_pos))) { + LOG_WARN("fail to deserialize data block id array", K(ret), KP(buf), K(data_len)); + } else if (new_pos - pos < size && OB_FAIL(shared_meta_block_info_arr_.deserialize(allocator, buf, data_len, new_pos))) { + LOG_WARN("fail to deserialize shared meta block id array", K(ret), KP(buf), 
K(data_len)); + } else if (new_pos - pos < size && OB_FAIL(shared_data_block_info_arr_.deserialize(allocator, buf, data_len, new_pos))) { + LOG_WARN("fail to deserialize shared data block id array", K(ret), KP(buf), K(data_len)); + } else if (OB_UNLIKELY(new_pos - pos != size)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet macro info's size doesn't match", K(ret), K(new_pos), K(pos), K(size), K(meta_block_info_arr_), K(data_block_info_arr_)); + } else { + pos = new_pos; + is_inited_ = true; + } + return ret; +} + +int64_t ObTabletMacroInfo::get_serialize_size() const +{ + ObSecondaryMetaHeader meta_header; + int64_t version = 0; + int64_t size = 0; + int64_t len = serialization::encoded_length_i64(version); + len += serialization::encoded_length_i64(size); + len += entry_block_.get_serialize_size(); + len += meta_block_info_arr_.get_serialize_size(); + len += data_block_info_arr_.get_serialize_size(); + len += shared_meta_block_info_arr_.get_serialize_size(); + len += shared_data_block_info_arr_.get_serialize_size(); + len += meta_header.get_serialize_size(); + return len; +} + +int64_t ObTabletMacroInfo::get_deep_copy_size() const +{ + int64_t len = sizeof(ObTabletMacroInfo); + if (IS_EMPTY_BLOCK_LIST(entry_block_)) { + len += meta_block_info_arr_.get_deep_copy_size(); + len += data_block_info_arr_.get_deep_copy_size(); + len += shared_meta_block_info_arr_.get_deep_copy_size(); + len += shared_data_block_info_arr_.get_deep_copy_size(); + } + return len; +} + +int ObTabletMacroInfo::deep_copy(char *buf, const int64_t buf_len, ObTabletMacroInfo *&dest_obj) const +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + const int64_t memory_size = get_deep_copy_size(); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTabletMacroInfo hasn't been inited", K(ret)); + } else if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len <= 0 || buf_len < memory_size)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), KP(buf), K(buf_len), K(memory_size)); + } else { + 
ObTabletMacroInfo *tablet_macro_info = new (buf) ObTabletMacroInfo(); + pos = sizeof(ObTabletMacroInfo); + tablet_macro_info->entry_block_ = entry_block_; + if (IS_EMPTY_BLOCK_LIST(entry_block_)) { + if (OB_FAIL(meta_block_info_arr_.deep_copy(buf, buf_len, pos, tablet_macro_info->meta_block_info_arr_))) { + LOG_WARN("fail to deep copy meta block id arr", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(data_block_info_arr_.deep_copy(buf, buf_len, pos, tablet_macro_info->data_block_info_arr_))) { + LOG_WARN("fail to deep copy data block id arr", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(shared_meta_block_info_arr_.deep_copy(buf, buf_len, pos, tablet_macro_info->shared_meta_block_info_arr_))) { + LOG_WARN("fail to deep copy shared meta block id arr", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(shared_data_block_info_arr_.deep_copy(buf, buf_len, pos, tablet_macro_info->shared_data_block_info_arr_))) { + LOG_WARN("fail to deep copy shared data block info arr", K(ret), K(buf_len), K(pos)); + } + } else { + ObArenaAllocator dummy_allocator; + if (OB_FAIL(tablet_macro_info->meta_block_info_arr_.init(0, dummy_allocator))) { + LOG_WARN("fail to init empty meta block info arr", K(ret)); + } else if (OB_FAIL(tablet_macro_info->data_block_info_arr_.init(0, dummy_allocator))) { + LOG_WARN("fail to init empty data block info arr", K(ret)); + } else if (OB_FAIL(tablet_macro_info->shared_meta_block_info_arr_.init(0, dummy_allocator))) { + LOG_WARN("fail to init empty shared meta block info arr", K(ret)); + } else if (OB_FAIL(tablet_macro_info->shared_data_block_info_arr_.init(0, dummy_allocator))) { + LOG_WARN("fail to init empty shared data block info arr", K(ret)); + } + } + if (OB_SUCC(ret)) { + dest_obj = tablet_macro_info; + dest_obj->is_inited_ = is_inited_; + } + } + return ret; +} + +bool ObTabletMacroInfo::is_valid() const +{ + return entry_block_.is_valid() + && meta_block_info_arr_.is_valid() + && data_block_info_arr_.is_valid() + && 
shared_meta_block_info_arr_.is_valid() + && shared_data_block_info_arr_.is_valid(); +} + +int ObTabletMacroInfo::get_all_macro_ids( + ObIArray &meta_block_arr, + ObIArray &data_block_arr, + ObIArray &shared_meta_block_arr, + ObIArray &shared_data_block_arr) +{ + int ret = OB_SUCCESS; + if (IS_EMPTY_BLOCK_LIST(entry_block_)) { + if (OB_FAIL(get_macro_ids_without_io( + meta_block_arr, + data_block_arr, + shared_meta_block_arr, + shared_data_block_arr))) { + LOG_WARN("fail to get macro ids without io", K(ret)); + } + } else { + if (OB_FAIL(get_macro_ids_with_io( + meta_block_arr, + data_block_arr, + shared_meta_block_arr, + shared_data_block_arr))) { + LOG_WARN("fail to get macro ids with io", K(ret)); + } + } + return ret; +} + +int ObTabletMacroInfo::get_macro_ids_without_io( + ObIArray &meta_block_arr, + ObIArray &data_block_arr, + ObIArray &shared_meta_block_arr, + ObIArray &shared_data_block_arr) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(parse_id_arr(meta_block_info_arr_, meta_block_arr))) { + LOG_WARN("fail to parse meta id array", K(ret), K(meta_block_info_arr_)); + } else if (OB_FAIL(parse_id_arr(data_block_info_arr_, data_block_arr))) { + LOG_WARN("fail to parse data id array", K(ret), K(data_block_info_arr_)); + } else if (OB_FAIL(parse_id_arr(shared_meta_block_info_arr_, shared_meta_block_arr))) { + LOG_WARN("fail to parse shared meta id array", K(ret), K(shared_meta_block_info_arr_)); + } else if (OB_FAIL(parse_info_arr(shared_data_block_info_arr_, shared_data_block_arr))) { + LOG_WARN("fail to parse shared data id array", K(ret), K(shared_data_block_info_arr_)); + } + return ret; +} + +int ObTabletMacroInfo::parse_id_arr( + const ObBlockInfoArray &info_arr, + ObIArray &id_arr) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < info_arr.cnt_; i++) { + if (OB_FAIL(id_arr.push_back(info_arr.arr_[i]))) { + LOG_WARN("fail to push back macro id", K(ret), K(i), K(info_arr.arr_[i])); + } + } + return ret; +} + +int 
ObTabletMacroInfo::parse_info_arr( + const ObBlockInfoArray &info_arr, + ObIArray &id_arr) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < info_arr.cnt_; i++) { + if (OB_FAIL(id_arr.push_back(info_arr.arr_[i].shared_macro_id_))) { + LOG_WARN("fail to push back macro id", K(ret), K(i), K(info_arr.arr_[i])); + } + } + return ret; +} + +int ObTabletMacroInfo::get_macro_ids_with_io( + ObIArray &meta_block_arr, + ObIArray &data_block_arr, + ObIArray &shared_meta_block_arr, + ObIArray &shared_data_block_arr) +{ + int ret = OB_SUCCESS; + ObLinkedMacroBlockItemReader block_reader; + char *buf = nullptr; + int64_t buf_len = 0; + ObMetaDiskAddr addr; + if (OB_UNLIKELY(!entry_block_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("entry block is invalid", K(ret), K(entry_block_)); + } else { + if (OB_FAIL(block_reader.init(entry_block_))) { + LOG_WARN("fail to init linked block item reader", K(ret), K(entry_block_)); + } else if (OB_FAIL(block_reader.get_next_item(buf, buf_len, addr))) { + LOG_WARN("fail to get next item", K(ret)); + } else if (OB_FAIL(parse_id_buf(buf, buf_len, meta_block_arr))) { + LOG_WARN("fail to parse meta block info buf", K(ret), K(buf_len), KP(buf)); + } else if (OB_FAIL(block_reader.get_next_item(buf, buf_len, addr))) { + LOG_WARN("fail to get next item", K(ret)); + } else if (OB_FAIL(parse_id_buf(buf, buf_len, data_block_arr))) { + LOG_WARN("fail to parse data block info buf", K(ret), K(buf_len), KP(buf)); + } else if (OB_FAIL(block_reader.get_next_item(buf, buf_len, addr))) { + LOG_WARN("fail to get next item", K(ret)); + } else if (OB_FAIL(parse_id_buf(buf, buf_len, shared_meta_block_arr))) { + LOG_WARN("fail to parse shared meta block info buf", K(ret), K(buf_len), KP(buf)); + } else if (OB_FAIL(block_reader.get_next_item(buf, buf_len, addr))) { + LOG_WARN("fail to get next item", K(ret)); + } else if (OB_FAIL(parse_info_buf(buf, buf_len, shared_data_block_arr))) { + LOG_WARN("fail to parse shared data block info buf", 
K(ret), K(buf_len), KP(buf)); + } + } + return ret; +} + +int ObTabletMacroInfo::parse_info_buf( + const char *buf, + const int64_t buf_len, + ObIArray &block_id_arr) +{ + int ret = OB_SUCCESS; + int64_t cnt = 0; + int64_t pos = 0; + ObSharedBlockInfo block_info; + int16_t macro_type; + ObArenaAllocator allocator; + ObBlockInfoArray block_info_arr; + if (OB_FAIL(serialization::decode_i16(buf, buf_len, pos, &macro_type))) { + LOG_WARN("fail to deserialize macro type", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(block_info_arr.deserialize(allocator, buf, buf_len, pos))) { + LOG_WARN("fail to deserialize block info arr", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(parse_info_arr(block_info_arr, block_id_arr))) { + LOG_WARN("fail to parse info arr", K(ret), K(block_info_arr)); + } + return ret; +} + +int ObTabletMacroInfo::parse_id_buf( + const char *buf, + const int64_t buf_len, + ObIArray &block_id_arr) +{ + int ret = OB_SUCCESS; + int64_t cnt = 0; + int64_t pos = 0; + MacroBlockId macro_id; + int16_t macro_type; + ObArenaAllocator allocator; + ObBlockInfoArray block_info_arr; + if (OB_FAIL(serialization::decode_i16(buf, buf_len, pos, &macro_type))) { + LOG_WARN("fail to deserialize macro type", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(block_info_arr.deserialize(allocator, buf, buf_len, pos))) { + LOG_WARN("fail to deserialize block info arr", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(parse_id_arr(block_info_arr, block_id_arr))) { + LOG_WARN("fail to parse id arr", K(ret), K(block_info_arr)); + } + return ret; +} + +int ObTabletMacroInfo::inc_macro_ref(bool &inc_success) const +{ + int ret = OB_SUCCESS; + inc_success = false; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("tablet macro info hasnt' been inited", K(ret)); + } else if (IS_EMPTY_BLOCK_LIST(entry_block_)) { + if (OB_FAIL(inc_macro_ref_without_io())) { + LOG_WARN("fail to increase macro ref cnt without io", K(ret)); + } + } else { + if (OB_FAIL(inc_macro_ref_with_io())) { + 
LOG_WARN("fail to increase macro ref cnt with io", K(ret)); + } + } + if (OB_SUCC(ret)) { + inc_success = true; + } + return ret; +} + +void ObTabletMacroInfo::dec_macro_ref() const +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("tablet macro info hasnt' been inited", K(ret)); + } else if (IS_EMPTY_BLOCK_LIST(entry_block_)) { + dec_macro_ref_without_io(); + } else { + dec_macro_ref_with_io(); + } +} + +void ObTabletMacroInfo::dec_macro_ref_with_io() const +{ + int ret = OB_SUCCESS; + ObLinkedMacroBlockItemReader block_reader; + char *meta_id_buf = nullptr; + int64_t meta_id_len = 0; + char *data_id_buf = nullptr; + int64_t data_id_len = 0; + char *shared_meta_id_buf = nullptr; + int64_t shared_meta_id_len = 0; + char *shared_data_info_buf = nullptr; + int64_t shared_data_info_len = 0; + ObMetaDiskAddr addr; + if (OB_UNLIKELY(!entry_block_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("entry block is invalid", K(ret), K(entry_block_)); + } else { + do { + block_reader.reset(); + if (OB_FAIL(block_reader.init(entry_block_))) { + LOG_WARN("fail to init linked block item reader", K(ret), K(entry_block_)); + } else if (OB_FAIL(block_reader.get_next_item(meta_id_buf, meta_id_len, addr))) { + LOG_WARN("fail to get next item", K(ret)); + } else if (OB_FAIL(block_reader.get_next_item(data_id_buf, data_id_len, addr))) { + LOG_WARN("fail to get next item", K(ret)); + } else if (OB_FAIL(block_reader.get_next_item(shared_meta_id_buf, shared_meta_id_len, addr))) { + LOG_WARN("fail to get next item", K(ret)); + } else if (OB_FAIL(block_reader.get_next_item(shared_data_info_buf, shared_data_info_len, addr))) { + LOG_WARN("fail to get next item", K(ret)); + } + } while(ignore_ret(ret)); + if (OB_FAIL(ret)) { + LOG_ERROR("fail to read macro id from disk, macro blocks may leak", K(ret)); + } else { + deserialize_and_dec_macro_ref(meta_id_buf, meta_id_len); + deserialize_and_dec_macro_ref(data_id_buf, data_id_len); + 
deserialize_and_dec_macro_ref(shared_meta_id_buf, shared_meta_id_len); + deserialize_and_dec_shared_macro_ref(shared_data_info_buf, shared_data_info_len); + dec_linked_block_ref_cnt(block_reader.get_meta_block_list()); + } + } +} + +int ObTabletMacroInfo::inc_macro_ref_with_io() const +{ + int ret = OB_SUCCESS; + ObLinkedMacroBlockItemReader block_reader; + char *meta_id_buf = nullptr; + int64_t meta_id_len = 0; + char *data_id_buf = nullptr; + int64_t data_id_len = 0; + char *shared_meta_id_buf = nullptr; + int64_t shared_meta_id_len = 0; + char *shared_data_info_buf = nullptr; + int64_t shared_data_info_len = 0; + ObMetaDiskAddr addr; + bool inc_meta_id_success = false; + bool inc_data_id_success = false; + bool inc_shared_meta_id_success = false; + bool inc_shared_data_id_success = false; + bool inc_linked_id_success = false; + if (OB_UNLIKELY(!entry_block_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("entry block is invalid", K(ret), K(entry_block_)); + } else if (OB_FAIL(block_reader.init(entry_block_))) { + LOG_WARN("fail to init linked block item reader", K(ret), K(entry_block_)); + } else if (OB_FAIL(block_reader.get_next_item(meta_id_buf, meta_id_len, addr))) { + LOG_WARN("fail to get next item", K(ret)); + } else if (OB_FAIL(deserialize_and_inc_macro_ref(meta_id_buf, meta_id_len, inc_meta_id_success))) { + LOG_WARN("fail to deserialize and inc macro ref", K(ret)); + } else if (OB_FAIL(block_reader.get_next_item(data_id_buf, data_id_len, addr))) { + LOG_WARN("fail to get next item", K(ret)); + } else if (OB_FAIL(deserialize_and_inc_macro_ref(data_id_buf, data_id_len, inc_data_id_success))) { + LOG_WARN("fail to deserialize and inc macro ref", K(ret)); + } else if (OB_FAIL(block_reader.get_next_item(shared_meta_id_buf, shared_meta_id_len, addr))) { + LOG_WARN("fail to get next item", K(ret)); + } else if (OB_FAIL(deserialize_and_inc_macro_ref(shared_meta_id_buf, shared_meta_id_len, inc_shared_meta_id_success))) { + LOG_WARN("fail to deserialize and 
inc macro ref", K(ret)); + } else if (OB_FAIL(block_reader.get_next_item(shared_data_info_buf, shared_data_info_len, addr))) { + LOG_WARN("fail to get next item", K(ret)); + } else if (OB_FAIL(deserialize_and_inc_shared_macro_ref(shared_data_info_buf, shared_data_info_len, inc_shared_data_id_success))) { + LOG_WARN("fail to deserialize and inc macro ref", K(ret)); + } else if (OB_FAIL(inc_linked_block_ref_cnt(block_reader.get_meta_block_list(), inc_linked_id_success))) { + LOG_WARN("fail to inc linked macro ref", K(ret)); + } + if (OB_FAIL(ret)) { + if (inc_meta_id_success) { + deserialize_and_dec_macro_ref(meta_id_buf, meta_id_len); + } + if (inc_data_id_success) { + deserialize_and_dec_macro_ref(data_id_buf, data_id_len); + } + if (inc_shared_meta_id_success) { + deserialize_and_dec_macro_ref(shared_meta_id_buf, shared_meta_id_len); + } + if (inc_shared_data_id_success) { + deserialize_and_dec_shared_macro_ref(shared_data_info_buf, shared_data_info_len); + } + if (inc_linked_id_success) { + dec_linked_block_ref_cnt(block_reader.get_meta_block_list()); + } + } + return ret; +} + +int ObTabletMacroInfo::inc_linked_block_ref_cnt( + const ObIArray &linked_block_list, + bool &inc_macro_id_success) const +{ + int ret = OB_SUCCESS; + int64_t inc_cnt = 0; + inc_macro_id_success = false; + for (int64_t i = 0; OB_SUCC(ret) && i < linked_block_list.count(); i++) { + if (OB_FAIL(OB_SERVER_BLOCK_MGR.inc_ref(linked_block_list.at(i)))) { + LOG_WARN("fail to increase ref cnt for linked block", K(ret), K(i), K(linked_block_list.at(i))); + } else { + inc_cnt++; + } + } + if (OB_FAIL(ret)) { + int tmp_ret = OB_SUCCESS; // rollback errors go to tmp_ret so the original failure in ret is still returned + for (int64_t i = 0; i < inc_cnt; i++) { + if (OB_TMP_FAIL(OB_SERVER_BLOCK_MGR.dec_ref(linked_block_list.at(i)))) { + LOG_WARN("fail to decrease ref cnt for linked block", K(tmp_ret), K(i), K(linked_block_list.at(i))); + } + } + } else { + inc_macro_id_success = true; + } + return ret; +} + +void ObTabletMacroInfo::dec_linked_block_ref_cnt(const ObIArray 
&linked_block_list) const +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; i < linked_block_list.count(); i++) { + if (OB_FAIL(OB_SERVER_BLOCK_MGR.dec_ref(linked_block_list.at(i)))) { + LOG_WARN("fail to decrease ref cnt for linked block", K(ret), K(i), K(linked_block_list.at(i))); + } + } +} + +void ObTabletMacroInfo::deserialize_and_dec_macro_ref(const char *buf, const int64_t buf_len) const +{ + int ret = OB_SUCCESS; + int64_t cnt = 0; + int64_t pos = 0; + MacroBlockId macro_id; + int16_t macro_type; + ObArenaAllocator allocator; + ObBlockInfoArray block_id_arr; + if (OB_FAIL(serialization::decode_i16(buf, buf_len, pos, &macro_type))) { + LOG_WARN("fail to deserialize macro type", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(block_id_arr.deserialize(allocator, buf, buf_len, pos))) { + LOG_WARN("fail to deserialize block id arr", K(ret), K(buf_len), K(pos)); + } else { + do_dec_macro_ref(block_id_arr); + } +} + +void ObTabletMacroInfo::deserialize_and_dec_shared_macro_ref(const char *buf, const int64_t buf_len) const +{ + int ret = OB_SUCCESS; + int64_t cnt = 0; + int64_t pos = 0; + ObSharedBlockInfo block_info; + int16_t macro_type; + ObArenaAllocator allocator; + ObBlockInfoArray block_info_arr; + if (OB_FAIL(serialization::decode_i16(buf, buf_len, pos, &macro_type))) { + LOG_WARN("fail to deserialize macro type", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(block_info_arr.deserialize(allocator, buf, buf_len, pos))) { + LOG_WARN("fail to deserialize block id arr", K(ret), K(buf_len), K(pos)); + } else { + do_dec_macro_ref(block_info_arr); + } +} + +int ObTabletMacroInfo::deserialize_and_inc_macro_ref(const char *buf, const int64_t buf_len, bool &inc_success) const +{ + int ret = OB_SUCCESS; + int64_t cnt = 0; + int64_t pos = 0; + int64_t inc_cnt = 0; + int64_t id_pos = 0; + MacroBlockId macro_id; + inc_success = false; + int16_t macro_type; + ObArenaAllocator allocator; + ObBlockInfoArray block_id_arr; + if (OB_FAIL(serialization::decode_i16(buf, buf_len, 
pos, &macro_type))) { + LOG_WARN("fail to deserialize macro type", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(block_id_arr.deserialize(allocator, buf, buf_len, pos))) { + LOG_WARN("fail to deserialize block id arr", K(ret), KP(buf), K(buf_len)); + } else if (OB_FAIL(do_inc_macro_ref(block_id_arr, inc_success))) { + LOG_WARN("fail to increase macro ref", K(ret), K(block_id_arr)); + } + return ret; +} + +int ObTabletMacroInfo::deserialize_and_inc_shared_macro_ref(const char *buf, const int64_t buf_len, bool &inc_success) const +{ + int ret = OB_SUCCESS; + int64_t cnt = 0; + int64_t pos = 0; + int64_t id_pos = 0; + int64_t macro_ref_inc_cnt = 0; + int64_t occupy_size_inc_cnt = 0; + ObSharedBlockInfo block_info; + int16_t macro_type; + inc_success = false; + ObArenaAllocator allocator; + ObBlockInfoArray block_info_arr; + if (OB_FAIL(serialization::decode_i16(buf, buf_len, pos, &macro_type))) { + LOG_WARN("fail to deserialize macro id cnt", K(ret), K(buf_len)); + } else if (OB_FAIL(block_info_arr.deserialize(allocator, buf, buf_len, pos))) { + LOG_WARN("fail to deserialize block id arr", K(ret), KP(buf), K(buf_len)); + } else if (OB_FAIL(do_inc_macro_ref(block_info_arr, inc_success))) { + LOG_WARN("fail to increase macro ref", K(ret), K(block_info_arr)); + } + return ret; +} + +void ObTabletMacroInfo::dec_macro_ref_without_io() const +{ + int ret = OB_SUCCESS; + do_dec_macro_ref(data_block_info_arr_); + do_dec_macro_ref(meta_block_info_arr_); + do_dec_macro_ref(shared_meta_block_info_arr_); + do_dec_macro_ref(shared_data_block_info_arr_); +} + +int ObTabletMacroInfo::inc_macro_ref_without_io() const +{ + int ret = OB_SUCCESS; + bool inc_data_macro_id_success = false; + bool inc_meta_macro_id_success = false; + bool inc_shared_meta_macro_id_success = false; + bool inc_shared_data_macro_id_success = false; + if (OB_FAIL(do_inc_macro_ref(meta_block_info_arr_, inc_meta_macro_id_success))) { + LOG_WARN("fail to increase meta blocks' ref cnt", K(ret), 
K(meta_block_info_arr_)); + } else if (OB_FAIL(do_inc_macro_ref(data_block_info_arr_, inc_data_macro_id_success))) { + LOG_WARN("fail to increase data blocks' ref cnt", K(ret), K(data_block_info_arr_)); + } else if (OB_FAIL(do_inc_macro_ref(shared_meta_block_info_arr_, inc_shared_meta_macro_id_success))) { + LOG_WARN("fail to increase shared meta blocks' ref cnt", K(ret), K(shared_meta_block_info_arr_)); + } else if (OB_FAIL(do_inc_macro_ref(shared_data_block_info_arr_, inc_shared_data_macro_id_success))) { + LOG_WARN("fail to increase shared data blocks' ref cnt and block size", K(ret), K(shared_data_block_info_arr_)); + } + if (OB_FAIL(ret)) { + if (inc_data_macro_id_success) { + do_dec_macro_ref(data_block_info_arr_); + } + if (inc_meta_macro_id_success) { + do_dec_macro_ref(meta_block_info_arr_); + } + if (inc_shared_meta_macro_id_success) { + do_dec_macro_ref(shared_meta_block_info_arr_); + } + if (inc_shared_data_macro_id_success) { + do_dec_macro_ref(shared_data_block_info_arr_); + } + } + return ret; +} + +int ObTabletMacroInfo::do_inc_macro_ref(const ObBlockInfoArray &block_info_arr, bool &inc_macro_id_success) const +{ + int ret = OB_SUCCESS; + inc_macro_id_success = false; + int64_t macro_ref_inc_cnt = 0; + int64_t occupy_size_inc_cnt = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < block_info_arr.cnt_; i++) { + const MacroBlockId &macro_id = block_info_arr.arr_[i].shared_macro_id_; + const int64_t occupy_size = block_info_arr.arr_[i].occupy_size_; + if (OB_UNLIKELY(!macro_id.is_valid() || occupy_size <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("macro_id/occupy_size is invalid", K(ret), K(macro_id), K(occupy_size)); + } else if (OB_FAIL(OB_SERVER_BLOCK_MGR.inc_ref(macro_id))) { + LOG_WARN("fail to increase macro ref cnt", K(ret), K(macro_id)); + } else if (FALSE_IT(macro_ref_inc_cnt++)) { + } else if (OB_FAIL(MTL(ObSharedMacroBlockMgr*)->add_block(macro_id, occupy_size))) { + LOG_WARN("fail to increase shared block's occupy size", K(ret), K(macro_id), 
K(occupy_size)); + } else { + occupy_size_inc_cnt++; + } + } + if (OB_FAIL(ret)) { + int tmp_ret = OB_SUCCESS; + // no need to check OB_SUCC(ret) + for (int64_t i = 0; i < macro_ref_inc_cnt; i++) { + const MacroBlockId &macro_id = block_info_arr.arr_[i].shared_macro_id_; + const int64_t occupy_size = block_info_arr.arr_[i].occupy_size_; + if (OB_TMP_FAIL(OB_SERVER_BLOCK_MGR.dec_ref(macro_id))) { + LOG_WARN("fail to decrease macro ref cnt", K(tmp_ret), K(macro_id)); + } else if (i < occupy_size_inc_cnt && OB_TMP_FAIL(MTL(ObSharedMacroBlockMgr*)->free_block(macro_id, occupy_size))) { + LOG_WARN("fail to decrease shared block's occupy size", K(tmp_ret), K(macro_id), K(occupy_size)); + } + } + } else { + inc_macro_id_success = true; + } + return ret; +} + +int ObTabletMacroInfo::do_inc_macro_ref(const ObBlockInfoArray &block_info_arr, bool &inc_macro_id_success) const +{ + int ret = OB_SUCCESS; + inc_macro_id_success = false; + int64_t increased_id_cnt = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < block_info_arr.cnt_; i++) { + const MacroBlockId &macro_id = block_info_arr.arr_[i]; + if (OB_UNLIKELY(!macro_id.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("macro id is invalid", K(ret), K(macro_id)); + } else if (OB_FAIL(OB_SERVER_BLOCK_MGR.inc_ref(macro_id))) { + LOG_WARN("fail to increase macro ref cnt", K(ret), K(macro_id)); + } else { + increased_id_cnt++; + } + } + if (OB_FAIL(ret)) { + int tmp_ret = OB_SUCCESS; + // no need to check OB_SUCC(ret) + for (int64_t i = 0; i < increased_id_cnt; i++) { + const MacroBlockId &macro_id = block_info_arr.arr_[i]; + if (OB_TMP_FAIL(OB_SERVER_BLOCK_MGR.dec_ref(macro_id))) { + LOG_WARN("fail to decrease macro ref cnt", K(tmp_ret), K(macro_id)); + } + } + } else { + inc_macro_id_success = true; + } + return ret; +} + +void ObTabletMacroInfo::do_dec_macro_ref(const ObBlockInfoArray &block_info_arr) const +{ + int ret = OB_SUCCESS; + // no need to check OB_SUCC(ret) + for (int64_t i = 0; i < block_info_arr.cnt_; i++) { + const 
MacroBlockId &macro_id = block_info_arr.arr_[i].shared_macro_id_; + const int64_t occupy_size = block_info_arr.arr_[i].occupy_size_; + if (OB_UNLIKELY(!macro_id.is_valid() || occupy_size <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("macro_id/occupy_size is invalid", K(ret), K(macro_id), K(occupy_size)); + } else if (OB_FAIL(OB_SERVER_BLOCK_MGR.dec_ref(macro_id))) { + LOG_WARN("fail to decrease macro ref cnt", K(ret), K(macro_id)); + } else if (OB_FAIL(MTL(ObSharedMacroBlockMgr*)->free_block(macro_id, occupy_size))) { + LOG_WARN("fail to decrease shared block's occupy size", K(ret), K(macro_id), K(occupy_size)); + } + } +} + +void ObTabletMacroInfo::do_dec_macro_ref(const ObBlockInfoArray &block_info_arr) const +{ + int ret = OB_SUCCESS; + // no need to check OB_SUCC(ret) + for (int64_t i = 0; i < block_info_arr.cnt_; i++) { + const MacroBlockId &macro_id = block_info_arr.arr_[i]; + if (OB_UNLIKELY(!macro_id.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("macro id is invalid", K(ret), K(macro_id)); + } else if (OB_FAIL(OB_SERVER_BLOCK_MGR.dec_ref(macro_id))) { + LOG_WARN("fail to decrease macro ref cnt", K(ret), K(macro_id)); + } + } +} + +bool ObTabletMacroInfo::ignore_ret(const int ret) +{ + return OB_ALLOCATE_MEMORY_FAILED == ret || OB_TIMEOUT == ret || OB_DISK_HUNG == ret; +} +} // storage +} // oceanbase \ No newline at end of file diff --git a/src/storage/tablet/ob_tablet_block_aggregated_info.h b/src/storage/tablet/ob_tablet_block_aggregated_info.h new file mode 100644 index 0000000000..001b420c47 --- /dev/null +++ b/src/storage/tablet/ob_tablet_block_aggregated_info.h @@ -0,0 +1,250 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_STORAGE_OB_TABLET_BLOCK_AGGREGATED_INFO +#define OCEANBASE_STORAGE_OB_TABLET_BLOCK_AGGREGATED_INFO + +#include +#include "lib/ob_errno.h" +#include "lib/utility/ob_macro_utils.h" +#include "lib/utility/ob_unify_serialize.h" +#include "storage/blocksstable/ob_macro_block_id.h" +#include "lib/container/ob_array_serialization.h" +#include "lib/hash/ob_hashset.h" + +namespace oceanbase +{ +namespace storage +{ +class ObLinkedMacroBlockItemWriter; +class ObLinkedMacroBlockItemReader; + +enum class ObTabletMacroType : int16_t +{ + INVALID_TYPE = 0, + META_BLOCK = 1, + DATA_BLOCK = 2, + SHARED_META_BLOCK = 3, + SHARED_DATA_BLOCK = 4, + MAX + +}; +struct ObSharedBlockInfo final +{ +public: + ObSharedBlockInfo() + : shared_macro_id_(), occupy_size_() + { + } + ObSharedBlockInfo(const blocksstable::MacroBlockId &shared_macro_id, const int64_t occupy_size) + : shared_macro_id_(shared_macro_id), occupy_size_(occupy_size) + { + } + ~ObSharedBlockInfo() + { + reset(); + } + void reset() + { + shared_macro_id_.reset(); + occupy_size_ = 0; + } + OB_INLINE bool is_valid() const // read-only predicate: callable through const references + { + return shared_macro_id_.is_valid() && occupy_size_ >= 0; + } + int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; + int deserialize(const char *buf, const int64_t data_len, int64_t &pos); + int64_t get_serialize_size() const; + TO_STRING_KV(K_(shared_macro_id), K_(occupy_size)); +public: + blocksstable::MacroBlockId shared_macro_id_; + int64_t occupy_size_; +}; + +struct ObBlockInfoSet +{ +public: + static const int64_t SHARED_BLOCK_BUCKET_NUM = 10; + static const int64_t EXCLUSIVE_BLOCK_BUCKET_NUM = 10000; + static 
const int64_t MAP_EXTEND_RATIO = 2; + + typedef typename common::hash::ObHashSet, + common::hash::equal_to, + common::hash::SimpleAllocer::AllocType>, + common::hash::NormalPointer, + oceanbase::common::ObMalloc, + MAP_EXTEND_RATIO> TabletMacroSet; + typedef typename common::hash::ObHashMap, + common::hash::equal_to, + common::hash::SimpleAllocer::AllocType>, + common::hash::NormalPointer, + oceanbase::common::ObMalloc, + MAP_EXTEND_RATIO> TabletMacroMap; + typedef typename TabletMacroSet::iterator SetIterator; + typedef typename TabletMacroMap::iterator MapIterator; +public: + ObBlockInfoSet() + : meta_block_info_set_(), data_block_info_set_(), shared_meta_block_info_set_(), shared_data_block_info_map_() + { + } + ~ObBlockInfoSet() + { + meta_block_info_set_.reuse(); + data_block_info_set_.reuse(); + shared_meta_block_info_set_.reuse(); + shared_data_block_info_map_.reuse(); + } + int init( + const int64_t meta_bucket_num = EXCLUSIVE_BLOCK_BUCKET_NUM, + const int64_t data_bucket_num = EXCLUSIVE_BLOCK_BUCKET_NUM, + const int64_t shared_meta_bucket_num = SHARED_BLOCK_BUCKET_NUM, + const int64_t shared_data_bucket_num = SHARED_BLOCK_BUCKET_NUM); + +public: + TabletMacroSet meta_block_info_set_; + TabletMacroSet data_block_info_set_; + TabletMacroSet shared_meta_block_info_set_; + TabletMacroMap shared_data_block_info_map_; +}; + +template +class ObBlockInfoArray final +{ +public: + ObBlockInfoArray(); + ~ObBlockInfoArray(); + void reset(); + int init(const int64_t cnt, ObArenaAllocator &allocator); + int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; + int deserialize(ObArenaAllocator &allocator, const char *buf, const int64_t data_len, int64_t &pos); + int64_t get_serialize_size() const; + int64_t get_deep_copy_size() const; + int deep_copy(char *buf, const int64_t buf_len, int64_t &pos, ObBlockInfoArray &dest_obj) const; + OB_INLINE bool is_valid() const + { + return (0 == cnt_ && nullptr == arr_) || (0 < cnt_ && nullptr != arr_); + } + 
TO_STRING_KV(K_(cnt), KP_(arr), K_(capacity), K_(is_inited)); + +public: + int64_t cnt_; + T *arr_; + + // no need to be persisted + int64_t capacity_; + bool is_inited_; +}; + +class ObTabletMacroInfo final +{ +public: + ObTabletMacroInfo(); + ~ObTabletMacroInfo(); + void reset(); + int init(ObArenaAllocator &allocator, ObBlockInfoSet &id_set, ObLinkedMacroBlockItemWriter &linked_writer); + int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; + int deserialize(ObArenaAllocator &allocator, const char *buf, const int64_t data_len, int64_t &pos); + int64_t get_serialize_size() const; + int64_t get_deep_copy_size() const; + int deep_copy(char *buf, const int64_t buf_len, ObTabletMacroInfo *&dest_obj) const; + bool is_valid() const; + + // inc and dec macro ref cnt, can only be called by tablet + int inc_macro_ref(bool &inc_success) const; + void dec_macro_ref() const; + int get_all_macro_ids( + ObIArray &meta_block_arr, + ObIArray &data_block_arr, + ObIArray &shared_meta_block_arr, + ObIArray &shared_data_block_arr); + TO_STRING_KV( + K_(entry_block), + K_(meta_block_info_arr), + K_(data_block_info_arr), + K_(shared_meta_block_info_arr), + K_(shared_data_block_info_arr), + K_(is_inited)); +private: + static bool ignore_ret(const int ret); + static int parse_info_arr( + const ObBlockInfoArray &info_arr, + ObIArray &id_arr); + static int parse_id_arr( + const ObBlockInfoArray &info_arr, + ObIArray &id_arr); + static int parse_info_buf( + const char *buf, + const int64_t buf_len, + ObIArray &block_id_arr); + static int parse_id_buf( + const char *buf, + const int64_t buf_len, + ObIArray &block_id_arr); + int get_macro_ids_with_io( + ObIArray &meta_block_arr, + ObIArray &data_block_arr, + ObIArray &shared_meta_block_arr, + ObIArray &shared_data_block_arr); + int get_macro_ids_without_io( + ObIArray &meta_block_arr, + ObIArray &data_block_arr, + ObIArray &shared_meta_block_arr, + ObIArray &shared_data_block_arr); + int inc_linked_block_ref_cnt( + const 
ObIArray &linked_block_list, + bool &inc_macro_id_success) const; + void dec_linked_block_ref_cnt(const ObIArray &linked_block_list) const; + int construct_block_id_arr(ObBlockInfoSet::TabletMacroSet &id_set, ObBlockInfoArray &block_id_arr); + int construct_block_info_arr(ObBlockInfoSet::TabletMacroMap &block_info_map, ObBlockInfoArray &block_info_arr); + int persist_macro_ids(ObArenaAllocator &allocator, ObLinkedMacroBlockItemWriter &linked_writer); + int do_flush_ids( + const ObTabletMacroType macro_type, + ObBlockInfoArray &block_id_arr, + ObArenaAllocator &allocator, + ObLinkedMacroBlockItemWriter &linked_writer); + int do_flush_ids( + ObBlockInfoArray &block_info_arr, + ObArenaAllocator &allocator, + ObLinkedMacroBlockItemWriter &linked_writer); + int inc_macro_ref_with_io() const; + int inc_macro_ref_without_io() const; + void dec_macro_ref_with_io() const; + void dec_macro_ref_without_io() const; + int deserialize_and_inc_macro_ref(const char *buf, const int64_t buf_len, bool &inc_success) const; + void deserialize_and_dec_macro_ref(const char *buf, const int64_t buf_len) const; + int deserialize_and_inc_shared_macro_ref(const char *buf, const int64_t buf_len, bool &inc_success) const; + void deserialize_and_dec_shared_macro_ref(const char *buf, const int64_t buf_len) const; + int do_inc_macro_ref(const ObBlockInfoArray &block_info_arr, bool &inc_macro_id_success) const; + void do_dec_macro_ref(const ObBlockInfoArray &block_info_arr) const; + int do_inc_macro_ref(const ObBlockInfoArray &block_info_arr, bool &inc_macro_id_success) const; + void do_dec_macro_ref(const ObBlockInfoArray &block_info_arr) const; +private: + static const int64_t ID_COUNT_THRESHOLD = 50000; // occupy almost 1.2MB disk space + static const int32_t TABLET_MACRO_INFO_VERSION = 1; +public: + blocksstable::MacroBlockId entry_block_; + ObBlockInfoArray meta_block_info_arr_; + ObBlockInfoArray data_block_info_arr_; + ObBlockInfoArray shared_meta_block_info_arr_; + ObBlockInfoArray 
shared_data_block_info_arr_; + bool is_inited_; +}; +} // storage +} // oceanbase + +#endif \ No newline at end of file diff --git a/src/storage/tablet/ob_tablet_block_header.cpp b/src/storage/tablet/ob_tablet_block_header.cpp new file mode 100644 index 0000000000..c592a5024d --- /dev/null +++ b/src/storage/tablet/ob_tablet_block_header.cpp @@ -0,0 +1,194 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX STORAGE + +#include "storage/tablet/ob_tablet_block_header.h" +#include "lib/checksum/ob_crc64.h" + +namespace oceanbase +{ +namespace storage +{ + +int ObTabletBlockHeader::init(const int32_t inline_meta_count) +{ + int ret = OB_SUCCESS; + if (is_inited_) { + ret = OB_INIT_TWICE; + LOG_WARN("ObTabletBlockHeader has inited", K(ret)); + } else if (inline_meta_count > MAX_INLINE_META_COUNT) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("inline meta count is too large", K(ret), K(inline_meta_count)); + } else { + version_ = TABLET_VERSION_V3; + inline_meta_count_ = inline_meta_count; + is_inited_ = true; + } + return ret; +} + +int ObTabletBlockHeader::deserialize(const char* buf, const int64_t data_len, int64_t& pos) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(serialization::decode_i32(buf, data_len, pos, &version_))) { + LOG_WARN("failed to deserialize tablet version", K(ret), K(data_len), K(pos)); + } else if (OB_FAIL(serialization::decode_i32(buf, data_len, pos, &length_))) { + LOG_WARN("failed to deserialize tablet length", K(ret), K(data_len), K(pos)); + } else if 
(TABLET_VERSION_V3 != version_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("only tablet v3 has header", K(ret), K(version_), K(length_)); + } else if (OB_FAIL(serialization::decode_i32(buf, data_len, pos, &checksum_))) { + LOG_WARN("failed to deserialize checksum", K(ret), K(data_len), K(pos)); + } else if (OB_FAIL(serialization::decode_i32(buf, data_len, pos, &inline_meta_count_))) { + LOG_WARN("failed to deserialize tablet secondary_meta_count", K(ret), K(data_len), K(pos)); + } else if (OB_UNLIKELY(inline_meta_count_ > MAX_INLINE_META_COUNT)) { + ret = OB_DESERIALIZE_ERROR; + LOG_WARN("inline_meta_count is too large", K(ret), K(inline_meta_count_)); + } else { + const int64_t desc_array_len = inline_meta_count_ * sizeof(ObInlineSecondaryMetaDesc); + if (desc_array_len > 0) { + MEMCPY(desc_array_, buf + pos, desc_array_len); + pos += desc_array_len; + } + if (OB_UNLIKELY(data_len - pos < length_)) { + ret = OB_DESERIALIZE_ERROR; + LOG_WARN("buffer's length is not enough", K(ret), K(data_len), K(pos), K(length_), K(desc_array_len)); + } + } + + if (OB_SUCC(ret)) { + is_inited_ = true; + } + return ret; +} + +int64_t ObTabletBlockHeader::get_serialize_size(void) const +{ + int64_t len = 0; + len += serialization::encoded_length_i32(version_); + len += serialization::encoded_length_i32(length_); + len += serialization::encoded_length_i32(checksum_); + len += serialization::encoded_length_i32(inline_meta_count_); + len += inline_meta_count_ * sizeof(ObInlineSecondaryMetaDesc); + return len; +} + +int ObTabletBlockHeader::serialize(char* buf, const int64_t buf_len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + const int64_t size = get_serialize_size(); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTabletBlockHeader not inited", K(ret)); + } else if (OB_UNLIKELY(pushed_inline_meta_cnt_ != inline_meta_count_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("inline meta count mismatch", K(ret), K(inline_meta_count_), K(pushed_inline_meta_cnt_)); + } else if (buf_len - 
pos < size) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("serilize overflow", K(ret), K(buf_len), K(pos), K(size)); + } else if (OB_FAIL(serialization::encode_i32(buf, buf_len, pos, version_))) { + LOG_WARN("fail to serialize verison", K(ret)); + } else if (OB_FAIL(serialization::encode_i32(buf, buf_len, pos, length_))) { + LOG_WARN("fail to serialize length", K(ret)); + } else if (OB_FAIL(serialization::encode_i32(buf, buf_len, pos, checksum_))) { + LOG_WARN("fail to serialize checksum", K(ret)); + } else if (OB_FAIL(serialization::encode_i32(buf, buf_len, pos, inline_meta_count_))) { + LOG_WARN("fail to serialize inline_meta_count", K(ret)); + } else { + if (inline_meta_count_ > 0) { + MEMCPY(buf + pos, desc_array_, inline_meta_count_ * sizeof(ObInlineSecondaryMetaDesc)); + pos += inline_meta_count_ * sizeof(ObInlineSecondaryMetaDesc); + } + } + return ret; +} + +int ObTabletBlockHeader::push_inline_meta(const ObInlineSecondaryMetaDesc &desc) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTabletBlockHeader not inited", K(ret)); + } else if (OB_UNLIKELY(pushed_inline_meta_cnt_ >= inline_meta_count_)) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("pushed meta count is overflow", K(ret), K(inline_meta_count_)); + } else { + desc_array_[pushed_inline_meta_cnt_] = desc; + pushed_inline_meta_cnt_++; + } + return ret; +} + +void ObSecondaryMetaHeader::destroy() +{ + version_ = SECONDARY_META_HEADER_VERSION; + size_ = sizeof(ObSecondaryMetaHeader); + checksum_ = 0; + payload_size_ = 0; +} + +int ObSecondaryMetaHeader::deserialize(const char* buf, const int64_t data_len, int64_t& pos) +{ + int ret = OB_SUCCESS; + int64_t new_pos = pos; + if (OB_ISNULL(buf) || OB_UNLIKELY(0 >= data_len || 0 > pos || data_len - pos < get_serialize_size())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), KP(buf), K(data_len), K(pos)); + } else if (OB_FAIL(serialization::decode_i32(buf, data_len, new_pos, &version_))) { + LOG_WARN("fail to deserialize 
version", K(ret), K(data_len), K(new_pos)); + } else if (OB_UNLIKELY(SECONDARY_META_HEADER_VERSION != version_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("header version doesn't match", K(ret), K(version_)); + } else if (OB_FAIL(serialization::decode_i32(buf, data_len, new_pos, &size_))) { + LOG_WARN("fail to deserialize size", K(ret), K(data_len), K(new_pos)); + } else if (new_pos - pos < size_ && OB_FAIL(serialization::decode_i32(buf, data_len, new_pos, &checksum_))) { + LOG_WARN("fail to deserialize checksum", K(ret), K(data_len), K(new_pos)); + } else if (new_pos - pos < size_ && OB_FAIL(serialization::decode_i32(buf, data_len, new_pos, &payload_size_))) { + LOG_WARN("fail to deserialize length", K(ret), K(data_len), K(new_pos)); + } else if (OB_UNLIKELY(new_pos - pos != size_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("header size doesn't match", K(ret), K(new_pos), K(pos), K(size_)); + } else { + pos = new_pos; + } + return ret; +} + +int ObSecondaryMetaHeader::serialize(char* buf, const int64_t buf_len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(buf) || OB_UNLIKELY(0 >= buf_len || 0 > pos || buf_len - pos < get_serialize_size())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), KP(buf), K(buf_len), K(pos)); + } else if (OB_FAIL(serialization::encode_i32(buf, buf_len, pos, version_))) { + LOG_WARN("fail to serialize header version", K(ret), K(buf_len), K(pos), K_(version)); + } else if (OB_FAIL(serialization::encode_i32(buf, buf_len, pos, size_))) { + LOG_WARN("fail to serialize header size", K(ret), K(buf_len), K(pos), K_(size)); + } else if (OB_FAIL(serialization::encode_i32(buf, buf_len, pos, checksum_))) { + LOG_WARN("fail to serialize checksum", K(ret), K(buf_len), K_(checksum)); + } else if (OB_FAIL(serialization::encode_i32(buf, buf_len, pos, payload_size_))) { + LOG_WARN("fail to serialize length", K(ret), K(buf_len), K_(payload_size)); + } + return ret; +} + +int64_t ObSecondaryMetaHeader::get_serialize_size() const 
+{ + return serialization::encoded_length_i32(version_) + + serialization::encoded_length_i32(size_) + + serialization::encoded_length_i32(checksum_) + + serialization::encoded_length_i32(payload_size_); +} + +} // namespace storage +} // namespace oceanbase diff --git a/src/storage/tablet/ob_tablet_block_header.h b/src/storage/tablet/ob_tablet_block_header.h new file mode 100644 index 0000000000..ff2af54403 --- /dev/null +++ b/src/storage/tablet/ob_tablet_block_header.h @@ -0,0 +1,135 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_STORAGE_OB_TABLET_BLOCK_HEADER_H +#define OCEANBASE_STORAGE_OB_TABLET_BLOCK_HEADER_H + +#include +#include "lib/utility/ob_print_utils.h" +#include "lib/allocator/page_arena.h" +#include "lib/container/ob_array_wrap.h" +#include "lib/container/ob_array_serialization.h" + +namespace oceanbase +{ +namespace storage +{ +enum class ObSecondaryMetaType : uint8_t +{ + TABLET_MACRO_INFO = 0, + TABLE_STORE = 1, + STORAGE_SCHEMA = 2, + MDS_DATA = 3, + MAX = 4 +}; + +struct ObInlineSecondaryMetaDesc final +{ +public: + ObInlineSecondaryMetaDesc() + : type_(ObSecondaryMetaType::MAX), length_(0) + { + } + ObInlineSecondaryMetaDesc(const ObSecondaryMetaType type, const int32_t length) + : type_(type), length_(length) + { + } + + ObSecondaryMetaType type_; + int32_t length_; + + TO_STRING_KV(K_(type), K_(length)); + +} __attribute__((packed)); + +struct ObTabletBlockHeader final +{ +public: + static const int32_t TABLET_VERSION_V1 = 1; + static const int32_t TABLET_VERSION_V2 = 2; + static const int32_t TABLET_VERSION_V3 = 3; + + ObTabletBlockHeader() + : is_inited_(false), pushed_inline_meta_cnt_(0), + version_(TABLET_VERSION_V3), length_(0), + checksum_(0), inline_meta_count_(0) {} + + int init(const int32_t secondary_meta_count); + bool is_valid() const + { + return is_inited_ && version_ == TABLET_VERSION_V3 && length_ > 0 && checksum_ > 0 && inline_meta_count_ >= 0; + } + + NEED_SERIALIZE_AND_DESERIALIZE; + int push_inline_meta(const ObInlineSecondaryMetaDesc &desc); + + TO_STRING_KV(K_(version), K_(length), K_(checksum), "desc_array", + common::ObArrayWrap(desc_array_, inline_meta_count_)); + + int32_t get_version() const { return version_; } + int32_t get_length() const { return length_; } + + static int get_tablet_version(const char *buf, const int64_t len, int32_t &version); + +public: + static const int32_t MAX_INLINE_META_COUNT = 8; + bool is_inited_; + int32_t pushed_inline_meta_cnt_; + + // below need serialize + int32_t version_; + int32_t 
length_; // tablet first-level meta size + int32_t checksum_; // checksum for tablet first-level meta + int32_t inline_meta_count_; // inline meta refers the secondary meta which is stored consecutively with tablet first-level meta + ObInlineSecondaryMetaDesc desc_array_[MAX_INLINE_META_COUNT]; +}; + +struct ObSecondaryMetaHeader final +{ +public: + static const int32_t SECONDARY_META_HEADER_VERSION = 1; +public: + ObSecondaryMetaHeader() + : version_(SECONDARY_META_HEADER_VERSION), + size_(sizeof(ObSecondaryMetaHeader)), checksum_(0), payload_size_(0) + { + } + ~ObSecondaryMetaHeader() { destroy(); } + void destroy(); + TO_STRING_KV(K_(version), K_(checksum), K_(size), K_(payload_size)); + NEED_SERIALIZE_AND_DESERIALIZE; +public: + int32_t version_; + int32_t size_; + int32_t checksum_; + int32_t payload_size_; +}; + +struct ObInlineSecondaryMeta final +{ +public: + ObInlineSecondaryMeta() + { + } + ObInlineSecondaryMeta(const void *obj, const ObSecondaryMetaType meta_type) + : obj_(obj), meta_type_(meta_type) + { + } + TO_STRING_KV(KP_(obj), K_(meta_type)); + const void *obj_; + ObSecondaryMetaType meta_type_; +}; + +} // namespace storage +} // namespace oceanbase + +#endif // OCEANBASE_STORAGE_OB_TABLET_BLOCK_HEADER_H diff --git a/src/storage/tablet/ob_tablet_common.h b/src/storage/tablet/ob_tablet_common.h index b0379b6bdd..12b3a5f8a7 100644 --- a/src/storage/tablet/ob_tablet_common.h +++ b/src/storage/tablet/ob_tablet_common.h @@ -59,6 +59,11 @@ public: static const int64_t DEFAULT_GET_TABLET_DURATION_US = 1_s; static const int64_t DEFAULT_GET_TABLET_DURATION_10_S = 10_s; static const int64_t FINAL_TX_ID = 0; + // The length of tablet_addr contains first-level meta's length and inline-meta's length. + // We ensures that the first-level meta's length will not exceed MAX_TABLET_FIRST_LEVEL_META_SIZE by implementation, + // in fact, within 4k in most cases. 
So just use this length in the situation where only want to read first-level meta, + // although there is some IO amplification, but avoid the trouble of recording the first-level meta's length. + static const int64_t MAX_TABLET_FIRST_LEVEL_META_SIZE = 16 * 1024; // 16k }; } // namespace storage } // namespace oceanbase diff --git a/src/storage/tablet/ob_tablet_complex_addr.h b/src/storage/tablet/ob_tablet_complex_addr.h index a3b02ccf90..47edac7c4f 100644 --- a/src/storage/tablet/ob_tablet_complex_addr.h +++ b/src/storage/tablet/ob_tablet_complex_addr.h @@ -141,7 +141,7 @@ int64_t ObTabletComplexAddr::to_string(char *buf, const int64_t buf_len) cons // do nothing } else { J_OBJ_START(); - J_KV(K_(ptr), K_(addr)); + J_KV(KP_(ptr), K_(addr)); J_OBJ_END(); } diff --git a/src/storage/tablet/ob_tablet_macro_info_iterator.cpp b/src/storage/tablet/ob_tablet_macro_info_iterator.cpp new file mode 100644 index 0000000000..107fc10fce --- /dev/null +++ b/src/storage/tablet/ob_tablet_macro_info_iterator.cpp @@ -0,0 +1,324 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE + +#include "storage/tablet/ob_tablet_macro_info_iterator.h" +#include "storage/ob_super_block_struct.h" + +namespace oceanbase +{ +using namespace blocksstable; +namespace storage +{ +ObTabletBlockInfo::ObTabletBlockInfo() + : macro_id_(), block_type_(ObTabletMacroType::INVALID_TYPE), occupy_size_(0) +{ +} + +ObTabletBlockInfo::ObTabletBlockInfo( + const blocksstable::MacroBlockId &macro_id, + const ObTabletMacroType block_type, + const int64_t occupy_size) + : macro_id_(macro_id), block_type_(block_type), occupy_size_(occupy_size) +{ +} + +ObTabletBlockInfo::~ObTabletBlockInfo() +{ + reset(); +} + +void ObTabletBlockInfo::reset() +{ + macro_id_.reset(); + block_type_ = ObTabletMacroType::INVALID_TYPE; + occupy_size_ = 0; +} + +ObMacroInfoIterator::ObMacroInfoIterator() + : macro_info_(nullptr), block_reader_(), cur_pos_(0), cur_size_(0), + cur_type_(ObTabletMacroType::INVALID_TYPE), + target_type_(ObTabletMacroType::INVALID_TYPE), block_info_arr_(), + allocator_("MacroInfoIter", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), + is_linked_(false), is_loaded_(false), is_inited_(false) +{ +} + +ObMacroInfoIterator::~ObMacroInfoIterator() +{ + destroy(); +} + +void ObMacroInfoIterator::destroy() +{ + block_reader_.reset(); + cur_pos_ = 0; + cur_size_ = 0; + cur_type_ = ObTabletMacroType::INVALID_TYPE; + target_type_ = ObTabletMacroType::INVALID_TYPE; + block_info_arr_.reset(); + is_linked_ = false; + if (is_loaded_ && nullptr != macro_info_) { + macro_info_->reset(); + } + macro_info_ = nullptr; + is_loaded_ = false; + is_inited_ = false; +} + +int ObMacroInfoIterator::init(const ObTabletMacroType target_type, const ObTablet &tablet) +{ + int ret = OB_SUCCESS; + ObTabletMacroInfo *macro_info = nullptr; + bool in_memory = false; + + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("Macro Info Iterator has been inited", K(ret)); + } else if (OB_UNLIKELY(!tablet.is_valid() || ObTabletMacroType::INVALID_TYPE == target_type)) { 
+ ret = OB_INVALID_ARGUMENT; + LOG_WARN("macro info is invalid", K(ret), K(macro_info), K(target_type)); + } else if (OB_FAIL(tablet.load_macro_info(allocator_, macro_info, in_memory))) { + LOG_WARN("fail to load macro info", K(ret), K(tablet)); + } else if (OB_ISNULL(macro_info) || OB_UNLIKELY(!macro_info->is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid macro info", K(ret), KPC(macro_info)); + } else { + const MacroBlockId &entry_block = macro_info->entry_block_; + if (!IS_EMPTY_BLOCK_LIST(entry_block)) { + if (OB_FAIL(block_reader_.init(entry_block))) { + LOG_WARN("fail to init block reader", K(ret), K(entry_block)); + } else { + is_linked_ = true; + } + } else { + is_linked_ = false; + } + } + if (OB_SUCC(ret)) { + block_info_arr_.reset(); + cur_pos_ = 0; + cur_size_ = 0; + cur_type_ = ObTabletMacroType::MAX; + target_type_ = target_type; + macro_info_ = macro_info; + is_loaded_ = !in_memory; + is_inited_ = true; + } + return ret; +} + +int ObMacroInfoIterator::get_next(ObTabletBlockInfo &block_info) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("macro info iterator hasn't been inited", K(ret)); + } + while (OB_SUCC(ret) && cur_pos_ == cur_size_) { + if (is_linked_) { + if (OB_FAIL(read_from_disk())) { + LOG_WARN("fail to read block info from disk", K(ret)); + } + } else { + if (OB_FAIL(read_from_memory())) { + LOG_WARN("fail to read block info from memory", K(ret)); + } + } + if (OB_SUCC(ret)) { + cur_pos_ = 0; + cur_size_ = block_info_arr_.cnt_; + } + } + if (OB_SUCC(ret)) { + block_info = block_info_arr_.arr_[cur_pos_++]; + } + return ret; +} + +int ObMacroInfoIterator::read_from_memory() +{ + int ret = OB_SUCCESS; + ObArenaAllocator allocator; + if (OB_ISNULL(macro_info_) || OB_UNLIKELY(ObTabletMacroType::INVALID_TYPE == target_type_ + || ObTabletMacroType::INVALID_TYPE == cur_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid membership", K(ret), KP_(macro_info), K_(target_type), K_(cur_type)); + } else 
if (ObTabletMacroType::MAX == target_type_) { + switch (cur_type_) { + case ObTabletMacroType::MAX: + cur_type_ = ObTabletMacroType::META_BLOCK; + break; + case ObTabletMacroType::META_BLOCK: + cur_type_ = ObTabletMacroType::DATA_BLOCK; + break; + case ObTabletMacroType::DATA_BLOCK: + cur_type_ = ObTabletMacroType::SHARED_META_BLOCK; + break; + case ObTabletMacroType::SHARED_META_BLOCK: + cur_type_ = ObTabletMacroType::SHARED_DATA_BLOCK; + break; + default: + ret = OB_ITER_END; + break; + } + } else { + if (cur_type_ == target_type_) { + ret = OB_ITER_END; + } else { + cur_type_ = target_type_; + } + } + + if (OB_SUCC(ret)) { + switch (cur_type_) { + case ObTabletMacroType::META_BLOCK: + if (OB_FAIL(reuse_info_arr(macro_info_->meta_block_info_arr_.cnt_))) { + LOG_WARN("fail to reuse block_info_arr_", K(ret), K(macro_info_->meta_block_info_arr_)); + } else if (OB_FAIL(convert_to_block_info(macro_info_->meta_block_info_arr_))) { + LOG_WARN("fail to convert to block info", K(ret), K(macro_info_->meta_block_info_arr_)); + } + break; + case ObTabletMacroType::DATA_BLOCK: + if (OB_FAIL(reuse_info_arr(macro_info_->data_block_info_arr_.cnt_))) { + LOG_WARN("fail to reuse block_info_arr_", K(ret), K(macro_info_->data_block_info_arr_)); + } else if (OB_FAIL(convert_to_block_info(macro_info_->data_block_info_arr_))) { + LOG_WARN("fail to convert to block info", K(ret), K(macro_info_->data_block_info_arr_)); + } + break; + case ObTabletMacroType::SHARED_META_BLOCK: + if (OB_FAIL(reuse_info_arr(macro_info_->shared_meta_block_info_arr_.cnt_))) { + LOG_WARN("fail to reuse block_info_arr_", K(ret), K(macro_info_->shared_meta_block_info_arr_)); + } else if (OB_FAIL(convert_to_block_info(macro_info_->shared_meta_block_info_arr_))) { + LOG_WARN("fail to convert to block info", K(ret), K(macro_info_->shared_meta_block_info_arr_)); + } + break; + case ObTabletMacroType::SHARED_DATA_BLOCK: + if (OB_FAIL(reuse_info_arr(macro_info_->shared_data_block_info_arr_.cnt_))) { + LOG_WARN("fail 
to reuse block_info_arr_", K(ret), K(macro_info_->shared_data_block_info_arr_)); + } else if (OB_FAIL(convert_to_block_info(macro_info_->shared_data_block_info_arr_))) { + LOG_WARN("fail to convert to block info", K(ret), K(macro_info_->shared_data_block_info_arr_)); + } + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur_type is invalid", K(ret), K(cur_type_)); + break; + } + } + return ret; +} + +int ObMacroInfoIterator::read_from_disk() +{ + int ret = OB_SUCCESS; + ObArenaAllocator allocator; + char *buf = nullptr; + int64_t buf_len = 0; + ObMetaDiskAddr addr; + int64_t pos = 0; + const ObTabletMacroType prev_type = cur_type_; + + if (OB_UNLIKELY(ObTabletMacroType::INVALID_TYPE == target_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid target_type", K(ret)); + } else { + const bool target_iter = ObTabletMacroType::MAX != target_type_; + do { + pos = 0; + buf = nullptr; + buf_len = 0; + if (OB_FAIL(block_reader_.get_next_item(buf, buf_len, addr))) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to get next item", K(ret)); + } + } else if (OB_FAIL(serialization::decode_i16(buf, buf_len, pos, reinterpret_cast<int16_t *>(&cur_type_)))) { + LOG_WARN("fail to deserialize macro type", K(ret), K(buf_len), K(pos)); + } else if (OB_UNLIKELY(ObTabletMacroType::INVALID_TYPE == cur_type_ || ObTabletMacroType::MAX == cur_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid macor type", K(ret)); + } else if (target_iter && prev_type == target_type_ && cur_type_ != target_type_) { + ret = OB_ITER_END; // if ObTabletMacroType::MAX is not equal to target_type_, we only need to iterate targeted ids + } + } while (OB_SUCC(ret) && target_iter && cur_type_ != target_type_); + } + + if (OB_FAIL(ret)) { + // do nothing + } else if (ObTabletMacroType::SHARED_DATA_BLOCK == cur_type_) { + ObBlockInfoArray<ObSharedBlockInfo> tmp_arr; + if (OB_FAIL(tmp_arr.deserialize(allocator, buf, buf_len, pos))) { + LOG_WARN("fail to deserialize block info arr", K(ret), K(buf_len), K(pos)); + } else if 
(OB_FAIL(reuse_info_arr(tmp_arr.cnt_))) { + LOG_WARN("fail to reuse block_info_arr_", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(convert_to_block_info(tmp_arr))) { + LOG_WARN("fail to convert to block info", K(ret), K(tmp_arr)); + } + } else { + ObBlockInfoArray<MacroBlockId> tmp_arr; + if (OB_FAIL(tmp_arr.deserialize(allocator, buf, buf_len, pos))) { + LOG_WARN("fail to deserialize block info arr", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(reuse_info_arr(tmp_arr.cnt_))) { + LOG_WARN("fail to reuse block_info_arr_", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(convert_to_block_info(tmp_arr))) { + LOG_WARN("fail to convert to block info", K(ret), K(tmp_arr)); + } + } + return ret; +} + +int ObMacroInfoIterator::convert_to_block_info(const ObBlockInfoArray<ObSharedBlockInfo> &tmp_arr) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < tmp_arr.cnt_; i++) { + if (OB_UNLIKELY(!tmp_arr.arr_[i].is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block info is invalid", K(ret), K(tmp_arr.arr_[i])); + } else { + block_info_arr_.arr_[i] = ObTabletBlockInfo(tmp_arr.arr_[i].shared_macro_id_, cur_type_, tmp_arr.arr_[i].occupy_size_); + } + } + return ret; +} + +int ObMacroInfoIterator::convert_to_block_info(const ObBlockInfoArray<MacroBlockId> &tmp_arr) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < tmp_arr.cnt_; i++) { + if (OB_UNLIKELY(!tmp_arr.arr_[i].is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block info is invalid", K(ret), K(tmp_arr.arr_[i])); + } else { + block_info_arr_.arr_[i] = ObTabletBlockInfo(tmp_arr.arr_[i], cur_type_, OB_DEFAULT_MACRO_BLOCK_SIZE); + } + } + return ret; +} + +int ObMacroInfoIterator::reuse_info_arr(const int64_t cnt) +{ + int ret = OB_SUCCESS; + if (block_info_arr_.capacity_ >= cnt) { + block_info_arr_.cnt_ = cnt; + } else { + block_info_arr_.reset(); + allocator_.reuse(); + if (OB_FAIL(block_info_arr_.init(cnt, allocator_))) { + LOG_WARN("fail to init block_info_arr_", K(ret), K(cnt)); + } + } + return ret; +} + +} // 
storage +} // oceanbase \ No newline at end of file diff --git a/src/storage/tablet/ob_tablet_macro_info_iterator.h b/src/storage/tablet/ob_tablet_macro_info_iterator.h new file mode 100644 index 0000000000..4d17ef095b --- /dev/null +++ b/src/storage/tablet/ob_tablet_macro_info_iterator.h @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_STORAGE_OB_TABLET_MACRO_INFO_ITERATOR +#define OCEANBASE_STORAGE_OB_TABLET_MACRO_INFO_ITERATOR + +#include "storage/tablet/ob_tablet_block_aggregated_info.h" +#include "storage/slog_ckpt/ob_linked_macro_block_reader.h" +#include "storage/tablet/ob_tablet.h" + +namespace oceanbase +{ +namespace storage +{ +struct ObTabletBlockInfo final +{ +public: + ObTabletBlockInfo(); + ObTabletBlockInfo( + const blocksstable::MacroBlockId &macro_id, + const ObTabletMacroType block_type, + const int64_t occupy_size); + ~ObTabletBlockInfo(); + void reset(); +public: + blocksstable::MacroBlockId macro_id_; + ObTabletMacroType block_type_; + int64_t occupy_size_; +}; + +class ObMacroInfoIterator final +{ +public: + ObMacroInfoIterator(); + ~ObMacroInfoIterator(); + ObMacroInfoIterator(const ObMacroInfoIterator &) = delete; + ObMacroInfoIterator &operator=(const ObMacroInfoIterator &) = delete; + void destroy(); + // max means iterate all kinds of ids + int init(const ObTabletMacroType target_type, const ObTablet &tablet); + int get_next(ObTabletBlockInfo &block_info); +private: + int read_from_disk(); + int read_from_memory(); + int 
reuse_info_arr(const int64_t cnt); + int convert_to_block_info(const ObBlockInfoArray &tmp_arr); + int convert_to_block_info(const ObBlockInfoArray &tmp_arr); +private: + ObTabletMacroInfo *macro_info_; + ObLinkedMacroBlockItemReader block_reader_; + int64_t cur_pos_; + int64_t cur_size_; + ObTabletMacroType cur_type_; + ObTabletMacroType target_type_; + ObBlockInfoArray block_info_arr_; + common::ObArenaAllocator allocator_; + bool is_linked_; + bool is_loaded_; + bool is_inited_; +}; +} +} + +#endif \ No newline at end of file diff --git a/src/storage/tablet/ob_tablet_meta.cpp b/src/storage/tablet/ob_tablet_meta.cpp index bf35909090..8d9cf7fe55 100644 --- a/src/storage/tablet/ob_tablet_meta.cpp +++ b/src/storage/tablet/ob_tablet_meta.cpp @@ -60,6 +60,7 @@ ObTabletMeta::ObTabletMeta() ddl_commit_scn_(SCN::min_scn()), mds_checkpoint_scn_(), transfer_info_(), + space_usage_(), create_schema_version_(0), compat_mode_(lib::Worker::CompatMode::INVALID), has_next_tablet_(false), @@ -448,6 +449,7 @@ void ObTabletMeta::reset() ddl_data_format_version_ = 0; mds_checkpoint_scn_.reset(); transfer_info_.reset(); + space_usage_.reset(); is_inited_ = false; } @@ -593,6 +595,8 @@ int ObTabletMeta::serialize(char *buf, const int64_t len, int64_t &pos) const LOG_WARN("failed to serialize transfer info", K(ret), K(len), K(new_pos), K_(transfer_info)); } else if (new_pos - pos < length && OB_FAIL(serialization::encode_i64(buf, len, new_pos, create_schema_version_))) { LOG_WARN("failed to serialize create schema version", K(ret), K(len), K(new_pos), K_(create_schema_version)); + } else if (new_pos - pos < length && OB_FAIL(space_usage_.serialize(buf, len, new_pos))) { + LOG_WARN("failed to serialize tablet space usage", K(ret), K(len), K(new_pos), K_(space_usage)); } else if (OB_UNLIKELY(length != new_pos - pos)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet meta's length doesn't match standard length", K(ret), K(new_pos), K(pos), K(length), K(length)); @@ -679,7 +683,9 @@ int 
ObTabletMeta::deserialize( } else if (new_pos - pos < length_ && OB_FAIL(transfer_info_.deserialize(buf, len, new_pos))) { LOG_WARN("failed to deserialize transfer info", K(ret), K(len), K(new_pos)); } else if (new_pos - pos < length_ && OB_FAIL(serialization::decode_i64(buf, len, new_pos, &create_schema_version_))) { - LOG_WARN("failed to deserialize create schema version", K(ret), K(len)); + LOG_WARN("failed to deserialize create schema version", K(ret), K(len), K(new_pos)); + } else if (new_pos - pos < length_ && OB_FAIL(space_usage_.deserialize(buf, len, new_pos))) { + LOG_WARN("failed to deserialize tablet space usage", K(ret), K(len), K(new_pos)); } else if (OB_UNLIKELY(length_ != new_pos - pos)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet's length doesn't match standard length", K(ret), K(new_pos), K(pos), K_(length)); @@ -726,6 +732,7 @@ int64_t ObTabletMeta::get_serialize_size() const size += mds_checkpoint_scn_.get_fixed_serialize_size(); size += transfer_info_.get_serialize_size(); size += serialization::encoded_length_i64(create_schema_version_); + size += space_usage_.get_serialize_size(); return size; } diff --git a/src/storage/tablet/ob_tablet_meta.h b/src/storage/tablet/ob_tablet_meta.h index 7ef32c7d0d..4828693ae4 100644 --- a/src/storage/tablet/ob_tablet_meta.h +++ b/src/storage/tablet/ob_tablet_meta.h @@ -38,13 +38,13 @@ #include "storage/tablet/ob_tablet_mds_data.h" #include "storage/tablet/ob_tablet_create_delete_mds_user_data.h" #include "storage/high_availability/ob_tablet_transfer_info.h" +#include "storage/tablet/ob_tablet_space_usage.h" namespace oceanbase { namespace storage { struct ObMigrationTabletParam; - class ObTabletMeta final { friend class ObTablet; @@ -143,7 +143,8 @@ public: K_(ddl_commit_scn), K_(mds_checkpoint_scn), K_(transfer_info), - K_(create_schema_version)); + K_(create_schema_version), + K_(space_usage)); public: int32_t version_; @@ -175,6 +176,7 @@ public: share::SCN ddl_commit_scn_; share::SCN mds_checkpoint_scn_; 
ObTabletTransferInfo transfer_info_; // alignment: 8B, size: 32B + ObTabletSpaceUsage space_usage_; // calculated by tablet persist, ObMigrationTabletParam doesn't need it int64_t create_schema_version_; // add after 4.2, record schema_version when first create tablet. NEED COMPAT //ATTENTION : Add a new variable need consider ObMigrationTabletParam // and tablet meta init interface for migration. diff --git a/src/storage/tablet/ob_tablet_persister.cpp b/src/storage/tablet/ob_tablet_persister.cpp index c5b2322e73..4c415e549c 100644 --- a/src/storage/tablet/ob_tablet_persister.cpp +++ b/src/storage/tablet/ob_tablet_persister.cpp @@ -19,6 +19,10 @@ #include "storage/meta_mem/ob_tenant_meta_mem_mgr.h" #include "storage/column_store/ob_column_oriented_sstable.h" #include "storage/tablet/ob_tablet_obj_load_helper.h" +#include "storage/tablet/ob_tablet_block_aggregated_info.h" +#include "storage/slog_ckpt/ob_linked_macro_block_writer.h" +#include "storage/tablet/ob_tablet_common.h" +#include "storage/tablet/ob_tablet_block_header.h" #include "storage/tablet/ob_tablet_slog_helper.h" using namespace std::placeholders; @@ -29,9 +33,26 @@ namespace oceanbase namespace storage { +int ObSharedBlockIndex::hash(uint64_t &hash_val) const +{ + int ret = OB_SUCCESS; + if (OB_FAIL(shared_macro_id_.hash(hash_val))) { + LOG_WARN("fail to calculate macro id's hash value", K(ret), K(shared_macro_id_)); + } else { + hash_val *= nested_offset_; + } + return ret; +} + +OB_INLINE bool ObSharedBlockIndex::operator ==(const ObSharedBlockIndex &other) const +{ + return other.shared_macro_id_ == shared_macro_id_ && other.nested_offset_ == nested_offset_; +} + ObTabletTransformArg::ObTabletTransformArg() : auto_inc_seq_ptr_(nullptr), rowkey_read_info_ptr_(nullptr), + tablet_macro_info_ptr_(nullptr), tablet_meta_(), table_store_addr_(), storage_schema_addr_(), @@ -42,6 +63,7 @@ ObTabletTransformArg::ObTabletTransformArg() extra_medium_info_(), medium_info_list_addr_(), auto_inc_seq_addr_(), + 
tablet_macro_info_addr_(), tablet_status_cache_(), is_row_store_(true), ddl_kvs_(nullptr), @@ -60,6 +82,7 @@ void ObTabletTransformArg::reset() { auto_inc_seq_ptr_ = nullptr; rowkey_read_info_ptr_ = nullptr; + tablet_macro_info_ptr_ = nullptr; tablet_meta_.reset(); table_store_addr_.reset(); storage_schema_addr_.reset(); @@ -70,6 +93,7 @@ void ObTabletTransformArg::reset() extra_medium_info_.reset(); medium_info_list_addr_.reset(); auto_inc_seq_addr_.reset(); + tablet_macro_info_addr_.reset(); tablet_status_cache_.reset(); is_row_store_ = true; ddl_kvs_ = nullptr; @@ -92,6 +116,7 @@ bool ObTabletTransformArg::is_valid() const && aux_tablet_info_uncommitted_kv_addr_.is_valid() && aux_tablet_info_committed_kv_addr_.is_valid() && auto_inc_seq_addr_.is_valid() + && tablet_macro_info_addr_.is_valid() && medium_info_list_addr_.is_valid(); } @@ -127,41 +152,263 @@ int64_t ObSSTablePersistWrapper::get_serialize_size() const return len; } +//==================================== ObMultiTimeStats====================================// + +ObMultiTimeStats::TimeStats::TimeStats(const char *owner) + : owner_(owner), + start_ts_(ObTimeUtility::current_time()), + last_ts_(start_ts_), + click_count_(0), + has_extra_info_(false) +{ + memset(click_, 0, sizeof(click_)); + memset(click_str_, 0, sizeof(click_str_)); +} + +void ObMultiTimeStats::TimeStats::click(const char *step_name) +{ + const int64_t cur_ts = ObTimeUtility::current_time(); + if (OB_LIKELY(click_count_ < MAX_CLICK_COUNT)) { + click_str_[click_count_] = step_name; + click_[click_count_++] = (int32_t)(cur_ts - last_ts_); + last_ts_ = cur_ts; + } +} + +int64_t ObMultiTimeStats::TimeStats::to_string(char *buf, const int64_t buf_len) const +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + int64_t i = 0; + ret = databuff_printf(buf, buf_len, pos, "owner:'%s' total=%ld%s", + owner_, last_ts_ - start_ts_, click_count_ > 0 ? 
", time_dist: " : ""); + + if (OB_SUCC(ret) && click_count_ > 0) { + ret = databuff_printf(buf, buf_len, pos, "%s=%d", click_str_[0], click_[0]); + } + for (int i = 1; OB_SUCC(ret) && i < click_count_; i++) { + ret = databuff_printf(buf, buf_len, pos, ", %s=%d", click_str_[i], click_[i]); + } + if (OB_SUCC(ret)) { + if (has_extra_info_) { + ret = databuff_printf(buf, buf_len, pos, " %s:%s", "extra_info", extra_info_); + } + } + if (OB_FAIL(ret)) { + pos = 0; + } + return pos; +} + +int ObMultiTimeStats::TimeStats::set_extra_info(const char *fmt, ...) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + va_list args; + va_start(args, fmt); + + if (OB_FAIL(databuff_vprintf(extra_info_, MAX_EXTRA_INFO_LENGTH, pos, fmt, args))) { + LOG_WARN("fail to databuff_vprintf", K(ret)); + } else { + has_extra_info_ = true; + } + va_end(args); + + return ret; +} + +ObMultiTimeStats::ObMultiTimeStats(ObArenaAllocator *allocator) + : allocator_(allocator), stats_(nullptr), stats_count_(0) +{ +} + +ObMultiTimeStats::~ObMultiTimeStats() +{ + for (int64_t i = 0; i < stats_count_; i++) { + stats_[i].~TimeStats(); + } + stats_count_ = 0; + stats_ = nullptr; +} + +int ObMultiTimeStats::acquire_stats(const char *owner, ObMultiTimeStats::TimeStats *&stats) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(stats_count_ >= MAX_STATS_CNT)) { /* all MAX_STATS_CNT slots are in use; refuse so the placement new below stays inside the allocation */ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("too many time stats", K(ret), K(stats_count_)); + } else if (OB_ISNULL(stats_) && + OB_ISNULL(stats_ = reinterpret_cast<TimeStats *>(allocator_->alloc(sizeof(TimeStats) * MAX_STATS_CNT)))) { /* backing array is allocated on first use, sized for MAX_STATS_CNT entries */ + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memory", K(ret)); + } else { + new (&stats_[stats_count_]) TimeStats(owner); + stats = &stats_[stats_count_]; + stats_count_++; + } + return ret; +} + +int64_t ObMultiTimeStats::to_string(char *buf, const int64_t buf_len) const +{ + int64_t pos = 0; + J_OBJ_START(); + for (int64_t i = 0; i < stats_count_; i++) { + databuff_printf(buf, buf_len, pos, "stats[%ld]: ", i); + BUF_PRINTO(stats_[i]); 
+ if (i != stats_count_-1) { + J_NEWLINE(); + } + } + J_OBJ_END(); + return pos; +} + +ObTabletPersister::ObTabletPersister(const int64_t ctx_id) + : allocator_("TblPersist", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID(), ctx_id), + multi_stats_(&allocator_) +{ +} +ObTabletPersister::~ObTabletPersister() +{ +} +void ObTabletPersister::print_time_stats( + const ObMultiTimeStats::TimeStats &time_stats, + const int64_t stats_warn_threshold, + const int64_t print_interval) +{ + int ret = OB_SUCCESS; + if (time_stats.get_total_time() > stats_warn_threshold) { + if (REACH_TIME_INTERVAL(100_ms)) { + LOG_WARN("[TABLET PERSISTER TIME STATS] cost too much time\n", K_(multi_stats)); + } + } else if (REACH_TIME_INTERVAL(print_interval)) { + FLOG_INFO("[TABLET PERSISTER TIME STATS]\n", K_(multi_stats)); + } +} int ObTabletPersister::persist_and_transform_tablet( const ObTablet &old_tablet, ObTabletHandle &new_handle) { - TIMEGUARD_INIT(STORAGE, 10_ms); int ret = OB_SUCCESS; const int64_t ctx_id = share::is_reserve_mode() ? 
ObCtxIds::MERGE_RESERVE_CTX_ID : ObCtxIds::DEFAULT_CTX_ID; - - // TODO(@DanLing) use LocalArena later - common::ObArenaAllocator allocator("PerstTbl", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID(), ctx_id); - common::ObSEArray tablet_meta_write_ctxs; - common::ObSEArray sstable_meta_write_ctxs; - tablet_meta_write_ctxs.set_attr(lib::ObMemAttr(MTL_ID(), "TblMetaWriCtx", ctx_id)); - sstable_meta_write_ctxs.set_attr(lib::ObMemAttr(MTL_ID(), "SstMetaWriCtx", ctx_id)); + ObTabletPersister persister(ctx_id); + ObMultiTimeStats::TimeStats *time_stats = nullptr; + common::ObSEArray total_write_ctxs; + ObLinkedMacroBlockItemWriter linked_writer; + ObTabletSpaceUsage space_usage; + int64_t total_tablet_meta_size = 0; + ObTabletMacroInfo tablet_macro_info; + total_write_ctxs.set_attr(lib::ObMemAttr(MTL_ID(), "TblMetaWriCtx", ctx_id)); if (OB_UNLIKELY(!old_tablet.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid old tablet to persist", K(ret), K(old_tablet)); - } else if (CLICK_FAIL(recursively_persist( - old_tablet, allocator, tablet_meta_write_ctxs, sstable_meta_write_ctxs, new_handle))) { - LOG_WARN("fail to recursively persist and fill tablet", K(ret), K(old_tablet)); - } else if (CLICK_FAIL(check_tablet_meta_ids(tablet_meta_write_ctxs, *(new_handle.get_obj())))) { - LOG_WARN("fail to check whether tablet meta's macro ids match", K(ret), K(tablet_meta_write_ctxs), KPC(new_handle.get_obj())); - } else if (CLICK_FAIL(persist_4k_tablet(allocator, new_handle))) { - LOG_WARN("fail to persist 4k tablet", K(ret), K(new_handle), KPC(new_handle.get_obj())); + } else if (OB_FAIL(persister.multi_stats_.acquire_stats("persist_and_transform_tablet", time_stats))) { + LOG_WARN("fail to acquire time stats", K(ret)); + } else if (OB_FAIL(persister.persist_and_fill_tablet( + old_tablet, linked_writer, total_write_ctxs, new_handle, space_usage, tablet_macro_info))) { + LOG_WARN("fail to persist and fill tablet", K(ret), K(old_tablet)); + } else if 
(FALSE_IT(time_stats->click("persist_and_fill_tablet"))) { + } else if (OB_FAIL(check_tablet_meta_ids(tablet_macro_info.shared_meta_block_info_arr_, *(new_handle.get_obj())))) { + LOG_WARN("fail to check whether tablet meta's macro ids match", + K(ret), K(tablet_macro_info.shared_meta_block_info_arr_), KPC(new_handle.get_obj())); + } else if (FALSE_IT(time_stats->click("check_tablet_meta_ids"))) { + } else if (OB_FAIL(persister.persist_aggregated_meta(tablet_macro_info, new_handle, space_usage))) { + LOG_WARN("fail to persist aggregated tablet", K(ret), K(new_handle), KPC(new_handle.get_obj())); } else { - FLOG_INFO("succeed to persist 4k tablet", K(&old_tablet), K(new_handle.get_obj())); + time_stats->click("persist_aggregated_meta"); + persister.print_time_stats(*time_stats, 20_ms, 1_s); } return ret; } -/*static*/int ObTabletPersister::copy_from_old_tablet( +/*static*/ int ObTabletPersister::persist_and_transform_only_tablet_meta( + const ObTablet &old_tablet, + ObITabletMetaModifier &modifier, + ObTabletHandle &new_tablet) +{ + int ret = OB_SUCCESS; + ObTabletPersister persister; + ObMultiTimeStats::TimeStats *time_stats = nullptr; + ObTabletMacroInfo *macro_info = nullptr; + bool in_memory = false; + + if (OB_UNLIKELY(!old_tablet.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid old tablet", K(ret), K(old_tablet)); + } else if (OB_UNLIKELY(old_tablet.allocator_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("this isn't supported for the tablet from allocator", K(ret), K(old_tablet)); + } else if (OB_UNLIKELY(!old_tablet.hold_ref_cnt_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("old tablet doesn't hold ref cnt", K(ret), K(old_tablet)); + } else if (OB_FAIL(persister.multi_stats_.acquire_stats("persist_and_transform_only_tablet_meta", time_stats))) { + LOG_WARN("fail to acquire time stats", K(ret)); + } else if (OB_FAIL(old_tablet.load_macro_info(persister.allocator_, macro_info, in_memory))) { + LOG_WARN("fail to fetch macro info", K(ret)); + } else if 
(FALSE_IT(time_stats->click("load_macro_info"))) { + } else if (OB_FAIL(persister.modify_and_fill_tablet(old_tablet, modifier, new_tablet))) { + LOG_WARN("fail to modify and fill tablet", K(ret), K(old_tablet)); + } else { + time_stats->click("modify_and_fill_tablet"); + ObTabletSpaceUsage space_usage = old_tablet.get_tablet_meta().space_usage_; + space_usage.shared_meta_size_ -= upper_align(old_tablet.get_tablet_addr().size(), DIO_READ_ALIGN_SIZE); + if (OB_FAIL(persister.persist_aggregated_meta(*macro_info, new_tablet, space_usage))) { + LOG_WARN("fail to persist aggregated meta", K(ret), KPC(macro_info), K(new_tablet), K(space_usage)); + } else { + time_stats->click("persist_aggregated_meta"); + persister.print_time_stats(*time_stats, 20_ms, 1_s); + } + } + if (OB_NOT_NULL(macro_info) && !in_memory) { + macro_info->~ObTabletMacroInfo(); + macro_info = nullptr; + } + return ret; +} + +int ObTabletPersister::modify_and_fill_tablet( + const ObTablet &old_tablet, + ObITabletMetaModifier &modifier, + ObTabletHandle &new_handle) +{ + int ret = OB_SUCCESS; + const ObTabletMeta &tablet_meta = old_tablet.get_tablet_meta(); + const ObTabletMapKey key(tablet_meta.ls_id_, tablet_meta.tablet_id_); + const char* buf = reinterpret_cast(&old_tablet); + const bool try_smaller_pool = old_tablet.get_try_cache_size() > ObTenantMetaMemMgr::NORMAL_TABLET_POOL_SIZE + ? 
false : true; + ObMetaObjBufferHeader &buf_header = ObMetaObjBufferHelper::get_buffer_header(const_cast(buf)); + ObTabletMemberWrapper auto_inc_seq; + ObTabletTransformArg arg; + ObTabletPoolType type; + ObMultiTimeStats::TimeStats *time_stats = nullptr; + if (OB_FAIL(multi_stats_.acquire_stats("persist_and_transform_only_tablet_meta", time_stats))) { + LOG_WARN("fail to acquire time stats", K(ret)); + } else if (OB_FAIL(ObTenantMetaMemMgr::get_tablet_pool_type(buf_header.buf_len_, type))) { + LOG_WARN("fail to get tablet pool type", K(ret), K(buf_header)); + } else if (OB_FAIL(acquire_tablet(type, key, try_smaller_pool, new_handle))) { + LOG_WARN("fail to acqurie tablet", K(ret), K(type), K(new_handle)); + } else if (OB_FAIL(convert_tablet_to_mem_arg(old_tablet, auto_inc_seq, arg))) { + LOG_WARN("fail to convert tablet to mem arg", K(ret), K(arg), K(old_tablet)); + } else if (FALSE_IT(time_stats->click("convert_tablet_to_mem_arg"))) { + } else if (OB_FAIL(transform(arg, new_handle.get_buf(), new_handle.get_buf_len()))) { + LOG_WARN("fail to transform tablet", K(ret), K(arg), + KP(new_handle.get_buf()), K(new_handle.get_buf_len()), K(old_tablet)); + } else if (FALSE_IT(new_handle.get_obj()->set_next_tablet_guard(old_tablet.next_tablet_guard_))) { + } else if (OB_FAIL(modifier.modify_tablet_meta(new_handle.get_obj()->tablet_meta_))) { + LOG_WARN("fail to modify tablet meta", K(ret), KPC(new_handle.get_obj())); + } else { + time_stats->click("transform_and_modify"); + } + return ret; +} + +/*static*/ int ObTabletPersister::copy_from_old_tablet( const ObTablet &old_tablet, ObTabletHandle &new_handle) { @@ -176,20 +423,30 @@ int ObTabletPersister::persist_and_transform_tablet( const ObTabletMeta &tablet_meta = old_tablet.get_tablet_meta(); const ObTabletMapKey key(tablet_meta.ls_id_, tablet_meta.tablet_id_); const char* buf = reinterpret_cast(&old_tablet); + const bool try_smaller_pool = old_tablet.get_try_cache_size() > ObTenantMetaMemMgr::NORMAL_TABLET_POOL_SIZE + ? 
false : true; ObMetaObjBufferHeader &buf_header = ObMetaObjBufferHelper::get_buffer_header(const_cast(buf)); ObTabletMemberWrapper auto_inc_seq; ObTabletTransformArg arg; ObTabletPoolType type; - if (OB_FAIL(ObTenantMetaMemMgr::get_tablet_pool_type(buf_header.buf_len_, type))) { + ObTabletPersister persister; + ObMultiTimeStats::TimeStats *time_stats = nullptr; + + if (OB_FAIL(persister.multi_stats_.acquire_stats("copy_from_old_tablet", time_stats))) { + LOG_WARN("fail to acquire time stats", K(ret)); + } else if (OB_FAIL(ObTenantMetaMemMgr::get_tablet_pool_type(buf_header.buf_len_, type))) { LOG_WARN("fail to get tablet pool type", K(ret), K(buf_header)); - } else if (OB_FAIL(acquire_tablet(type, key, true/*try_smaller_pool*/, new_handle))) { + } else if (OB_FAIL(acquire_tablet(type, key, try_smaller_pool, new_handle))) { LOG_WARN("fail to acqurie tablet", K(ret), K(type), K(new_handle)); } else if (OB_FAIL(convert_tablet_to_mem_arg(old_tablet, auto_inc_seq, arg))) { LOG_WARN("fail to convert tablet to mem arg", K(ret), K(arg), K(old_tablet)); - } else if (OB_FAIL(transform(arg, new_handle.get_buf(), new_handle.get_buf_len()))) { + } else if (FALSE_IT(time_stats->click("convert_tablet_to_mem_arg"))) { + } else if (OB_FAIL(persister.transform(arg, new_handle.get_buf(), new_handle.get_buf_len()))) { LOG_WARN("fail to transform tablet", K(ret), K(arg), KP(new_handle.get_buf()), K(new_handle.get_buf_len()), K(old_tablet)); } else { + time_stats->click("transform"); + persister.print_time_stats(*time_stats, 20_ms, 1_s); new_handle.get_obj()->set_next_tablet_guard(old_tablet.next_tablet_guard_); new_handle.get_obj()->set_tablet_addr(old_tablet.get_tablet_addr()); if (OB_FAIL(new_handle.get_obj()->inc_macro_ref_cnt())) { @@ -200,50 +457,25 @@ int ObTabletPersister::persist_and_transform_tablet( return ret; } -int ObTabletPersister::recursively_persist( - const ObTablet &old_tablet, - common::ObArenaAllocator &allocator, - common::ObIArray &tablet_meta_write_ctxs, - 
common::ObIArray &sstable_meta_write_ctxs, - ObTabletHandle &new_handle) -{ - TIMEGUARD_INIT(STORAGE, 10_ms); - int ret = OB_SUCCESS; - if (CLICK_FAIL(persist_and_fill_tablet( - old_tablet, allocator, tablet_meta_write_ctxs, sstable_meta_write_ctxs, new_handle))) { - LOG_WARN("fail to persist and fill tablet", K(ret), K(old_tablet)); - } else if (old_tablet.get_tablet_meta().has_next_tablet_) { - ObTabletHandle new_next_handle; - const ObTablet &old_next_tablet = *(old_tablet.get_next_tablet_guard().get_obj()); - if (CLICK_FAIL(recursively_persist( - old_next_tablet, allocator, tablet_meta_write_ctxs, sstable_meta_write_ctxs, new_next_handle))) { - LOG_WARN("fail to recursively persist and fill next tablet", - K(ret), K(old_next_tablet), K(tablet_meta_write_ctxs), K(sstable_meta_write_ctxs)); - } else { - new_handle.get_obj()->set_next_tablet_guard(new_next_handle); - } - } - return ret; -} - int ObTabletPersister::convert_tablet_to_mem_arg( const ObTablet &tablet, ObTabletMemberWrapper &auto_inc_seq, ObTabletTransformArg &arg) { - TIMEGUARD_INIT(STORAGE, 10_ms); int ret = OB_SUCCESS; arg.reset(); if (OB_UNLIKELY(!tablet.is_valid())) { ret = OB_NOT_INIT; LOG_WARN("old tablet isn't valid, don't allow to degrade tablet memory", K(ret), K(tablet)); - } else if (CLICK_FAIL(arg.tablet_status_cache_.assign(tablet.mds_data_.tablet_status_cache_))) { + } else if (OB_FAIL(arg.tablet_status_cache_.assign(tablet.mds_data_.tablet_status_cache_))) { LOG_WARN("fail to assign tablet status cache", K(ret), K(tablet)); - } else if (CLICK_FAIL(arg.tablet_meta_.assign(tablet.tablet_meta_))) { + } else if (OB_FAIL(arg.tablet_meta_.assign(tablet.tablet_meta_))) { LOG_WARN("fail to assign tablet meta", K(ret), K(tablet)); - } else if (CLICK_FAIL(tablet.fetch_autoinc_seq(auto_inc_seq))) { + } else if (OB_FAIL(tablet.fetch_autoinc_seq(auto_inc_seq))) { LOG_WARN("fail to fetch autoinc seq", K(ret), K(tablet)); } else { + arg.tablet_macro_info_addr_ = tablet.macro_info_addr_.addr_; + 
arg.tablet_macro_info_ptr_ = tablet.macro_info_addr_.ptr_; arg.auto_inc_seq_addr_ = tablet.mds_data_.auto_inc_seq_.addr_; arg.auto_inc_seq_ptr_ = arg.auto_inc_seq_addr_.is_none() ? nullptr : auto_inc_seq.get_member(); arg.rowkey_read_info_ptr_ = tablet.rowkey_read_info_; @@ -265,15 +497,15 @@ int ObTabletPersister::convert_tablet_to_mem_arg( } int ObTabletPersister::convert_tablet_to_disk_arg( - common::ObArenaAllocator &allocator, const ObTablet &tablet, - common::ObIArray &tablet_meta_write_ctxs, - common::ObIArray &sstable_meta_write_ctxs, + common::ObIArray &total_write_ctxs, ObTabletPoolType &type, - ObTabletTransformArg &arg) + ObTabletTransformArg &arg, + int64_t &total_tablet_meta_size, + ObBlockInfoSet &block_info_set) { - TIMEGUARD_INIT(STORAGE, 10_ms); int ret = OB_SUCCESS; + ObMultiTimeStats::TimeStats *time_stats = nullptr; arg.reset(); common::ObSEArray write_infos; @@ -291,37 +523,54 @@ int ObTabletPersister::convert_tablet_to_disk_arg( const ObTabletComplexAddr &committed_aux_tablet_info_addr = tablet.mds_data_.aux_tablet_info_.committed_kv_; const ObTabletComplexAddr &medium_info_list_addr = tablet.mds_data_.medium_info_list_; - if (CLICK_FAIL(arg.tablet_status_cache_.assign(tablet.mds_data_.tablet_status_cache_))) { + if (OB_FAIL(multi_stats_.acquire_stats("convert_tablet_to_disk_arg", time_stats))) { + LOG_WARN("fail to acquire time stats", K(ret)); + } else if (OB_FAIL(arg.tablet_status_cache_.assign(tablet.mds_data_.tablet_status_cache_))) { LOG_WARN("fail to assign tablet status cache", K(ret), K(tablet)); - } else if (CLICK_FAIL(arg.tablet_meta_.assign(tablet.tablet_meta_))) { + } else if (OB_FAIL(arg.tablet_meta_.assign(tablet.tablet_meta_))) { LOG_WARN("fail to assign tablet meta", K(ret), K(tablet)); } else if (FALSE_IT(arg.rowkey_read_info_ptr_ = tablet.rowkey_read_info_)) { } else if (FALSE_IT(arg.extra_medium_info_ = tablet.mds_data_.extra_medium_info_)) { - } else if (CLICK_FAIL(fetch_table_store_and_write_info(tablet, allocator, 
table_store_wrapper, write_infos, sstable_meta_write_ctxs))) { + } else if (OB_FAIL(fetch_table_store_and_write_info(tablet, table_store_wrapper, + write_infos, total_write_ctxs, total_tablet_meta_size, block_info_set))) { LOG_WARN("fail to fetch table store and write info", K(ret)); - } else if (CLICK_FAIL(load_auto_inc_seq_and_write_info(allocator, tablet.mds_data_.auto_inc_seq_, arg.auto_inc_seq_ptr_, write_infos, arg.auto_inc_seq_addr_))) { + } else if (FALSE_IT(time_stats->click("fetch_table_store_and_write_info"))) { + } else if (OB_FAIL(load_auto_inc_seq_and_write_info(allocator_, tablet.mds_data_.auto_inc_seq_, arg.auto_inc_seq_ptr_, write_infos, arg.auto_inc_seq_addr_))) { LOG_WARN("fail to load auto inc seq and write info", K(ret)); - } else if (FALSE_IT(arg.ddl_kvs_ = tablet.ddl_kvs_)) { - } else if (FALSE_IT(arg.ddl_kv_count_ = tablet.ddl_kv_count_)) { - } else if (FALSE_IT(arg.memtable_count_ = tablet.memtable_count_)) { - } else if (OB_ISNULL(MEMCPY(arg.memtables_, tablet.memtables_, sizeof(arg.memtables_)))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("fail to memcpy memtables", K(ret), KP(arg.memtables_), KP(tablet.memtables_)); - } else if (CLICK_FAIL(load_storage_schema_and_fill_write_info(tablet, allocator, write_infos))) { + } else { + time_stats->click("load_auto_inc_seq"); + arg.ddl_kvs_ = tablet.ddl_kvs_; + arg.ddl_kv_count_ = tablet.ddl_kv_count_; + arg.memtable_count_ = tablet.memtable_count_; + MEMCPY(arg.memtables_, tablet.memtables_, sizeof(arg.memtables_)); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(load_storage_schema_and_fill_write_info(tablet, allocator_, write_infos))) { LOG_WARN("fail to load storage schema and fill write info", K(ret)); - } else if (CLICK_FAIL(load_dump_kv_and_fill_write_info(allocator, uncommitted_tablet_status_addr, write_infos, arg.tablet_status_uncommitted_kv_addr_))) { + } else if (FALSE_IT(time_stats->click("load_storage_schema"))) { + } else if (OB_FAIL(load_dump_kv_and_fill_write_info(allocator_, 
uncommitted_tablet_status_addr, write_infos, arg.tablet_status_uncommitted_kv_addr_))) { LOG_WARN("fail to load tablet status uncommitted kv", K(ret), K(uncommitted_tablet_status_addr)); - } else if (CLICK_FAIL(load_dump_kv_and_fill_write_info(allocator, committed_tablet_status_addr, write_infos, arg.tablet_status_committed_kv_addr_))) { + } else if (OB_FAIL(load_dump_kv_and_fill_write_info(allocator_, committed_tablet_status_addr, write_infos, arg.tablet_status_committed_kv_addr_))) { LOG_WARN("fail to load tablet status committed kv", K(ret), K(committed_tablet_status_addr)); - } else if (CLICK_FAIL(load_dump_kv_and_fill_write_info(allocator, uncommitted_aux_tablet_info_addr, write_infos, arg.aux_tablet_info_uncommitted_kv_addr_))) { + } else if (OB_FAIL(load_dump_kv_and_fill_write_info(allocator_, uncommitted_aux_tablet_info_addr, write_infos, arg.aux_tablet_info_uncommitted_kv_addr_))) { LOG_WARN("fail to load aux tablet info uncommitted kv", K(ret), K(uncommitted_aux_tablet_info_addr)); - } else if (CLICK_FAIL(load_dump_kv_and_fill_write_info(allocator, committed_aux_tablet_info_addr, write_infos, arg.aux_tablet_info_committed_kv_addr_))) { + } else if (OB_FAIL(load_dump_kv_and_fill_write_info(allocator_, committed_aux_tablet_info_addr, write_infos, arg.aux_tablet_info_committed_kv_addr_))) { LOG_WARN("fail to load aux tablet info committed kv", K(ret), K(committed_aux_tablet_info_addr)); - } else if (CLICK_FAIL(write_and_fill_args(write_infos, arg, tablet_meta_write_ctxs))) { - LOG_WARN("fail to write and fill address", K(ret)); - } else if (CLICK_FAIL(load_medium_info_list_and_write(allocator, medium_info_list_addr, tablet_meta_write_ctxs, arg.medium_info_list_addr_))) { + } else if (FALSE_IT(time_stats->click("load_dump_kv"))) { + } else if (OB_FAIL(write_and_fill_args(write_infos, arg, total_write_ctxs, total_tablet_meta_size, block_info_set.shared_meta_block_info_set_))) { + LOG_WARN("fail to write and fill address", K(ret), K(write_infos)); + } else if 
(FALSE_IT(time_stats->click("write_and_fill_args"))) { + } else if (OB_FAIL(load_medium_info_list_and_write( + allocator_, + medium_info_list_addr, + total_write_ctxs, + arg.medium_info_list_addr_, + total_tablet_meta_size, + block_info_set.shared_meta_block_info_set_))) { LOG_WARN("fail to load medium info list and write", K(ret), K(medium_info_list_addr)); } else { + time_stats->click("load_medium_info_list_and_write"); const int64_t try_cache_size = tablet.get_try_cache_size() + table_store_wrapper.get_member()->get_deep_copy_size(); if (try_cache_size > ObTenantMetaMemMgr::NORMAL_TABLET_POOL_SIZE) { type = ObTabletPoolType::TP_LARGE; @@ -334,14 +583,16 @@ int ObTabletPersister::convert_tablet_to_disk_arg( int ObTabletPersister::persist_and_fill_tablet( const ObTablet &old_tablet, - common::ObArenaAllocator &allocator, - common::ObIArray &tablet_meta_write_ctxs, - common::ObIArray &sstable_meta_write_ctxs, - ObTabletHandle &new_handle) + ObLinkedMacroBlockItemWriter &linked_writer, + common::ObIArray &total_write_ctxs, + ObTabletHandle &new_handle, + ObTabletSpaceUsage &space_usage, + ObTabletMacroInfo &tablet_macro_info) { - TIMEGUARD_INIT(STORAGE, 10_ms); int ret = OB_SUCCESS; ObTabletTransformArg arg; + ObBlockInfoSet block_info_set; + ObMultiTimeStats::TimeStats *time_stats = nullptr; const ObTabletMeta &tablet_meta = old_tablet.get_tablet_meta(); const ObTabletMapKey key(tablet_meta.ls_id_, tablet_meta.tablet_id_); @@ -349,22 +600,51 @@ int ObTabletPersister::persist_and_fill_tablet( ObTabletMemberWrapper auto_inc_seq; // define here to keep auto_inc_seq_ptr safe bool try_smaller_pool = true; - if (old_tablet.is_empty_shell()) { + if (OB_FAIL(multi_stats_.acquire_stats("persist_and_fill_tablet", time_stats))) { + LOG_WARN("fail to acquire time stats", K(ret)); + } else if (OB_FAIL(block_info_set.init())) { + LOG_WARN("fail to init macro id set", K(ret)); + } else if (old_tablet.is_empty_shell()) { if (OB_FAIL(convert_tablet_to_mem_arg(old_tablet, 
auto_inc_seq, arg))) { LOG_WARN("fail to conver tablet to mem arg", K(ret), K(old_tablet)); + } else { + time_stats->click("convert_tablet_to_mem_arg"); } - } else if (CLICK_FAIL(convert_tablet_to_disk_arg( - allocator, old_tablet, tablet_meta_write_ctxs, sstable_meta_write_ctxs, type, arg))) { + } else if (OB_FAIL(convert_tablet_to_disk_arg( + old_tablet, total_write_ctxs, type, arg, space_usage.shared_meta_size_, block_info_set))) { LOG_WARN("fail to conver tablet to disk arg", K(ret), K(old_tablet)); - } else if (old_tablet.get_try_cache_size() > ObTenantMetaMemMgr::NORMAL_TABLET_POOL_SIZE) { - try_smaller_pool = false; + } else { + time_stats->click("convert_tablet_to_disk_arg"); + if (old_tablet.get_try_cache_size() > ObTenantMetaMemMgr::NORMAL_TABLET_POOL_SIZE) { + try_smaller_pool = false; + } } if (OB_FAIL(ret)) { - } else if (CLICK_FAIL(acquire_tablet(type, key, try_smaller_pool, new_handle))) { + } else if (OB_FAIL(tablet_macro_info.init(allocator_, block_info_set, linked_writer))) { + LOG_WARN("fail to init tablet block id arrary", K(ret)); + } else { + arg.tablet_macro_info_addr_.set_none_addr(); + arg.tablet_macro_info_ptr_ = &tablet_macro_info; + time_stats->click("init_tabelt_macro_info"); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(acquire_tablet(type, key, try_smaller_pool, new_handle))) { LOG_WARN("fail to acquire tablet", K(ret), K(key), K(type)); - } else if (CLICK_FAIL(transform(arg, new_handle.get_buf(), new_handle.get_buf_len()))) { + } else if (OB_FAIL(transform(arg, new_handle.get_buf(), new_handle.get_buf_len()))) { LOG_WARN("fail to transform old tablet", K(ret), K(arg), K(new_handle), K(type)); + } else { + time_stats->click("transform"); + space_usage.data_size_ = block_info_set.data_block_info_set_.size() * DEFAULT_MACRO_BLOCK_SIZE; + space_usage.meta_size_ = block_info_set.meta_block_info_set_.size() * DEFAULT_MACRO_BLOCK_SIZE; + int64_t shared_data_size = 0; + for (ObBlockInfoSet::MapIterator iter = 
block_info_set.shared_data_block_info_map_.begin(); + OB_SUCC(ret) && iter != block_info_set.shared_data_block_info_map_.end(); + ++iter) { + shared_data_size += iter->second; + } + space_usage.shared_data_size_ = shared_data_size; } return ret; @@ -374,15 +654,17 @@ int ObTabletPersister::transform_empty_shell(const ObTablet &old_tablet, ObTable { int ret = OB_SUCCESS; - ObArray tmp_tablet_meta_write_ctxs; - ObArray tmp_sstable_meta_write_ctxs; - ObArenaAllocator tmp_allocator; + ObLinkedMacroBlockItemWriter linked_writer; + common::ObArray total_write_ctxs; + ObTabletSpaceUsage space_usage; + ObTabletMacroInfo tablet_macro_info; + ObTabletPersister persister; if (OB_UNLIKELY(!old_tablet.is_empty_shell())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("only support transform empty shell", K(ret), K(old_tablet)); - } else if (OB_FAIL(persist_and_fill_tablet( - old_tablet, tmp_allocator, tmp_tablet_meta_write_ctxs, tmp_sstable_meta_write_ctxs, new_handle))) { + } else if (OB_FAIL(persister.persist_and_fill_tablet(old_tablet, linked_writer, + total_write_ctxs, new_handle, space_usage, tablet_macro_info))) { LOG_WARN("fail to persist old empty shell", K(ret), K(old_tablet)); } @@ -390,41 +672,28 @@ int ObTabletPersister::transform_empty_shell(const ObTablet &old_tablet, ObTable } int ObTabletPersister::check_tablet_meta_ids( - const common::ObIArray &tablet_meta_write_ctxs, + const ObBlockInfoArray &meta_id_arr, const ObTablet &tablet) { - TIMEGUARD_INIT(STORAGE, 10_ms); int ret = OB_SUCCESS; ObSArray meta_ids; - ObSArray ctx_ids; - for (int64_t i = 0; OB_SUCC(ret) && i < tablet_meta_write_ctxs.count(); i++) { - if (OB_FAIL(ObTablet::parse_meta_addr(tablet_meta_write_ctxs.at(i).addr_, ctx_ids))) { - LOG_WARN("fail to parse meta addr", K(ret), K(tablet_meta_write_ctxs.at(i).addr_)); - } - } - if (CLICK_FAIL(ret)) { - // do nothing - } else if (CLICK_FAIL(tablet.get_tablet_meta_ids(meta_ids))) { + if (OB_FAIL(tablet.get_tablet_first_second_level_meta_ids(meta_ids))) { 
LOG_WARN("fail to get tablet meta ids", K(ret), K(tablet)); - } else if (meta_ids.count() != ctx_ids.count()) { + } else if (OB_UNLIKELY(meta_ids.count() > meta_id_arr.cnt_)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("tablet meta's macro ids don't match", K(ret), K(meta_ids.count()), K(ctx_ids.count())); + LOG_WARN("num of macro blocks doesn't match", K(ret), K(meta_ids.count()), K(meta_id_arr.cnt_)); } else { - CLICK(); - for (int64_t i = 0; OB_SUCC(ret) && i < ctx_ids.count(); i++) { - for (int64_t j = 0; OB_SUCC(ret) && j < meta_ids.count(); j++) { - if (meta_ids.at(j) == ctx_ids.at(i)) { - if (OB_FAIL(meta_ids.remove(j))) { - LOG_WARN("fail to remove id from array", K(ret), K(ctx_ids.at(i))); - } else { - break; - } - } - if (OB_SUCC(ret) && j == meta_ids.count() - 1) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("tablet meta's macro ids don't match", K(ret), K(ctx_ids.at(i))); + bool found = false; + for (int64_t i = 0; OB_SUCC(ret) && i < meta_ids.count(); i++) { + for (int64_t j = 0; !found && j < meta_id_arr.cnt_; j++) { + if (meta_ids.at(i) == meta_id_arr.arr_[j]) { + found = true; } } + if (OB_UNLIKELY(!found)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet meta macro block doesn't match", K(ret)); + } } } return ret; @@ -455,37 +724,76 @@ int ObTabletPersister::acquire_tablet( return ret; } -int ObTabletPersister::persist_4k_tablet(common::ObArenaAllocator &allocator, ObTabletHandle &new_handle) +int ObTabletPersister::persist_aggregated_meta( + const ObTabletMacroInfo &tablet_macro_info, + ObTabletHandle &new_handle, + ObTabletSpaceUsage &space_usage) { - TIMEGUARD_INIT(STORAGE, 10_ms); int ret = OB_SUCCESS; ObTablet *new_tablet = new_handle.get_obj(); ObTenantCheckpointSlogHandler *ckpt_slog_handler = MTL(ObTenantCheckpointSlogHandler*); - common::ObSEArray write_infos; - const int64_t ctx_id = share::is_reserve_mode() - ? 
ObCtxIds::MERGE_RESERVE_CTX_ID - : ObCtxIds::DEFAULT_CTX_ID; - write_infos.set_attr(lib::ObMemAttr(MTL_ID(), "WriteInfos", ctx_id)); - + ObSharedBlockWriteInfo write_info; ObSharedBlockWriteHandle handle; ObSharedBlocksWriteCtx write_ctx; - if (CLICK_FAIL(fill_write_info(allocator, new_tablet, write_infos))) { + const int64_t secondary_meta_size = tablet_macro_info.get_serialize_size(); + MacroBlockId macro_id; + int64_t offset = 0; + int64_t size = 0; + if (OB_FAIL(fill_tablet_write_info(allocator_, new_tablet, tablet_macro_info, write_info))) { LOG_WARN("fail to fill write info", K(ret), KPC(new_tablet)); - } else if (CLICK_FAIL(ckpt_slog_handler->get_shared_block_reader_writer().async_write(write_infos.at(0), handle))) { - LOG_WARN("fail to async write", K(ret), "write_info", write_infos.at(0)); - } else if (CLICK_FAIL(handle.get_write_ctx(write_ctx))) { + } else if (OB_FAIL(ckpt_slog_handler->get_shared_block_raw_reader_writer().async_write(write_info, handle))) { + LOG_WARN("fail to async write", K(ret), "write_info", write_info); + } else if (OB_FAIL(handle.get_write_ctx(write_ctx))) { LOG_WARN("fail to batch get address", K(ret), K(handle)); } else if (FALSE_IT(new_tablet->set_tablet_addr(write_ctx.addr_))) { - } else if (CLICK_FAIL(new_tablet->inc_macro_ref_cnt())) { + } else if (OB_FAIL(write_ctx.addr_.get_block_addr(macro_id, offset, size))) { + LOG_WARN("fail to get block addr", K(ret), K(write_ctx)); + } else if (OB_FAIL(new_tablet->set_macro_info_addr(macro_id, offset + (size - secondary_meta_size), secondary_meta_size, ObMetaDiskAddr::DiskType::RAW_BLOCK))) { + LOG_WARN("fail to set macro info addr", K(ret), K(macro_id), K(offset), K(size), K(secondary_meta_size)); + } else if (OB_FAIL(new_tablet->inc_macro_ref_with_macro_info(tablet_macro_info))) { LOG_WARN("fail to increase macro ref cnt for new tablet", K(ret), KPC(new_tablet)); + } else { + space_usage.shared_meta_size_ += upper_align(write_ctx.addr_.size(), DIO_READ_ALIGN_SIZE); + 
new_tablet->tablet_meta_.space_usage_ = space_usage; } return ret; } -int ObTabletPersister::convert_arg_to_tablet( - const ObTabletTransformArg &arg, - ObTablet &tablet, - ObArenaAllocator &allocator) +int ObTabletPersister::fill_tablet_write_info( + common::ObArenaAllocator &allocator, + const ObTablet *tablet, + const ObTabletMacroInfo &tablet_macro_info, + ObSharedBlockWriteInfo &write_info) +{ + int ret = OB_SUCCESS; + ObInlineSecondaryMeta inline_meta(&tablet_macro_info, ObSecondaryMetaType::TABLET_MACRO_INFO); + ObSArray meta_arr; + + if (OB_ISNULL(tablet) || OB_UNLIKELY(!tablet_macro_info.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), KPC(tablet), K(tablet_macro_info)); + } else if (OB_FAIL(meta_arr.push_back(inline_meta))) { + LOG_WARN("fail to push back inline meta", K(ret), K(inline_meta)); + } else { + const int64_t size = tablet->get_serialize_size(meta_arr); + char *buf = static_cast(allocator.alloc(size)); + int64_t pos = 0; + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory for tablet serialize", K(ret), K(size)); + } else if (OB_FAIL(tablet->serialize(buf, size, pos, meta_arr))) { + LOG_WARN("fail to serialize tablet", K(ret), KPC(tablet), K(inline_meta), K(size), K(pos)); + } else { + write_info.buffer_ = buf; + write_info.offset_ = 0; + write_info.size_ = size; + write_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_COMPACT_WRITE); + } + } + return ret; +} + +int ObTabletPersister::convert_arg_to_tablet(const ObTabletTransformArg &arg, ObTablet &tablet) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!arg.is_valid())) { @@ -500,6 +808,7 @@ int ObTabletPersister::convert_arg_to_tablet( } else { tablet.table_store_addr_.addr_ = arg.table_store_addr_; tablet.storage_schema_addr_.addr_ = arg.storage_schema_addr_; + tablet.macro_info_addr_.addr_ = arg.tablet_macro_info_addr_; tablet.mds_data_.tablet_status_.uncommitted_kv_.addr_ = arg.tablet_status_uncommitted_kv_addr_; 
tablet.mds_data_.tablet_status_.committed_kv_.addr_ = arg.tablet_status_committed_kv_addr_; tablet.mds_data_.aux_tablet_info_.uncommitted_kv_.addr_ = arg.aux_tablet_info_uncommitted_kv_addr_; @@ -512,19 +821,18 @@ int ObTabletPersister::convert_arg_to_tablet( return ret; } -int ObTabletPersister::transform( - const ObTabletTransformArg &arg, - char *buf, - const int64_t len) +int ObTabletPersister::transform(const ObTabletTransformArg &arg, char *buf, const int64_t len) { - TIMEGUARD_INIT(STORAGE, 10_ms); int ret = OB_SUCCESS; ObTablet *tiny_tablet = reinterpret_cast(buf); - ObArenaAllocator allocator(common::ObMemAttr(MTL_ID(), "TmpPullMemTbl")); + ObMultiTimeStats::TimeStats *time_stats = nullptr; + if (len <= sizeof(ObTablet) || OB_ISNULL(buf)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), KP(buf), K(len)); - } else if (OB_FAIL(convert_arg_to_tablet(arg, *tiny_tablet, allocator))) { + } else if (OB_FAIL(multi_stats_.acquire_stats("transform", time_stats))) { + LOG_WARN("fail to acquire time stats", K(ret)); + } else if (OB_FAIL(convert_arg_to_tablet(arg, *tiny_tablet))) { LOG_WARN("fail to convert arg to tablet", K(ret), K(arg.tablet_meta_)); } else { // buf related @@ -540,7 +848,7 @@ int ObTabletPersister::transform( if (OB_UNLIKELY(remain < rowkey_read_info_size)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet memory buffer not enough for rowkey read info", K(ret), K(remain), K(rowkey_read_info_size)); - } else if (CLICK_FAIL(arg.rowkey_read_info_ptr_->deep_copy( + } else if (OB_FAIL(arg.rowkey_read_info_ptr_->deep_copy( buf + start_pos, remain, tiny_tablet->rowkey_read_info_))) { LOG_WARN("fail to deep copy rowkey read info to tablet", K(ret), KPC(arg.rowkey_read_info_ptr_)); } else if (OB_ISNULL(tiny_tablet->rowkey_read_info_)) { @@ -574,6 +882,7 @@ int ObTabletPersister::transform( // table store related ObTabletTableStore *table_store = nullptr; if (OB_SUCC(ret)) { + time_stats->click("before_load_table_store"); if 
(arg.table_store_addr_.is_none()) { void *ptr = nullptr; if (OB_ISNULL(ptr = allocator.alloc(sizeof(ObTabletTableStore)))) { @@ -581,29 +890,37 @@ int ObTabletPersister::transform( LOG_WARN("fail to allocate a buffer", K(ret), "sizeof", sizeof(ObTabletTableStore)); } else { table_store = new (ptr) ObTabletTableStore(); - if (CLICK_FAIL(table_store->init(allocator, *tiny_tablet))) { + if (OB_FAIL(table_store->init(allocator, *tiny_tablet))) { LOG_WARN("fail to init table store", K(ret), K(*tiny_tablet)); + } else { + time_stats->click("init_table_store"); } } - } else if (CLICK_FAIL(load_table_store(allocator, *tiny_tablet, arg.table_store_addr_, table_store))) { + } else if (OB_FAIL(load_table_store(allocator, *tiny_tablet, arg.table_store_addr_, table_store))) { LOG_WARN("fail to load table store", K(ret), KPC(tiny_tablet), K(arg.table_store_addr_)); + } else { + time_stats->click("load_table_store"); } } + int64_t remain_size_before_cache_table_store = 0; + int64_t table_store_size = 0; if (OB_SUCC(ret)) { - int64_t table_store_size = table_store->get_deep_copy_size(); + remain_size_before_cache_table_store = remain; + table_store_size = table_store->get_deep_copy_size(); if (OB_LIKELY((remain - table_store_size) >= 0)) { - if (CLICK_FAIL(table_store->batch_cache_sstable_meta(allocator, remain - table_store_size))) { + if (OB_FAIL(table_store->batch_cache_sstable_meta(allocator, remain - table_store_size))) { LOG_WARN("fail to batch cache sstable meta", K(ret), K(remain), K(table_store_size)); } else { ObIStorageMetaObj *table_store_obj = nullptr; table_store_size = table_store->get_deep_copy_size(); - if (CLICK_FAIL(table_store->deep_copy(buf + start_pos, remain, table_store_obj))) { + if (OB_FAIL(table_store->deep_copy(buf + start_pos, remain, table_store_obj))) { LOG_WARN("fail to deep copy table store v2", K(ret), K(table_store)); } else if (OB_ISNULL(table_store_obj)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected nullptr for rowkey table store deep copy", 
K(ret), K(table_store_obj)); } else { + time_stats->click("cache_table_store"); tiny_tablet->table_store_addr_.ptr_ = static_cast(table_store_obj); remain -= table_store_size; start_pos += table_store_size; @@ -625,9 +942,10 @@ int ObTabletPersister::transform( ObIStorageMetaObj *auto_inc_obj = nullptr; const int auto_inc_seq_size = arg.auto_inc_seq_ptr_->get_deep_copy_size(); if (OB_LIKELY((remain - auto_inc_seq_size) > 0)) { - if(CLICK_FAIL(arg.auto_inc_seq_ptr_->deep_copy(buf + start_pos, remain, auto_inc_obj))) { + if(OB_FAIL(arg.auto_inc_seq_ptr_->deep_copy(buf + start_pos, remain, auto_inc_obj))) { LOG_WARN("fail to deep copy auto inc seq", K(ret), K(arg.auto_inc_seq_ptr_)); } else { + time_stats->click("cache_auto_inc_seq"); tiny_tablet->mds_data_.auto_inc_seq_.ptr_ = static_cast(auto_inc_obj); remain -= auto_inc_seq_size; start_pos += auto_inc_seq_size; @@ -638,14 +956,42 @@ int ObTabletPersister::transform( } } } + + // id_array related + if (OB_SUCC(ret)) { + LOG_INFO("TINY TABLET: tablet + rowkey_read_info + tablet store + auto_inc_seq", KP(buf), K(start_pos), K(remain)); + ObTabletMacroInfo *tablet_macro_info_obj = nullptr; + if (OB_ISNULL(arg.tablet_macro_info_ptr_)) { + // no need to prefetch id_array, since we only need it when recycling tablet + } else { + int64_t tablet_macro_info_size = arg.tablet_macro_info_ptr_->get_deep_copy_size(); + if (remain >= tablet_macro_info_size) { + if (OB_FAIL(arg.tablet_macro_info_ptr_->deep_copy(buf + start_pos, remain, tablet_macro_info_obj))) { + LOG_WARN("fail to deep copy block id array", K(ret)); + } else { + time_stats->click("cache_macro_info"); + tiny_tablet->macro_info_addr_.ptr_ = tablet_macro_info_obj; + remain -= tablet_macro_info_size; + start_pos += tablet_macro_info_size; + } + } else { + LOG_DEBUG("TINY TABLET: no enough memory for tablet macro info", K(rowkey_read_info_size), K(remain), K(tablet_macro_info_size)); + } + } + } + if (OB_SUCC(ret)) { if 
(OB_FAIL(tiny_tablet->table_store_cache_.init(table_store->get_major_sstables(), table_store->get_minor_sstables(), arg.is_row_store_))) { LOG_WARN("failed to init table store cache", K(ret), KPC(table_store), K(arg)); } else { + time_stats->click("init_table_store_cache"); tiny_tablet->is_inited_ = true; } + LOG_DEBUG("succeed to transform", "tablet_id", tiny_tablet->tablet_meta_.tablet_id_, + KPC(tiny_tablet->table_store_addr_.ptr_), K(tiny_tablet->macro_info_addr_), + "tablet_buf_len", len, K(remain_size_before_cache_table_store), K(table_store_size), KPC(arg.tablet_macro_info_ptr_)); } } return ret; @@ -655,7 +1001,8 @@ int ObTabletPersister::batch_write_sstable_info( common::ObIArray &write_infos, common::ObIArray &write_ctxs, common::ObIArray &addrs, - common::ObIArray &meta_write_ctxs) + common::ObIArray &meta_write_ctxs, + ObBlockInfoSet &block_info_set) { int ret = OB_SUCCESS; ObSharedBlockBatchHandle handle; @@ -669,24 +1016,61 @@ int ObTabletPersister::batch_write_sstable_info( LOG_WARN("fail to batch get addr", K(ret), K(handle)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < write_ctxs.count(); ++i) { - if (OB_UNLIKELY(!write_ctxs.at(i).is_valid())) { + ObSharedBlocksWriteCtx &write_ctx = write_ctxs.at(i); + if (OB_UNLIKELY(!write_ctx.is_valid())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected invalid addr", K(ret), K(i), K(write_ctxs.at(i))); - } else if (OB_FAIL(addrs.push_back(write_ctxs.at(i).addr_))) { - LOG_WARN("fail to push sstable addr to array", K(ret), K(i), K(write_ctxs.at(i))); - } else if (OB_FAIL(meta_write_ctxs.push_back(write_ctxs.at(i)))) { - LOG_WARN("fail to push write ctxs to array", K(ret), K(i), K(write_ctxs.at(i))); + LOG_WARN("unexpected invalid addr", K(ret), K(i), K(write_ctx)); + } else if (OB_FAIL(addrs.push_back(write_ctx.addr_))) { + LOG_WARN("fail to push sstable addr to array", K(ret), K(i), K(write_ctx)); + } else if (OB_FAIL(meta_write_ctxs.push_back(write_ctx))) { + LOG_WARN("fail to push write ctxs to array", 
K(ret), K(i), K(write_ctx)); + } else if (OB_FAIL(block_info_set.shared_meta_block_info_set_.set_refactored(write_ctx.addr_.block_id(), 0 /*whether to overwrite*/))) { + if (OB_HASH_EXIST != ret) { + LOG_WARN("fail to push macro id into set", K(ret), K(i), K(write_ctx)); + } else { + ret = OB_SUCCESS; + } } } } return ret; } +int ObTabletPersister::convert_macro_info_map(SharedMacroMap &shared_macro_map, ObBlockInfoSet::TabletMacroMap &aggregated_info_map) +{ + int ret = OB_SUCCESS; + ObSharedBlockIndex shared_blk_index; + int64_t occupy_size = 0; + int64_t accumulated_size = 0; + for (SharedMacroIterator iter = shared_macro_map.begin(); OB_SUCC(ret) && iter != shared_macro_map.end(); ++iter) { + shared_blk_index = iter->first; + occupy_size = iter->second; + accumulated_size = 0; + if (OB_FAIL(aggregated_info_map.get_refactored(shared_blk_index.shared_macro_id_, accumulated_size))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("fail to get accumulated size", K(ret), K(shared_blk_index)); + } else { + ret = OB_SUCCESS; + } + } + if (OB_SUCC(ret) && OB_FAIL(aggregated_info_map.set_refactored( + shared_blk_index.shared_macro_id_, + accumulated_size + occupy_size, + 1/*whether to overwrite*/))) { + LOG_WARN("fail to update aggregated info map", K(ret), K(shared_blk_index), K(accumulated_size), K(occupy_size)); + } + } + return ret; +} + int ObTabletPersister::fetch_and_persist_co_sstable( common::ObArenaAllocator &allocator, ObCOSSTableV2 *co_sstable, common::ObIArray &meta_write_ctxs, - common::ObIArray &cg_addrs) + common::ObIArray &cg_addrs, + int64_t &total_tablet_meta_size, + ObBlockInfoSet &block_info_set, + SharedMacroMap &shared_macro_map) { int ret = OB_SUCCESS; common::ObSEArray cg_write_ctxs; @@ -704,7 +1088,7 @@ int ObTabletPersister::fetch_and_persist_co_sstable( LOG_WARN("get invalid arguments", K(ret), KPC(co_sstable)); } else if (FALSE_IT(total_size = co_sstable->get_serialize_size())) { } else if (total_size < SSTABLE_MAX_SERIALIZE_SIZE) { - // do 
noting + // do nothing } else { ObSSTableArray &cg_sstables = co_sstable->get_cg_sstables(); ObSSTable *cg_sstable = nullptr; @@ -713,29 +1097,39 @@ int ObTabletPersister::fetch_and_persist_co_sstable( ObSSTablePersistWrapper wrapper(cg_sstable); if (OB_FAIL(fill_write_info(allocator, &wrapper, cg_write_infos))) { LOG_WARN("failed to fill sstable write info", K(ret)); + } else if (OB_FAIL(copy_sstable_macro_info(*cg_sstable, shared_macro_map, block_info_set))) { + LOG_WARN("fail to call sstable macro info", K(ret)); } } - ObCOSSTableV2 *tmp_co_sstable = nullptr; if (OB_FAIL(ret)) { - } else if (0 < cg_write_infos.count() && - OB_FAIL(batch_write_sstable_info(cg_write_infos, cg_write_ctxs, cg_addrs, meta_write_ctxs))) { + } else if (0 < cg_write_infos.count() + && OB_FAIL(batch_write_sstable_info(cg_write_infos, cg_write_ctxs, cg_addrs, meta_write_ctxs, block_info_set))) { LOG_WARN("failed to batch write sstable", K(ret)); } else if (OB_UNLIKELY(cg_addrs.count() != cg_sstables.count())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected cg addrs count", K(ret), K(cg_addrs.count()), K(cg_sstables.count())); + } else { + int64_t sstable_meta_size = 0; + for (int64_t i = 0; i < cg_addrs.count(); i++) { + sstable_meta_size += cg_addrs.at(i).size(); + } + total_tablet_meta_size += upper_align(sstable_meta_size, DIO_READ_ALIGN_SIZE); } } + return ret; } int ObTabletPersister::fetch_and_persist_sstable( - common::ObArenaAllocator &allocator, ObTableStoreIterator &table_iter, ObTabletTableStore &new_table_store, - common::ObIArray &meta_write_ctxs) + common::ObIArray &meta_write_ctxs, + int64_t &total_tablet_meta_size, + ObBlockInfoSet &block_info_set) { int ret = OB_SUCCESS; + SharedMacroMap shared_macro_map; common::ObSEArray tables; common::ObSEArray addrs; common::ObSEArray cg_addrs; @@ -752,36 +1146,86 @@ int ObTabletPersister::fetch_and_persist_sstable( ObArenaAllocator tmp_allocator("PersistSSTable", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID(), ctx_id); ObITable *table = 
nullptr; + ObMultiTimeStats::TimeStats *time_stats = nullptr; + int32_t large_co_sstable_cnt = 0; + int32_t small_co_sstable_cnt = 0; + int32_t normal_sstable_cnt = 0; + int32_t cg_sstable_cnt = 0; + + if (OB_FAIL(multi_stats_.acquire_stats("fetch_and_persist_sstable", time_stats))) { + LOG_WARN("fail to acquire stats", K(ret)); + } else if (OB_FAIL(shared_macro_map.create(ObTablet::SHARED_MACRO_BUCKET_CNT, "ObBlockInfoMap", "SharedBlkNode", MTL_ID()))) { + LOG_WARN("fail to create shared macro map", K(ret)); + } while (OB_SUCC(ret) && OB_SUCC(table_iter.get_next(table))) { if (OB_ISNULL(table) || OB_UNLIKELY(!table->is_sstable())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error, table is nullptr", K(ret), KPC(table)); } else if (table->is_co_sstable() && table->get_serialize_size() > SSTABLE_MAX_SERIALIZE_SIZE) { + large_co_sstable_cnt++; + ObCOSSTableV2 *co_sstable = static_cast(table); + cg_sstable_cnt += co_sstable->get_cg_sstables().count(); // serialize full co sstable and shell cg sstables when the serialize size of CO reached the limit. 
FLOG_INFO("cannot full serialize CO within 2MB buffer, should serialize CO with Shell CG", K(ret), KPC(table)); - ObCOSSTableV2 *co_sstable = static_cast(table); cg_addrs.reset(); tmp_allocator.reuse(); ObCOSSTableV2 *tmp_co_sstable = nullptr; - if (OB_FAIL(fetch_and_persist_co_sstable(allocator, co_sstable, meta_write_ctxs, cg_addrs))) { + if (OB_FAIL(fetch_and_persist_co_sstable( + allocator_, co_sstable, meta_write_ctxs, cg_addrs, total_tablet_meta_size, block_info_set, shared_macro_map))) { LOG_WARN("fail to persist co sstable", K(ret)); - } else if (co_sstable->deep_copy(tmp_allocator, cg_addrs, tmp_co_sstable)) { + } else if (OB_FAIL(co_sstable->deep_copy(tmp_allocator, cg_addrs, tmp_co_sstable))) { LOG_WARN("failed to deep copy co sstable", K(ret), KPC(co_sstable)); } else { ObSSTablePersistWrapper wrapper(tmp_co_sstable); - if (OB_FAIL(fill_write_info(allocator, &wrapper, write_infos))) { + if (OB_FAIL(fill_write_info(allocator_, &wrapper, write_infos))) { LOG_WARN("failed to fill sstable write info", K(ret)); } else if (OB_FAIL(tables.push_back(tmp_co_sstable))) { LOG_WARN("failed to add table", K(ret)); + } else if (OB_FAIL(copy_sstable_macro_info(*tmp_co_sstable, shared_macro_map, block_info_set))) { + LOG_WARN("fail to call sstable macro info", K(ret)); } } } else { - ObSSTablePersistWrapper wrapper(static_cast(table)); - if (OB_FAIL(fill_write_info(allocator, &wrapper, write_infos))) { - LOG_WARN("failed to fill sstable write info", K(ret)); - } else if (OB_FAIL(tables.push_back(table))) { - LOG_WARN("failed to add table", K(ret)); + // Statistics the number of macroblocks of cg sstables + int64_t cg_sstable_meta_size = 0; + if (table->is_co_sstable()) { + small_co_sstable_cnt++; + ObCOSSTableV2 *co_sstable = static_cast(table); + ObSSTableArray &cg_sstables = co_sstable->get_cg_sstables(); + cg_sstable_cnt += cg_sstables.count(); + ObSSTable *cg_sstable = nullptr; + for (int64_t idx = 0; OB_SUCC(ret) && idx < cg_sstables.count(); ++idx) { + cg_sstable 
= cg_sstables[idx]; + const ObMetaDiskAddr &sstable_addr = cg_sstable->get_addr(); + if (OB_FAIL(copy_sstable_macro_info(*cg_sstable, shared_macro_map, block_info_set))) { + LOG_WARN("fail to call sstable macro info", K(ret)); + } else if (sstable_addr.is_block()) { + // this cg sstable has been persisted before + cg_sstable_meta_size += sstable_addr.size(); + if (OB_FAIL(block_info_set.shared_meta_block_info_set_.set_refactored(sstable_addr.block_id(), 0 /*whether to overwrite*/))) { + if (OB_HASH_EXIST != ret) { + LOG_WARN("fail to push macro id into set", K(ret), K(sstable_addr)); + } else { + ret = OB_SUCCESS; + } + } + } + } + } + + if (OB_SUCC(ret)) { + total_tablet_meta_size += upper_align(cg_sstable_meta_size, DIO_READ_ALIGN_SIZE); + normal_sstable_cnt++; + ObSSTable *sstable = static_cast(table); + ObSSTablePersistWrapper wrapper(static_cast(table)); + if (OB_FAIL(fill_write_info(allocator_, &wrapper, write_infos))) { + LOG_WARN("failed to fill sstable write info", K(ret)); + } else if (OB_FAIL(tables.push_back(table))) { + LOG_WARN("fail to push back sstable address", K(ret), K(tables)); + } else if (OB_FAIL(copy_sstable_macro_info(*sstable, shared_macro_map, block_info_set))) { + LOG_WARN("fail to call sstable macro info", K(ret)); + } } } } @@ -789,21 +1233,137 @@ int ObTabletPersister::fetch_and_persist_sstable( ret = OB_SUCCESS; } if (OB_FAIL(ret)) { - } else if (write_infos.count() > 0 && - OB_FAIL(batch_write_sstable_info(write_infos, write_ctxs, addrs, meta_write_ctxs))) { + } else if (OB_FAIL(time_stats->set_extra_info("%s:%ld,%s:%ld,%s:%ld,%s:%ld", + "large_co_sst_cnt", large_co_sstable_cnt, "small_co_sst_cnt", small_co_sstable_cnt, + "normal_sst_cnt", normal_sstable_cnt, "cg_sst_cnt", cg_sstable_cnt))) { + LOG_WARN("fail to set time stats extra info", K(ret)); + } else if (FALSE_IT(time_stats->click("fill_all_sstable_write_info"))) { + } else if (OB_FAIL(convert_macro_info_map(shared_macro_map, block_info_set.shared_data_block_info_map_))) { + 
LOG_WARN("fail to convert shared data block info map", K(ret)); + } else if (write_infos.count() > 0 + && OB_FAIL(batch_write_sstable_info(write_infos, write_ctxs, addrs, meta_write_ctxs, block_info_set))) { LOG_WARN("failed to batch write sstable", K(ret)); - } else if (OB_FAIL(new_table_store.init(allocator, tables, addrs))) { + } else if (FALSE_IT(time_stats->click("batch_write_sstable_info"))) { + } else if (OB_FAIL(new_table_store.init(allocator_, tables, addrs))) { LOG_WARN("fail to init new table store", K(ret), K(tables), K(addrs)); } else { - FLOG_INFO("success to init new table store", K(ret), K(new_table_store)); // tmp debug log, remove later + time_stats->click("init_new_table_store"); + int64_t sstable_meta_size = 0; + for (int64_t i = 0; i < addrs.count(); i++) { + sstable_meta_size += addrs.at(i).size(); + } + total_tablet_meta_size += upper_align(sstable_meta_size, DIO_READ_ALIGN_SIZE); } return ret; } +int ObTabletPersister::copy_sstable_macro_info(const ObSSTable &sstable, + SharedMacroMap &shared_macro_map, + ObBlockInfoSet &block_info_set) +{ + int ret = OB_SUCCESS; + ObSSTableMetaHandle meta_handle; + if (OB_FAIL(sstable.get_meta(meta_handle))) { + LOG_WARN("fail to get sstable meta handle", K(ret), K(sstable)); + } else if (sstable.is_small_sstable() && OB_FAIL(copy_shared_macro_info( + meta_handle.get_sstable_meta().get_macro_info(), + shared_macro_map, + block_info_set.meta_block_info_set_))) { + LOG_WARN("fail to copy shared macro's info", K(ret), K(meta_handle.get_sstable_meta().get_macro_info())); + } else if (!sstable.is_small_sstable() + && OB_FAIL(copy_data_macro_ids(meta_handle.get_sstable_meta().get_macro_info(), block_info_set))) { + LOG_WARN("fail to copy tablet's data macro ids", K(ret), K(meta_handle.get_sstable_meta().get_macro_info())); + } + return ret; +} + +int ObTabletPersister::copy_shared_macro_info( + const blocksstable::ObSSTableMacroInfo ¯o_info, + SharedMacroMap &shared_macro_map, + ObBlockInfoSet::TabletMacroSet 
&meta_id_set) +{ + int ret = OB_SUCCESS; + ObMacroIdIterator iter; + MacroBlockId macro_id; + if (OB_FAIL(macro_info.get_data_block_iter(iter))) { + LOG_WARN("fail to get data block iterator", K(ret)); + } else if (OB_FAIL(iter.get_next_macro_id(macro_id))) { + LOG_WARN("fail to get shared macro id", K(ret), K(iter)); + } else { + ObSharedBlockIndex block_idx(macro_id, macro_info.get_nested_offset()); + if (OB_FAIL(shared_macro_map.set_refactored(block_idx, macro_info.get_nested_size(), 0/*whether to overwrite*/))) { + if (OB_HASH_EXIST != ret) { + LOG_WARN("fail to push shared macro info into map", K(ret), K(macro_id), K(macro_info)); + } else { + ret = OB_SUCCESS; + } + } + } + iter.reset(); + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(macro_info.get_other_block_iter(iter))) { + LOG_WARN("fail to get other block iterator", K(ret)); + } else if (OB_FAIL(do_copy_ids(iter, meta_id_set))) { + LOG_WARN("fail to copy other block ids", K(ret)); + } + return ret; +} + +int ObTabletPersister::copy_data_macro_ids( + const blocksstable::ObSSTableMacroInfo ¯o_info, + ObBlockInfoSet &block_info_set) +{ + int ret = OB_SUCCESS; + ObMacroIdIterator iter; + MacroBlockId macro_id; + + if (OB_FAIL(macro_info.get_data_block_iter(iter))) { + LOG_WARN("fail to get data block iterator", K(ret)); + } else if (OB_FAIL(do_copy_ids(iter, block_info_set.data_block_info_set_))) { + LOG_WARN("fail to copy data block ids", K(ret), K(iter)); + } else if (FALSE_IT(iter.reset())) { + } else if (OB_FAIL(macro_info.get_other_block_iter(iter))) { + LOG_WARN("fail to get other block iterator", K(ret)); + } else if (OB_FAIL(do_copy_ids(iter, block_info_set.meta_block_info_set_))) { + LOG_WARN("fail to copy other block ids", K(ret), K(iter)); + } else if (FALSE_IT(iter.reset())) { + } else if (OB_FAIL(macro_info.get_linked_block_iter(iter))) { + LOG_WARN("fail to get linked block iterator", K(ret)); + } else if (OB_FAIL(do_copy_ids(iter, block_info_set.meta_block_info_set_))) { + 
LOG_WARN("fail to copy linked block ids", K(ret), K(iter)); + } + return ret; +} + +int ObTabletPersister::do_copy_ids( + blocksstable::ObMacroIdIterator &iter, + ObBlockInfoSet::TabletMacroSet &id_set) +{ + int ret = OB_SUCCESS; + MacroBlockId macro_id; + while (OB_SUCC(ret)) { + if (OB_FAIL(iter.get_next_macro_id(macro_id))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("fail to get next macro id", K(ret), K(macro_id)); + } + } else if (OB_FAIL(id_set.set_refactored(macro_id, 0 /*whether to overwrite*/))) { + if (OB_HASH_EXIST != ret) { + LOG_WARN("fail to push macro id into set", K(ret), K(macro_id)); + } else { + ret = OB_SUCCESS; + } + } + } + return OB_ITER_END == ret ? OB_SUCCESS : ret; +} + int ObTabletPersister::write_and_fill_args( const common::ObIArray &write_infos, ObTabletTransformArg &arg, - common::ObIArray &meta_write_ctxs) + common::ObIArray &total_write_ctxs, + int64_t &total_tablet_meta_size, + ObBlockInfoSet::TabletMacroSet &meta_block_id_set) { int ret = OB_SUCCESS; ObTenantCheckpointSlogHandler *ckpt_slog_handler = MTL(ObTenantCheckpointSlogHandler*); @@ -855,14 +1415,30 @@ int ObTabletPersister::write_and_fill_args( if (OB_UNLIKELY(!write_ctx.is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected write ctx", K(ret), K(i), K(write_ctx), K(handle)); - } else if (OB_FAIL(meta_write_ctxs.push_back(write_ctx))) { + } else if (OB_FAIL(total_write_ctxs.push_back(write_ctx))) { LOG_WARN("fail to push write ctx to array", K(ret), K(i), K(write_ctx)); - } else { + } else if (OB_FAIL(meta_block_id_set.set_refactored(write_ctx.addr_.block_id(), 0 /*whether to overwrite*/))) { + if (OB_HASH_EXIST != ret) { + LOG_WARN("fail to push macro id into set", K(ret), K(write_ctx.addr_)); + } else { + ret = OB_SUCCESS; + } + } + if (OB_SUCC(ret)) { *addr[i] = write_ctx.addr_; } } } } + if (OB_SUCC(ret)) { + int64_t tmp_meta_size = 0; + for (int64_t i = 0; i < total_addr_cnt; i++) { + if (!addr[i]->is_none()) { + tmp_meta_size += addr[i]->size(); + } 
+ } + total_tablet_meta_size += upper_align(tmp_meta_size, DIO_READ_ALIGN_SIZE); + } return ret; } @@ -896,7 +1472,9 @@ int ObTabletPersister::load_medium_info_list_and_write( common::ObArenaAllocator &allocator, const ObTabletComplexAddr &complex_addr, common::ObIArray &meta_write_ctxs, - ObMetaDiskAddr &addr) + ObMetaDiskAddr &addr, + int64_t &total_tablet_meta_size, + ObBlockInfoSet::TabletMacroSet &meta_block_id_set) { int ret = OB_SUCCESS; ObTabletDumpedMediumInfo *medium_info_list = nullptr; @@ -906,7 +1484,7 @@ int ObTabletPersister::load_medium_info_list_and_write( } else if (nullptr == medium_info_list) { addr.set_none_addr(); } else { - if (OB_FAIL(link_write_medium_info_list(medium_info_list, meta_write_ctxs, addr))) { + if (OB_FAIL(link_write_medium_info_list(medium_info_list, meta_write_ctxs, addr, total_tablet_meta_size, meta_block_id_set))) { LOG_WARN("failed to link write medium info list", K(ret)); } } @@ -919,7 +1497,9 @@ int ObTabletPersister::load_medium_info_list_and_write( int ObTabletPersister::link_write_medium_info_list( const ObTabletDumpedMediumInfo *medium_info_list, common::ObIArray &meta_write_ctxs, - ObMetaDiskAddr &addr) + ObMetaDiskAddr &addr, + int64_t &total_tablet_meta_size, + ObBlockInfoSet::TabletMacroSet &meta_block_id_set) { int ret = OB_SUCCESS; ObTenantCheckpointSlogHandler *ckpt_slog_handler = MTL(ObTenantCheckpointSlogHandler*); @@ -927,6 +1507,7 @@ int ObTabletPersister::link_write_medium_info_list( common::ObArenaAllocator arena_allocator(common::ObMemAttr(MTL_ID(), "serializer")); ObSharedBlockWriteInfo write_info; ObSharedBlockLinkHandle write_handle; + int64_t tmp_meta_size = 0; if (nullptr == medium_info_list) { // no need to do link write, just return NONE addr @@ -962,6 +1543,8 @@ int ObTabletPersister::link_write_medium_info_list( } else if (OB_UNLIKELY(!write_handle.is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error, write handle is invalid", K(ret), K(write_handle)); + } else { + tmp_meta_size 
+= upper_align(size, DIO_READ_ALIGN_SIZE); } } @@ -985,7 +1568,20 @@ int ObTabletPersister::link_write_medium_info_list( } else if (OB_FAIL(meta_write_ctxs.push_back(write_ctx))) { LOG_WARN("failed to push back write ctx", K(ret), K(write_ctx)); } else { + for (int64_t i = 0; OB_SUCC(ret) && i < write_ctx.block_ids_.count(); i++) { + const MacroBlockId &block_id = write_ctx.block_ids_.at(i); + if (OB_FAIL(meta_block_id_set.set_refactored(block_id, 0 /*whether to overwrite*/))) { + if (OB_HASH_EXIST != ret) { + LOG_WARN("fail to push macro id into set", K(ret), K(write_ctx.addr_)); + } else { + ret = OB_SUCCESS; + } + } + } + } + if (OB_SUCC(ret)) { addr = write_ctx.addr_; + total_tablet_meta_size += tmp_meta_size; } } } @@ -1015,18 +1611,24 @@ int ObTabletPersister::load_table_store( int64_t io_pos = 0; ObSharedBlockReadInfo read_info; ObSharedBlockReadHandle io_handle(io_allocator); + ObMultiTimeStats::TimeStats *time_stats = nullptr; + read_info.addr_ = addr; read_info.io_desc_.set_mode(ObIOMode::READ); read_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_DATA_READ); - if (OB_FAIL(ObSharedBlockReaderWriter::async_read(read_info, io_handle))) { + if (OB_FAIL(multi_stats_.acquire_stats("load_table_store", time_stats))) { + LOG_WARN("fail to acquire stats", K(ret)); + } else if (OB_FAIL(ObSharedBlockReaderWriter::async_read(read_info, io_handle))) { LOG_WARN("fail to async read", K(ret), K(read_info)); } else if (OB_FAIL(io_handle.wait())) { LOG_WARN("fail to wait io_hanlde", K(ret), K(read_info)); + } else if (FALSE_IT(time_stats->click("read_io"))) { } else if (OB_FAIL(io_handle.get_data(io_allocator, io_buf, buf_len))) { LOG_WARN("fail to get data", K(ret), K(read_info)); } else if (OB_FAIL(tmp_store->deserialize(allocator, tablet, io_buf, buf_len, io_pos))) { LOG_WARN("fail to deserialize table store", K(ret), K(tablet), KP(io_buf), K(buf_len)); } else { + time_stats->click("deserialize_table_store"); table_store = tmp_store; LOG_DEBUG("succeed to load 
table store", K(ret), K(addr), KPC(table_store), K(tablet)); } @@ -1042,43 +1644,60 @@ int ObTabletPersister::transform_tablet_memory_footprint( int ret = OB_SUCCESS; ObTabletMemberWrapper auto_inc_seq; ObTabletTransformArg arg; - if (OB_UNLIKELY(!old_tablet.hold_ref_cnt_)) { + ObTabletPersister persister; + ObMultiTimeStats::TimeStats *time_stats = nullptr; + if (OB_FAIL(persister.multi_stats_.acquire_stats("transform_tablet_memory_footprint", time_stats))) { + LOG_WARN("fail to acquire stats", K(ret)); + } else if (OB_UNLIKELY(!old_tablet.hold_ref_cnt_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("old tablet doesn't hold ref cnt", K(ret), K(old_tablet)); } else if (OB_FAIL(convert_tablet_to_mem_arg(old_tablet, auto_inc_seq, arg))) { LOG_WARN("fail to convert tablet to mem arg", K(ret), K(arg), KP(buf), K(len), K(old_tablet)); - } else if (OB_FAIL(transform(arg, buf, len))) { + } else if (FALSE_IT(time_stats->click("convert_tablet_to_mem_arg"))) { + } else if (OB_FAIL(persister.transform(arg, buf, len))) { LOG_WARN("fail to transform tablet", K(ret), K(arg), KP(buf), K(len), K(old_tablet)); } else { + time_stats->click("transform"); ObTablet *tablet = reinterpret_cast(buf); tablet->set_next_tablet_guard(old_tablet.next_tablet_guard_); tablet->set_tablet_addr(old_tablet.get_tablet_addr()); tablet->hold_ref_cnt_ = old_tablet.hold_ref_cnt_; + persister.print_time_stats(*time_stats, 20_ms, 1_s); } return ret; } int ObTabletPersister::fetch_table_store_and_write_info( const ObTablet &tablet, - common::ObArenaAllocator &allocator, ObTabletMemberWrapper &wrapper, common::ObIArray &write_infos, - common::ObIArray &meta_write_ctxs) + common::ObIArray &meta_write_ctxs, + int64_t &total_tablet_meta_size, + ObBlockInfoSet &block_info_set) { int ret = OB_SUCCESS; ObTabletTableStore new_table_store; + ObMultiTimeStats::TimeStats *time_stats = nullptr; const ObTabletTableStore *table_store = nullptr; ObTableStoreIterator table_iter; - if (OB_FAIL(tablet.fetch_table_store(wrapper))) { + 
if (OB_FAIL(multi_stats_.acquire_stats("persist_and_fill_tablet", time_stats))) { + LOG_WARN("fail to acquire time stats", K(ret)); + } else if (OB_FAIL(tablet.fetch_table_store(wrapper))) { LOG_WARN("fail to fetch table store", K(ret)); } else if (OB_FAIL(wrapper.get_member(table_store))) { LOG_WARN("fail to get table store from wrapper", K(ret), K(wrapper)); + } else if (FALSE_IT(time_stats->click("fetch_table_store"))) { } else if (OB_FAIL(table_store->get_all_sstable(table_iter))) { LOG_WARN("fail to get all sstable iterator", K(ret), KPC(table_store)); - } else if (OB_FAIL(fetch_and_persist_sstable(allocator, table_iter, new_table_store, meta_write_ctxs))) { + } else if (FALSE_IT(time_stats->click("get_all_sstable"))) { + } else if (OB_FAIL(fetch_and_persist_sstable( + table_iter, new_table_store, meta_write_ctxs, total_tablet_meta_size, block_info_set))) { LOG_WARN("fail to fetch and persist sstable", K(ret), K(table_iter)); - } else if (OB_FAIL(fill_write_info(allocator, &new_table_store, write_infos))) { + } else if (FALSE_IT(time_stats->click("fetch_and_persist_sstable"))) { + } else if (OB_FAIL(fill_write_info(allocator_, &new_table_store, write_infos))) { LOG_WARN("fail to fill table store write info", K(ret), K(new_table_store)); + } else { + time_stats->click("fill_write_info"); } return ret; } diff --git a/src/storage/tablet/ob_tablet_persister.h b/src/storage/tablet/ob_tablet_persister.h index be6daccf0f..afa13eab6a 100644 --- a/src/storage/tablet/ob_tablet_persister.h +++ b/src/storage/tablet/ob_tablet_persister.h @@ -25,8 +25,39 @@ namespace oceanbase { namespace storage { +struct ObBlockInfoSet; +class ObTabletMacroInfo; class ObCOSSTableV2; +struct ObSharedBlockIndex final +{ +public: + ObSharedBlockIndex() + : shared_macro_id_(), nested_offset_(0) + { + } + ObSharedBlockIndex(const blocksstable::MacroBlockId &shared_macro_id, const int64_t nested_offset) + : shared_macro_id_(shared_macro_id), nested_offset_(nested_offset) + { + } + 
~ObSharedBlockIndex() + { + reset(); + } + void reset() + { + shared_macro_id_.reset(); + nested_offset_ = 0; + } + int hash(uint64_t &hash_val) const; + bool operator ==(const ObSharedBlockIndex &other) const; + + TO_STRING_KV(K_(shared_macro_id), K_(nested_offset)); +public: + blocksstable::MacroBlockId shared_macro_id_; + int64_t nested_offset_; +}; + class ObTabletTransformArg final { public: @@ -48,11 +79,14 @@ public: K_(extra_medium_info), K_(medium_info_list_addr), K_(auto_inc_seq_addr), + K_(tablet_macro_info_addr), + KP_(tablet_macro_info_ptr), K_(tablet_status_cache), K_(is_row_store)); public: const share::ObTabletAutoincSeq *auto_inc_seq_ptr_; const ObRowkeyReadInfo *rowkey_read_info_ptr_; + const ObTabletMacroInfo *tablet_macro_info_ptr_; ObTabletMeta tablet_meta_; ObMetaDiskAddr table_store_addr_; ObMetaDiskAddr storage_schema_addr_; @@ -63,11 +97,11 @@ public: compaction::ObExtraMediumInfo extra_medium_info_; ObMetaDiskAddr medium_info_list_addr_; ObMetaDiskAddr auto_inc_seq_addr_; + ObMetaDiskAddr tablet_macro_info_addr_; ObTabletCreateDeleteMdsUserData tablet_status_cache_; bool is_row_store_; ObITable **ddl_kvs_; int64_t ddl_kv_count_; - // memtable::ObIMemtable **memtables_; memtable::ObIMemtable *memtables_[MAX_MEMSTORE_CNT]; int64_t memtable_count_; // If you want to add new member, make sure all member is assigned in 2 convert function. 
@@ -75,7 +109,6 @@ public: // ObTabletPersister::convert_tablet_to_disk_arg }; - class ObSSTablePersistWrapper final { public: @@ -91,17 +124,83 @@ private: DISALLOW_COPY_AND_ASSIGN(ObSSTablePersistWrapper); }; +class ObITabletMetaModifier +{ +public: + ObITabletMetaModifier() = default; + virtual ~ObITabletMetaModifier() = default; + virtual int modify_tablet_meta(ObTabletMeta &meta) = 0; +}; + + +class ObMultiTimeStats +{ +public: + class TimeStats + { + public: + explicit TimeStats(const char *owner); + void click(const char *step_name); + ~TimeStats() {} + int64_t to_string(char *buf, const int64_t buf_len) const; + int set_extra_info(const char *fmt, ...); + int64_t get_total_time() const { return last_ts_ - start_ts_; } + + private: + static const int64_t MAX_CLICK_COUNT = 16; + static const int64_t MAX_EXTRA_INFO_LENGTH = 128; + const char *owner_; + int64_t start_ts_; + int64_t last_ts_; + const char *click_str_[MAX_CLICK_COUNT]; + int32_t click_[MAX_CLICK_COUNT]; + int32_t click_count_; + char extra_info_[MAX_EXTRA_INFO_LENGTH]; + bool has_extra_info_; + + DISALLOW_COPY_AND_ASSIGN(TimeStats); + }; + + explicit ObMultiTimeStats(ObArenaAllocator *allocator); + ~ObMultiTimeStats(); + int acquire_stats(const char *owner, TimeStats *&stats); + int64_t to_string(char *buf, const int64_t buf_len) const; + +private: + static const int64_t MAX_STATS_CNT = 16; + ObArenaAllocator *allocator_; + TimeStats *stats_; + int32_t stats_count_; + + DISALLOW_COPY_AND_ASSIGN(ObMultiTimeStats); +}; class ObTabletPersister final { public: - ObTabletPersister() = default; - ~ObTabletPersister() = default; + static const int64_t MAP_EXTEND_RATIO = 2; + typedef typename common::hash::ObHashMap, + common::hash::equal_to, + common::hash::SimpleAllocer::AllocType>, + common::hash::NormalPointer, + oceanbase::common::ObMalloc, + MAP_EXTEND_RATIO> SharedMacroMap; + typedef typename SharedMacroMap::iterator SharedMacroIterator; +public: + explicit ObTabletPersister(const int64_t ctx_id = 
0); + ~ObTabletPersister(); // Persist the old tablet itself and all internal members, and transform it into a new tablet // from object pool. The old tablet can be allocated by allocator or from object pool. static int persist_and_transform_tablet( const ObTablet &old_tablet, ObTabletHandle &new_handle); + static int persist_and_transform_only_tablet_meta( + const ObTablet &old_tablet, + ObITabletMetaModifier &modifier, + ObTabletHandle &new_tablet); // copy from old tablet static int copy_from_old_tablet( const ObTablet &old_tablet, @@ -113,10 +212,25 @@ public: const ObTablet &old_tablet, char *buf, const int64_t len); + static int convert_macro_info_map(SharedMacroMap &shared_macro_map, ObBlockInfoSet::TabletMacroMap &aggregated_info_map); + static int copy_sstable_macro_info( + const blocksstable::ObSSTable &sstable, + SharedMacroMap &shared_macro_map, + ObBlockInfoSet &block_info_set); + static int copy_shared_macro_info( + const blocksstable::ObSSTableMacroInfo ¯o_info, + SharedMacroMap &shared_macro_map, + ObBlockInfoSet::TabletMacroSet &meta_id_set); + static int copy_data_macro_ids( + const blocksstable::ObSSTableMacroInfo ¯o_info, + ObBlockInfoSet &block_info_set); static int transform_empty_shell(const ObTablet &old_tablet, ObTabletHandle &new_handle); private: + static int do_copy_ids( + blocksstable::ObMacroIdIterator &iter, + ObBlockInfoSet::TabletMacroSet &id_set); static int check_tablet_meta_ids( - const common::ObIArray &tablet_meta_write_ctxs, + const ObBlockInfoArray &meta_id_arr, const ObTablet &tablet); static int acquire_tablet( const ObTabletPoolType &type, @@ -127,60 +241,64 @@ private: const ObTablet &tablet, ObTabletMemberWrapper &auto_inc_seq, ObTabletTransformArg &arg); - static int convert_tablet_to_disk_arg( - common::ObArenaAllocator &allocator, + int convert_tablet_to_disk_arg( const ObTablet &tablet, - common::ObIArray &tablet_meta_write_ctxs, - common::ObIArray &sstable_meta_write_ctxs, + common::ObIArray &total_write_ctxs, 
ObTabletPoolType &type, - ObTabletTransformArg &arg); + ObTabletTransformArg &arg, + int64_t &total_tablet_meta_size, + ObBlockInfoSet &block_info_set); static int convert_arg_to_tablet( const ObTabletTransformArg &arg, - ObTablet &tablet, - ObArenaAllocator &allocator); - static int transform( + ObTablet &tablet); + int transform( const ObTabletTransformArg &arg, char *buf, const int64_t len); - static int recursively_persist( + int persist_and_fill_tablet( const ObTablet &old_tablet, - common::ObArenaAllocator &allocator, - common::ObIArray &tablet_meta_write_ctxs, - common::ObIArray &sstable_meta_write_ctxs, - ObTabletHandle &new_handle); - static int persist_and_fill_tablet( + ObLinkedMacroBlockItemWriter &linked_writer, + common::ObIArray &total_write_ctxs, + ObTabletHandle &new_handle, + ObTabletSpaceUsage &space_usage, + ObTabletMacroInfo ¯o_info); + int modify_and_fill_tablet( const ObTablet &old_tablet, - common::ObArenaAllocator &allocator, - common::ObIArray &tablet_meta_write_ctxs, - common::ObIArray &sstable_meta_write_ctxs, + ObITabletMetaModifier &modifier, ObTabletHandle &new_handle); - static int fetch_and_persist_sstable( - common::ObArenaAllocator &allocator, + int fetch_and_persist_sstable( ObTableStoreIterator &table_iter, ObTabletTableStore &new_table_store, - common::ObIArray &meta_write_ctxs); + common::ObIArray &meta_write_ctxs, + int64_t &total_tablet_meta_size, + ObBlockInfoSet &block_info_set); static int fetch_and_persist_co_sstable( common::ObArenaAllocator &allocator, storage::ObCOSSTableV2 *co_sstable, common::ObIArray &meta_write_ctxs, - common::ObIArray &cg_addrs); + common::ObIArray &cg_addrs, + int64_t &total_tablet_meta_size, + ObBlockInfoSet &block_info_set, + SharedMacroMap &shared_macro_map); static int batch_write_sstable_info( common::ObIArray &write_infos, common::ObIArray &write_ctxs, common::ObIArray &addrs, - common::ObIArray &meta_write_ctxs); + common::ObIArray &meta_write_ctxs, + ObBlockInfoSet &block_info_set); 
static int load_auto_inc_seq_and_write_info( common::ObArenaAllocator &allocator, const ObTabletComplexAddr &complex_addr, const share::ObTabletAutoincSeq *&auto_inc_seq, common::ObIArray &write_infos, ObMetaDiskAddr &addr); - static int fetch_table_store_and_write_info( + int fetch_table_store_and_write_info( const ObTablet &tablet, - common::ObArenaAllocator &allocator, ObTabletMemberWrapper &wrapper, common::ObIArray &write_infos, - common::ObIArray &meta_write_ctxs); + common::ObIArray &meta_write_ctxs, + int64_t &total_tablet_meta_size, + ObBlockInfoSet &block_info_set); static int load_storage_schema_and_fill_write_info( const ObTablet &tablet, common::ObArenaAllocator &allocator, @@ -194,11 +312,15 @@ private: common::ObArenaAllocator &allocator, const ObTabletComplexAddr &complex_addr, common::ObIArray &meta_write_ctxs, - ObMetaDiskAddr &addr); + ObMetaDiskAddr &addr, + int64_t &total_tablet_meta_size, + ObBlockInfoSet::TabletMacroSet &meta_block_id_set); static int link_write_medium_info_list( const ObTabletDumpedMediumInfo *medium_info_list, common::ObIArray &meta_write_ctxs, - ObMetaDiskAddr &addr); + ObMetaDiskAddr &addr, + int64_t &total_tablet_meta_size, + ObBlockInfoSet::TabletMacroSet &meta_block_id_set); template static int fill_write_info( common::ObArenaAllocator &allocator, @@ -211,18 +333,36 @@ private: static int write_and_fill_args( const common::ObIArray &write_infos, ObTabletTransformArg &arg, - common::ObIArray &meta_write_ctxs); - static int persist_4k_tablet( + common::ObIArray &meta_write_ctxs, + int64_t &total_tablet_meta_size, + ObBlockInfoSet::TabletMacroSet &meta_block_id_set); + int persist_aggregated_meta( + const ObTabletMacroInfo &tablet_macro_info, + ObTabletHandle &new_handle, + ObTabletSpaceUsage &space_usage); + static int fill_tablet_write_info( common::ObArenaAllocator &allocator, - ObTabletHandle &new_handle); - static int load_table_store( + const ObTablet *tablet, + const ObTabletMacroInfo &tablet_macro_info, + 
ObSharedBlockWriteInfo &write_info); + int load_table_store( common::ObArenaAllocator &allocator, const ObTablet &tablet, const ObMetaDiskAddr &addr, ObTabletTableStore *&table_store); + void print_time_stats( + const ObMultiTimeStats::TimeStats &time_stats, + const int64_t stats_warn_threshold, + const int64_t print_interval); public: static const int64_t SSTABLE_MAX_SERIALIZE_SIZE = 1966080L; // 1.875MB + +private: + ObArenaAllocator allocator_; + ObMultiTimeStats multi_stats_; + + DISALLOW_COPY_AND_ASSIGN(ObTabletPersister); }; template diff --git a/src/storage/tablet/ob_tablet_space_usage.cpp b/src/storage/tablet/ob_tablet_space_usage.cpp new file mode 100644 index 0000000000..cece6adff2 --- /dev/null +++ b/src/storage/tablet/ob_tablet_space_usage.cpp @@ -0,0 +1,105 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#define USING_LOG_PREFIX STORAGE
+
+#include "storage/tablet/ob_tablet_space_usage.h"
+#include "storage/tablet/ob_tablet_block_aggregated_info.h"
+
+
+namespace oceanbase
+{
+namespace storage
+{
+int ObTabletSpaceUsage::serialize(char *buf, const int64_t buf_len, int64_t &pos) const
+{
+  int ret = OB_SUCCESS;
+  int32_t length = get_serialize_size();
+  int64_t new_pos = pos;
+  // Record layout: version(i32), length(i32), then the four sizes as i64. Work on new_pos so pos moves only on full success.
+  if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len <= 0) || OB_UNLIKELY(pos < 0)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid args", K(ret), KP(buf), K(buf_len), K(pos));
+  } else if (OB_UNLIKELY(length > buf_len - pos)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("buffer is not enough", K(ret), K(length), K(buf_len), K(pos));
+  } else if (OB_FAIL(serialization::encode_i32(buf, buf_len, new_pos, TABLET_SPACE_USAGE_INFO_VERSION))) {
+    LOG_WARN("fail to serialize version", K(ret), K(buf_len), K(new_pos));
+  } else if (OB_FAIL(serialization::encode_i32(buf, buf_len, new_pos, length))) {
+    LOG_WARN("fail to serialize length", K(ret), K(buf_len), K(new_pos), K(length));
+  } else if (OB_FAIL(serialization::encode_i64(buf, buf_len, new_pos, shared_data_size_))) {
+    LOG_WARN("fail to serialize shared_data_size_", K(ret), K(buf_len), K(new_pos), K(length), K(shared_data_size_));
+  } else if (OB_FAIL(serialization::encode_i64(buf, buf_len, new_pos, data_size_))) {
+    LOG_WARN("fail to serialize data_size_", K(ret), K(buf_len), K(new_pos), K(length), K(data_size_));
+  } else if (OB_FAIL(serialization::encode_i64(buf, buf_len, new_pos, shared_meta_size_))) {
+    LOG_WARN("fail to serialize shared_meta_size_", K(ret), K(buf_len), K(new_pos), K(length), K(shared_meta_size_));
+  } else if (OB_FAIL(serialization::encode_i64(buf, buf_len, new_pos, meta_size_))) {
+    LOG_WARN("fail to serialize meta_size_", K(ret), K(buf_len), K(new_pos), K(length), K(meta_size_));
+  } else if (OB_UNLIKELY(length != new_pos - pos)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("length doesn't match", K(ret), K(length), K(new_pos), K(pos));
+  } else {
+    pos = new_pos;
+  }
+  return ret;
+}
+// Reads back the record written by serialize(); pos is advanced only when the bytes consumed equal the stored length.
+int ObTabletSpaceUsage::deserialize(const char *buf, const int64_t data_len, int64_t &pos)
+{
+  int ret = OB_SUCCESS;
+  int64_t new_pos = pos;
+  int32_t length = 0;
+  int32_t version = -1;
+  // Each size field is decoded only while the stored length has bytes left, so a record with fewer trailing fields is accepted.
+  if (OB_ISNULL(buf) || OB_UNLIKELY(data_len <= 0) || OB_UNLIKELY(pos < 0)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid args", K(ret), KP(buf), K(data_len), K(pos));
+  } else if (OB_FAIL(serialization::decode_i32(buf, data_len, new_pos, &version))) {
+    LOG_WARN("fail to deserialize version", K(ret), K(data_len), K(new_pos));
+  } else if (OB_UNLIKELY(TABLET_SPACE_USAGE_INFO_VERSION != version)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("version doesn't match", K(ret), K(version));
+  } else if (OB_FAIL(serialization::decode_i32(buf, data_len, new_pos, &length))) { // OB_FAIL, not OB_UNLIKELY: the error code must reach ret
+    LOG_WARN("fail to deserialize length", K(ret), K(data_len), K(new_pos), K(length));
+  } else if (OB_UNLIKELY(length > data_len - pos)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("buffer is not enough", K(ret), K(data_len), K(pos), K(length));
+  } else if (new_pos - pos < length && OB_FAIL(serialization::decode_i64(buf, data_len, new_pos, &shared_data_size_))) {
+    LOG_WARN("fail to deserialize shared_data_size_", K(ret), K(data_len), K(new_pos), K(length));
+  } else if (new_pos - pos < length && OB_FAIL(serialization::decode_i64(buf, data_len, new_pos, &data_size_))) {
+    LOG_WARN("fail to deserialize data_size_", K(ret), K(data_len), K(new_pos), K(length));
+  } else if (new_pos - pos < length && OB_FAIL(serialization::decode_i64(buf, data_len, new_pos, &shared_meta_size_))) {
+    LOG_WARN("fail to deserialize shared_meta_size_", K(ret), K(data_len), K(new_pos), K(length));
+  } else if (new_pos - pos < length && OB_FAIL(serialization::decode_i64(buf, data_len, new_pos, &meta_size_))) {
+    LOG_WARN("fail to deserialize meta_size_", K(ret), K(data_len), K(new_pos), K(length));
+  } else if (OB_UNLIKELY(length != new_pos - pos)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("length doesn't match", K(ret), K(length), K(new_pos), K(pos));
+  } else {
+    pos = new_pos;
+  }
+  return ret;
+}
+// Byte size of the record serialize() writes, counting the version and length fields as well as the four sizes.
+int32_t ObTabletSpaceUsage::get_serialize_size() const
+{
+  int32_t len = 0;
+  len += serialization::encoded_length_i32(TABLET_SPACE_USAGE_INFO_VERSION);
+  len += serialization::encoded_length_i32(len); // the length field itself; presumably fixed-width, so the argument value does not matter - confirm
+  len += serialization::encoded_length_i64(shared_data_size_);
+  len += serialization::encoded_length_i64(data_size_);
+  len += serialization::encoded_length_i64(shared_meta_size_);
+  len += serialization::encoded_length_i64(meta_size_);
+  return len;
+}
+}
+}
\ No newline at end of file
diff --git a/src/storage/tablet/ob_tablet_space_usage.h b/src/storage/tablet/ob_tablet_space_usage.h
new file mode 100644
index 0000000000..c8394ee36e
--- /dev/null
+++ b/src/storage/tablet/ob_tablet_space_usage.h
@@ -0,0 +1,53 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */ + +#ifndef OCEANBASE_STORAGE_TABLET_OB_TABLET_SPACE_USAGE_H_ +#define OCEANBASE_STORAGE_TABLET_OB_TABLET_SPACE_USAGE_H_ + +#include "lib/utility/ob_print_utils.h" + +namespace oceanbase +{ +namespace storage +{ + +class ObTabletMacroInfo; +struct ObTabletSpaceUsage final +{ +public: + ObTabletSpaceUsage() + : shared_data_size_(0), data_size_(0), shared_meta_size_(0), meta_size_(0) + { + } + void reset() + { + shared_data_size_ = 0; + data_size_ = 0; + shared_meta_size_ = 0; + meta_size_ = 0; + } + TO_STRING_KV(K_(shared_data_size), K_(data_size), K_(shared_meta_size), K_(meta_size)); + int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; + int deserialize(const char *buf, const int64_t data_len, int64_t &pos); + int32_t get_serialize_size() const; +public: + static const int32_t TABLET_SPACE_USAGE_INFO_VERSION = 1; +public: + int64_t shared_data_size_; // shared (data block) size + int64_t data_size_; + int64_t shared_meta_size_; // shared (meta block) size + int64_t meta_size_; +}; +} +} + +#endif \ No newline at end of file diff --git a/src/storage/tablet/ob_tablet_table_store.cpp b/src/storage/tablet/ob_tablet_table_store.cpp index f3140e338a..25211c0fe1 100644 --- a/src/storage/tablet/ob_tablet_table_store.cpp +++ b/src/storage/tablet/ob_tablet_table_store.cpp @@ -1694,7 +1694,7 @@ int ObTabletTableStore::batch_cache_sstable_meta( LOG_WARN("invalid arguments", K(ret), K(remain_size)); } else if (OB_UNLIKELY(remain_size < sizeof(ObSSTableMeta))) { // The remain_size is too small to hold an sstable meta. 
- } else if (OB_FAIL(get_need_to_cache_sstables(meta_types, cache_keys, sstables))) { /*include cg sstable*/ + } else if (OB_FAIL(get_need_to_cache_sstables(meta_types, cache_keys, sstables))) { /*not include cg sstable*/ LOG_WARN("fail to get need to cache keys", K(ret)); } else if (OB_UNLIKELY(0 == cache_keys.count())) { } else if (OB_FAIL(OB_STORE_CACHE.get_storage_meta_cache().batch_get_meta_and_bypass_cache( @@ -1728,6 +1728,9 @@ int ObTabletTableStore::get_need_to_cache_sstables( return ret; } + +// *NOTE: not include cg sstable here, because the remain size of tablet_buffer is not enough to +// hold cg sstable now. int ObTabletTableStore::get_need_to_cache_sstables( const ObSSTableArray &sstable_array, common::ObIArray &meta_types, @@ -1740,16 +1743,6 @@ int ObTabletTableStore::get_need_to_cache_sstables( if (OB_ISNULL(sstable)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error, sstable is nullptr", K(ret), KP(sstable)); - } else if (sstable->is_co_sstable()) { - ObCOSSTableV2 *co_sstable = static_cast(sstable); - if (co_sstable->is_empty_co_table()) { - // no cg sstable in empty co table, avoid to call this func recursively. 
- } else if (OB_FAIL(get_need_to_cache_sstables(co_sstable->get_cg_sstables(), meta_types, keys, sstables))) { - LOG_WARN("failed to get need to cache cg sstables", K(ret), KPC(co_sstable)); - } - } - - if (OB_FAIL(ret)) { } else if (sstable->is_loaded()) { // sstable is already loaded to memory } else { ObStorageMetaValue::MetaType meta_type = sstable->is_co_sstable() @@ -2557,6 +2550,7 @@ int64_t ObTabletTableStore::to_string(char *buf, const int64_t buf_len) const const ObSSTable *table = major_tables_[i]; J_OBJ_START(); J_KV(K(i), "addr", table->get_addr(), + "is_loaded", table->is_loaded(), "type", ObITable::get_table_type_name(table->get_key().table_type_), "tablet_id", table->get_key().tablet_id_, "scn_range", table->get_key().scn_range_, diff --git a/unittest/storage/test_tablet_pointer_map.cpp b/unittest/storage/test_tablet_pointer_map.cpp index f0b92cd4ec..10416973d3 100644 --- a/unittest/storage/test_tablet_pointer_map.cpp +++ b/unittest/storage/test_tablet_pointer_map.cpp @@ -414,13 +414,13 @@ TEST_F(TestMetaDiskAddr, test_meta_disk_address) ObMetaDiskAddr block_addr; ASSERT_TRUE(!block_addr.is_valid()); - ASSERT_EQ(OB_INVALID_ARGUMENT, block_addr.set_block_addr(macro_id, 0, sizeof(ObTablet))); + ASSERT_EQ(OB_INVALID_ARGUMENT, block_addr.set_block_addr(macro_id, 0, sizeof(ObTablet), ObMetaDiskAddr::DiskType::BLOCK)); macro_id.block_index_ = 100; - ASSERT_EQ(OB_INVALID_ARGUMENT, block_addr.set_block_addr(macro_id, -1, sizeof(ObTablet))); - ASSERT_EQ(OB_INVALID_ARGUMENT, block_addr.set_block_addr(macro_id, ObMetaDiskAddr::MAX_OFFSET + 10, sizeof(ObTablet))); - ASSERT_EQ(OB_INVALID_ARGUMENT, block_addr.set_block_addr(macro_id, 0, -1)); - ASSERT_EQ(OB_INVALID_ARGUMENT, block_addr.set_block_addr(macro_id, ObMetaDiskAddr::MAX_OFFSET + 10, ObMetaDiskAddr::MAX_SIZE + sizeof(ObTablet))); - ASSERT_EQ(OB_SUCCESS, block_addr.set_block_addr(macro_id, 0, sizeof(ObTablet))); + ASSERT_EQ(OB_INVALID_ARGUMENT, block_addr.set_block_addr(macro_id, -1, sizeof(ObTablet), 
ObMetaDiskAddr::DiskType::BLOCK)); + ASSERT_EQ(OB_INVALID_ARGUMENT, block_addr.set_block_addr(macro_id, ObMetaDiskAddr::MAX_OFFSET + 10, sizeof(ObTablet), ObMetaDiskAddr::DiskType::BLOCK)); + ASSERT_EQ(OB_INVALID_ARGUMENT, block_addr.set_block_addr(macro_id, 0, -1, ObMetaDiskAddr::DiskType::BLOCK)); + ASSERT_EQ(OB_INVALID_ARGUMENT, block_addr.set_block_addr(macro_id, ObMetaDiskAddr::MAX_OFFSET + 10, ObMetaDiskAddr::MAX_SIZE + sizeof(ObTablet), ObMetaDiskAddr::DiskType::BLOCK)); + ASSERT_EQ(OB_SUCCESS, block_addr.set_block_addr(macro_id, 0, sizeof(ObTablet), ObMetaDiskAddr::DiskType::BLOCK)); ASSERT_TRUE(block_addr.is_valid()); ASSERT_EQ(ObMetaDiskAddr::DiskType::BLOCK, block_addr.type_); ASSERT_EQ(macro_id.first_id_, block_addr.first_id_);