From 27d3dcfc30ee5c3bbd1823efb4741483d6fb98a6 Mon Sep 17 00:00:00 2001 From: godyangfight Date: Thu, 7 Nov 2024 07:17:09 +0000 Subject: [PATCH] Fix transfer dest lost restore status flag bug. --- .../mtlenv/storage/test_ls_tablet_service.cpp | 63 +++++++++++++++ .../ob_storage_ha_struct.cpp | 78 +++++++++++++++++-- .../high_availability/ob_storage_ha_struct.h | 15 +++- .../ob_tablet_backfill_tx.cpp | 5 +- .../ob_transfer_backfill_tx.cpp | 24 +++++- src/storage/tablet/ob_tablet.cpp | 35 ++++++++- src/storage/tablet/ob_tablet.h | 1 + 7 files changed, 206 insertions(+), 15 deletions(-) diff --git a/mittest/mtlenv/storage/test_ls_tablet_service.cpp b/mittest/mtlenv/storage/test_ls_tablet_service.cpp index 44c142165..65d73b82a 100644 --- a/mittest/mtlenv/storage/test_ls_tablet_service.cpp +++ b/mittest/mtlenv/storage/test_ls_tablet_service.cpp @@ -1217,6 +1217,69 @@ TEST_F(TestLSTabletService, test_serialize_sstable_with_min_filled_tx_scn) } +TEST_F(TestLSTabletService, test_new_tablet_has_backup_table_with_ha_status) +{ + + //create tablet + int ret = OB_SUCCESS; + ObTabletID tablet_id(10000014); + share::schema::ObTableSchema schema; + TestSchemaUtils::prepare_data_schema(schema); + ObLSHandle ls_handle; + ObLSService *ls_svr = MTL(ObLSService*); + ObLS *ls = nullptr; + ret = ls_svr->get_ls(ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD); + ASSERT_EQ(OB_SUCCESS, ret); + ls = ls_handle.get_ls(); + ASSERT_EQ(true, OB_NOT_NULL(ls)); + + ret = TestTabletHelper::create_tablet(ls_handle, tablet_id, schema, allocator_, ObTabletStatus::Status::NORMAL); + ASSERT_EQ(OB_SUCCESS, ret); + + ObTabletHandle tablet_handle; + ObTablet *tablet = nullptr; + ret = ls_handle.get_ls()->get_tablet_svr()->get_tablet(tablet_id, tablet_handle, 0, ObMDSGetTabletMode::READ_WITHOUT_CHECK); + ASSERT_EQ(OB_SUCCESS, ret); + tablet = tablet_handle.get_obj(); + ASSERT_EQ(true, OB_NOT_NULL(tablet)); + + + //create backup sstable + blocksstable::ObSSTable sstable; + ObTabletCreateSSTableParam param; + TestTabletHelper::prepare_sstable_param(tablet_id, schema, param); + param.table_key_.table_type_ = ObITable::MINOR_SSTABLE; + param.filled_tx_scn_ = param.table_key_.get_end_scn(); + param.table_backup_flag_.set_has_backup(); + param.table_backup_flag_.set_no_local(); + ASSERT_EQ(OB_SUCCESS, sstable.init(param, &allocator_)); + + ObTableHandleV2 table_handle; + ret = table_handle.set_sstable(&sstable, &allocator_); + ASSERT_EQ(OB_SUCCESS, ret); + + ObTabletHandle new_table_handle; + const int64_t update_snapshot_version = sstable.get_snapshot_version(); + const int64_t update_multi_version_start = tablet->get_multi_version_start(); + ObStorageSchema *storage_schema = nullptr; + ret = tablet->load_storage_schema(allocator_, storage_schema); + ASSERT_EQ(OB_SUCCESS, ret); + ObBatchUpdateTableStoreParam update_table_store_param; + update_table_store_param.tablet_meta_ = nullptr; + update_table_store_param.rebuild_seq_ = ls->get_rebuild_seq(); + update_table_store_param.need_replace_remote_sstable_ = false; + ret = update_table_store_param.tables_handle_.add_table(table_handle); + ASSERT_EQ(OB_SUCCESS, ret); + + ret = ls_tablet_service_->build_tablet_with_batch_tables(tablet_id, update_table_store_param); + ASSERT_EQ(OB_ERR_UNEXPECTED, ret); + + tablet_handle.reset(); + new_table_handle.reset(); + ret = ls_tablet_service_->do_remove_tablet(ls_id_, tablet_id); + ASSERT_EQ(OB_SUCCESS, ret); +} + } // end storage } // end oceanbase diff --git a/src/storage/high_availability/ob_storage_ha_struct.cpp b/src/storage/high_availability/ob_storage_ha_struct.cpp index 13af65ec0..6ec4ef8f3 100644 --- a/src/storage/high_availability/ob_storage_ha_struct.cpp +++ b/src/storage/high_availability/ob_storage_ha_struct.cpp @@ -1605,7 +1605,8 @@ ObBackfillTabletsTableMgr::ObTabletTableMgr::ObTabletTableMgr() transfer_seq_(0), max_major_end_scn_(SCN::min_scn()), allocator_("Backfill"), - table_handle_array_() + table_handle_array_(), + restore_status_(ObTabletRestoreStatus::RESTORE_STATUS_MAX) { } @@ -1615,18 +1616,20 @@ ObBackfillTabletsTableMgr::ObTabletTableMgr::~ObTabletTableMgr() int ObBackfillTabletsTableMgr::ObTabletTableMgr::init( const common::ObTabletID &tablet_id, - const int64_t transfer_seq) + const int64_t transfer_seq, + const ObTabletRestoreStatus::STATUS &restore_status) { int ret = OB_SUCCESS; if (is_inited_) { ret = OB_INIT_TWICE; LOG_WARN("tablet table mgr init twice", K(ret)); - } else if (!tablet_id.is_valid() || transfer_seq < 0) { + } else if (!tablet_id.is_valid() || transfer_seq < 0 || !ObTabletRestoreStatus::is_valid(restore_status)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("init tablet table mgr get invalid argument", K(ret), K(tablet_id), K(transfer_seq)); + LOG_WARN("init tablet table mgr get invalid argument", K(ret), K(tablet_id), K(transfer_seq), K(restore_status)); } else { tablet_id_ = tablet_id; transfer_seq_ = transfer_seq; + restore_status_ = restore_status; is_inited_ = true; } return ret; @@ -1732,6 +1735,21 @@ int ObBackfillTabletsTableMgr::ObTabletTableMgr::get_max_major_end_scn( return ret; } +int ObBackfillTabletsTableMgr::ObTabletTableMgr::get_restore_status( + ObTabletRestoreStatus::STATUS &restore_status) +{ + int ret = OB_SUCCESS; + restore_status = ObTabletRestoreStatus::RESTORE_STATUS_MAX; + + if (!is_inited_) { + ret = OB_NOT_INIT; + LOG_WARN("tablet table mgr do not init", K(ret)); + } else { + restore_status = restore_status_; + } + return ret; +} + ObBackfillTabletsTableMgr::ObBackfillTabletsTableMgr() : is_inited_(false), lock_(), @@ -1844,15 +1862,18 @@ int ObBackfillTabletsTableMgr::get_tablet_all_sstables( return ret; } -int ObBackfillTabletsTableMgr::init_tablet_table_mgr(const common::ObTabletID &tablet_id, const int64_t transfer_seq) +int ObBackfillTabletsTableMgr::init_tablet_table_mgr( + const common::ObTabletID &tablet_id, + const int64_t transfer_seq, + const ObTabletRestoreStatus::STATUS &restore_status) { int ret = OB_SUCCESS; if (!is_inited_) { ret = OB_NOT_INIT; LOG_WARN("backfill tablets table mgr do not init", K(ret)); - } else if (!tablet_id.is_valid() || transfer_seq < 0) { + } else if (!tablet_id.is_valid() || transfer_seq < 0 || !ObTabletRestoreStatus::is_valid(restore_status)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("init tablet table mgr get invalid argument", K(ret), K(tablet_id), K(transfer_seq)); + LOG_WARN("init tablet table mgr get invalid argument", K(ret), K(tablet_id), K(transfer_seq), K(restore_status)); } else { common::SpinWLockGuard guard(lock_); ObTabletTableMgr *tablet_table_mgr = nullptr; @@ -1864,7 +1885,7 @@ int ObBackfillTabletsTableMgr::init_tablet_table_mgr(const common::ObTabletID &t ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to alloc memory", K(ret), KP(buf)); } else if (FALSE_IT(tablet_table_mgr = new (buf) ObTabletTableMgr())) { - } else if (OB_FAIL(tablet_table_mgr->init(tablet_id, transfer_seq))) { + } else if (OB_FAIL(tablet_table_mgr->init(tablet_id, transfer_seq, restore_status))) { LOG_WARN("failed to init tablet table mgr", K(ret), K(tablet_id)); } else if (OB_FAIL(map_.set_refactored(tablet_id, tablet_table_mgr))) { LOG_WARN("failed to set tablet table mgr into map", K(ret), K(tablet_id)); @@ -1985,6 +2006,47 @@ int ObBackfillTabletsTableMgr::get_local_rebuild_seq(int64_t &local_rebuild_seq) return ret; } +int ObBackfillTabletsTableMgr::get_restore_status( + const common::ObTabletID &tablet_id, ObTabletRestoreStatus::STATUS &restore_status) +{ + int ret = OB_SUCCESS; + restore_status = ObTabletRestoreStatus::RESTORE_STATUS_MAX; + if (!is_inited_) { + ret = OB_NOT_INIT; + LOG_WARN("backfill tablets table mgr do not init", K(ret)); + } else if (!tablet_id.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get restore status get invalid argument", K(ret), K(tablet_id)); + } else { + common::SpinRLockGuard guard(lock_); + ObTabletTableMgr *tablet_table_mgr = nullptr; + int hash_ret = map_.get_refactored(tablet_id, tablet_table_mgr); + if (OB_SUCCESS == hash_ret) { + if (OB_FAIL(tablet_table_mgr->get_restore_status(restore_status))) { + LOG_WARN("failed to set max major end scn", K(ret), K(tablet_id), K(restore_status)); + } + } else { + ret = hash_ret; + LOG_WARN("tablet table mgr do not exist", K(ret), K(tablet_id), K(restore_status)); + } + } + return ret; +} + +int ObBackfillTabletsTableMgr::get_transfer_scn(share::SCN &transfer_scn) +{ + int ret = OB_SUCCESS; + transfer_scn.reset(); + if (!is_inited_) { + ret = OB_NOT_INIT; + LOG_WARN("backfill tablets table mgr do not init", K(ret)); + } else { + common::SpinRLockGuard guard(lock_); + transfer_scn = transfer_start_scn_; + } + return ret; +} + /******************ObMacroBlcokReuseMgr*********************/ ObMacroBlockReuseMgr::ObMacroBlockReuseMgr() : is_inited_(false), diff --git a/src/storage/high_availability/ob_storage_ha_struct.h b/src/storage/high_availability/ob_storage_ha_struct.h index 897f252d5..5741735ff 100644 --- a/src/storage/high_availability/ob_storage_ha_struct.h +++ b/src/storage/high_availability/ob_storage_ha_struct.h @@ -26,6 +26,7 @@ #include "share/scheduler/ob_dag_scheduler_config.h" #include "share/rebuild_tablet/ob_rebuild_tablet_location.h" #include "common/ob_learner_list.h" +#include "storage/high_availability/ob_tablet_ha_status.h" namespace oceanbase { @@ -487,7 +488,10 @@ public: ObBackfillTabletsTableMgr(); ~ObBackfillTabletsTableMgr(); int init(const int64_t rebuild_seq, const share::SCN &transfer_start_scn); - int init_tablet_table_mgr(const common::ObTabletID &tablet_id, const int64_t transfer_seq); + int init_tablet_table_mgr( + const common::ObTabletID &tablet_id, + const int64_t transfer_seq, + const ObTabletRestoreStatus::STATUS &restore_status); int add_sstable( const common::ObTabletID &tablet_id, const int64_t rebuild_seq, @@ -505,6 +509,10 @@ public: const common::ObTabletID &tablet_id, share::SCN &max_major_end_scn); int get_local_rebuild_seq(int64_t &local_rebuild_seq); + int get_restore_status( + const common::ObTabletID &tablet_id, + ObTabletRestoreStatus::STATUS &restore_status); + int get_transfer_scn(share::SCN &transfer_scn); private: class ObTabletTableMgr final { @@ -513,7 +521,8 @@ private: ~ObTabletTableMgr(); int init( const common::ObTabletID &tablet_id, - const int64_t transfer_seq); + const int64_t transfer_seq, + const ObTabletRestoreStatus::STATUS &restore_status); int add_sstable( const int64_t transfer_seq, const share::SCN &transfer_start_scn, @@ -521,6 +530,7 @@ private: int get_all_sstables(ObTablesHandleArray &table_handle_array); int set_max_major_end_scn(const share::SCN &max_major_end_scn); int get_max_major_end_scn(share::SCN &max_major_end_scn); + int get_restore_status(ObTabletRestoreStatus::STATUS &restore_status); private: bool is_inited_; common::ObTabletID tablet_id_; @@ -528,6 +538,7 @@ private: share::SCN max_major_end_scn_; common::ObArenaAllocator allocator_; ObTablesHandleArray table_handle_array_; + ObTabletRestoreStatus::STATUS restore_status_; DISALLOW_COPY_AND_ASSIGN(ObTabletTableMgr); }; private: diff --git a/src/storage/high_availability/ob_tablet_backfill_tx.cpp b/src/storage/high_availability/ob_tablet_backfill_tx.cpp index 994883d50..f4132a196 100644 --- a/src/storage/high_availability/ob_tablet_backfill_tx.cpp +++ b/src/storage/high_availability/ob_tablet_backfill_tx.cpp @@ -860,6 +860,7 @@ int ObTabletBackfillTXTask::init_tablet_table_mgr_() ObTabletHandle tablet_handle; ObTablet *tablet = nullptr; int64_t transfer_seq = 0; + ObTabletRestoreStatus::STATUS restore_status = ObTabletRestoreStatus::RESTORE_STATUS_MAX; if (IS_NOT_INIT) { ret = OB_NOT_INIT; @@ -871,7 +872,9 @@ int ObTabletBackfillTXTask::init_tablet_table_mgr_() ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet should not be NULL", K(ret), K(tablet_info_)); } else if (FALSE_IT(transfer_seq = tablet->get_tablet_meta().transfer_info_.transfer_seq_)) { - } else if (OB_FAIL(tablets_table_mgr_->init_tablet_table_mgr(tablet_info_.tablet_id_, transfer_seq))) { + } else if (OB_FAIL(tablet->get_restore_status(restore_status))) { + LOG_WARN("failed to get restore status", K(ret), KPC(tablet)); + } else if (OB_FAIL(tablets_table_mgr_->init_tablet_table_mgr(tablet_info_.tablet_id_, transfer_seq, restore_status))) { LOG_WARN("failed to init tablet table mgr", K(ret), K(tablet_info_)); } return ret; diff --git a/src/storage/high_availability/ob_transfer_backfill_tx.cpp b/src/storage/high_availability/ob_transfer_backfill_tx.cpp index da3e3e29a..8edde073c 100644 --- a/src/storage/high_availability/ob_transfer_backfill_tx.cpp +++ b/src/storage/high_availability/ob_transfer_backfill_tx.cpp @@ -1485,7 +1485,8 @@ int ObTransferReplaceTableTask::get_source_tablet_tables_( } else if (OB_ISNULL(dest_tablet) || !tablet_info.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("tablet info is invalid", K(ret), K(tablet_info)); - } else if (FALSE_IT(transfer_scn = dest_tablet->get_tablet_meta().transfer_info_.transfer_start_scn_)) { + } else if (OB_FAIL(ctx_->tablets_table_mgr_.get_transfer_scn(transfer_scn))) { + LOG_WARN("failed to get transfer scn", K(ret), K(tablet_info)); } else if (OB_ISNULL(ls_service = MTL(ObLSService*))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to get ObLSService from MTL", K(ret), KP(ls_service)); @@ -1525,10 +1526,27 @@ int ObTransferReplaceTableTask::get_source_tablet_tables_( ret = OB_EAGAIN; LOG_WARN("the transfer start transaction was rolledback and the task needs to be retried", K(ret), K(tablet_info), K(src_user_data)); } - } else if (OB_FAIL(tablet->get_tablet_meta().ha_status_.get_restore_status(restore_status))) { - LOG_WARN("failed to get tablet restore status", K(ret)); + } else if (src_user_data.transfer_scn_ != dest_tablet->get_tablet_meta().transfer_info_.transfer_start_scn_ + || src_user_data.transfer_scn_ != transfer_scn) { + if (tablet_info.is_committed_) { + ret = OB_TRANSFER_SYS_ERROR; + LOG_ERROR("transfer trans has committed but src and dest transfer scn is not same", K(ret), + KPC(ctx_), KPC(tablet), K(src_user_data), KPC(dest_tablet), K(transfer_scn)); + } else { + ret = OB_EAGAIN; + LOG_WARN("transfer scn is not equal to user data transfer scn, may transfer", K(ret), K(src_user_data), KPC(ctx_)); + //backfill tx ctx is batch context, log sync scn is for batch tablets which have same log sync scn + //single tablet log sync scn which is changed can not retry batch tablets task. + int tmp_ret = OB_SUCCESS; + const bool need_retry = false; + if (OB_SUCCESS != (tmp_ret = ctx_->set_result(ret, need_retry))) { + LOG_WARN("failed to set result", K(tmp_ret), K(ret), KPC(ctx_)); + } + } } else if (OB_FAIL(tablet->fetch_table_store(wrapper))) { LOG_WARN("fetch table store fail", K(ret), KP(tablet)); + } else if (OB_FAIL(ctx_->tablets_table_mgr_.get_restore_status(tablet->get_tablet_id(), restore_status))) { + LOG_WARN("failed to get restore status", K(ret), KPC(tablet)); } else if (OB_FAIL(ctx_->tablets_table_mgr_.get_tablet_all_sstables(tablet_info.tablet_id_, filled_table_handle_array))) { LOG_WARN("failed to get tablet all sstables", K(ret), K(tablet_info)); } else if (OB_FAIL(add_src_major_sstable_(tablet_info.tablet_id_, wrapper, filled_table_handle_array))) { diff --git a/src/storage/tablet/ob_tablet.cpp b/src/storage/tablet/ob_tablet.cpp index 74259e72b..122b3c5d8 100644 --- a/src/storage/tablet/ob_tablet.cpp +++ b/src/storage/tablet/ob_tablet.cpp @@ -8682,7 +8682,6 @@ int ObTablet::build_transfer_backfill_tablet_param( param.snapshot_version_ = src_tablet_meta.snapshot_version_; param.multi_version_start_ = src_tablet_meta.multi_version_start_; param.extra_medium_info_ = src_tablet_meta.extra_medium_info_; - if (OB_FAIL(param.storage_schema_.assign(param.allocator_, src_storage_schema))) { LOG_WARN("failed to assign src storage schema", K(ret), K(src_storage_schema)); } else { @@ -8735,11 +8734,45 @@ int ObTablet::check_table_store_flag_match_with_table_store_(const ObTabletTable LOG_ERROR("tablet table store flag is with major flag but tablet has no major sstable", K(ret), KPC(this), K(table_store_flag), KPC(table_store)); } else { + const ObSSTableArray &major_sstables = table_store->get_major_sstables(); + const ObSSTableArray &minor_sstables = table_store->get_minor_sstables(); + const ObSSTableArray &ddl_sstables = table_store->get_ddl_sstables(); + if (OB_FAIL(check_table_store_without_backup_table_(major_sstables))) { + LOG_WARN("failed to check major sstables", K(ret), KPC(this)); + } else if (OB_FAIL(check_table_store_without_backup_table_(minor_sstables))) { + LOG_WARN("failed to check minor sstables", K(ret), KPC(this)); + } else if (OB_FAIL(check_table_store_without_backup_table_(ddl_sstables))) { + LOG_WARN("failed to check ddl sstables", K(ret), KPC(this)); + } // check passed, do nothing } return ret; } +int ObTablet::check_table_store_without_backup_table_(const ObSSTableArray &sstable_array) +{ + int ret = OB_SUCCESS; + ObSSTableMetaHandle meta_handle; + + for (int64_t i = 0; OB_SUCC(ret) && i < sstable_array.count(); ++i) { + const ObSSTable *sstable = sstable_array.at(i); + meta_handle.reset(); + if (OB_ISNULL(sstable)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("sstable should not be NULL", K(ret), K(sstable_array), K(i)); + } else if (OB_FAIL(sstable->get_meta(meta_handle))) { + LOG_WARN("failed to get meta", K(ret), K(meta_handle)); + } else if (!meta_handle.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("sstable meta should not be invalid", K(ret), KPC(sstable)); + } else if (meta_handle.get_sstable_meta().get_table_backup_flag().has_backup()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet ha status is none but still has backup table, unexpected", K(ret), KPC(sstable), KPC(this)); + } + } + return ret; +} + int ObTablet::build_migration_shared_table_addr_( const ObRootBlockInfo &block_info, common::ObIAllocator &allocator, diff --git a/src/storage/tablet/ob_tablet.h b/src/storage/tablet/ob_tablet.h index a5d59df5e..f0fcdbafa 100644 --- a/src/storage/tablet/ob_tablet.h +++ b/src/storage/tablet/ob_tablet.h @@ -500,6 +500,7 @@ private: const ObDirectLoadType direct_load_type, ObITable::TableKey &table_key, bool &replay_normal_in_cs_replica); + int check_table_store_without_backup_table_(const ObSSTableArray &sstable_array); public: // other const ObMetaDiskAddr &get_tablet_addr() const { return tablet_addr_; }