diff --git a/src/share/ob_debug_sync_point.h b/src/share/ob_debug_sync_point.h index ec6d6379c6..5a518a3d39 100755 --- a/src/share/ob_debug_sync_point.h +++ b/src/share/ob_debug_sync_point.h @@ -546,6 +546,7 @@ class ObString; ACT(AFTER_JOIN_LEARNER_LIST,)\ ACT(BEFORE_TRANSFER_START_COMMIT,)\ ACT(STOP_PRIMARY_LS_THREAD,)\ + ACT(TRANSFER_GET_BACKFILL_TABLETS_BEFORE,)\ ACT(MAX_DEBUG_SYNC_POINT,) DECLARE_ENUM(ObDebugSyncPoint, debug_sync_point, OB_DEBUG_SYNC_POINT_DEF); diff --git a/src/storage/high_availability/ob_ls_migration.cpp b/src/storage/high_availability/ob_ls_migration.cpp index 30a9875bab..64d0be0595 100644 --- a/src/storage/high_availability/ob_ls_migration.cpp +++ b/src/storage/high_availability/ob_ls_migration.cpp @@ -965,6 +965,8 @@ int ObStartMigrationTask::process() LOG_WARN("failed to choose src", K(ret), KPC(ctx_)); } else if (OB_FAIL(build_ls_())) { LOG_WARN("failed to build ls", K(ret), KPC(ctx_)); + } else if (OB_FAIL(fill_restore_arg_if_needed_())) { + LOG_WARN("failed to fill restore arg", K(ret), KPC(ctx_)); } else { #ifdef ERRSIM if (OB_SUCC(ret)) { @@ -1544,6 +1546,33 @@ int ObStartMigrationTask::create_all_tablets_( return ret; } +int ObStartMigrationTask::fill_restore_arg_if_needed_() +{ + // The source log stream status can be ignored during transfer when the log scn + // is before the restore consistent scn, so we must ensure the consistent scn is + // valid when replaying the transfer log during migration. 
+ int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + ObLSRestoreStatus restore_status; + if (OB_FAIL(ObStorageHADagUtils::get_ls(ctx_->arg_.ls_id_, ls_handle))) { + LOG_WARN("failed to get ls", K(ret), KPC(ctx_)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", K(ret), KP(ls), KPC(ctx_)); + } else if (OB_FAIL(ls->get_restore_status(restore_status))) { + LOG_WARN("failed to get restore status", K(ret), KPC(ls), KPC(ctx_)); + } else if (!restore_status.is_in_restore()) { + // do nothing + } else if (OB_FAIL(ls->get_ls_restore_handler()->fill_restore_arg())) { + LOG_WARN("failed to fill restore arg", K(ret), KPC(ls), KPC(ctx_)); + } else { + LOG_INFO("succeed fill restore arg during migration", "ls_id", ctx_->arg_.ls_id_, K(restore_status)); + } + + return ret; +} + int ObStartMigrationTask::inner_build_ls_with_old_rpc_() { int ret = OB_SUCCESS; diff --git a/src/storage/high_availability/ob_ls_migration.h b/src/storage/high_availability/ob_ls_migration.h index 054b9be032..4c0b44b98b 100644 --- a/src/storage/high_availability/ob_ls_migration.h +++ b/src/storage/high_availability/ob_ls_migration.h @@ -238,6 +238,7 @@ private: int generate_tablets_migration_dag_(); int report_ls_meta_table_(); int choose_src_(); + int fill_restore_arg_if_needed_(); int fetch_ls_info_(const uint64_t tenant_id, const share::ObLSID &ls_id, const common::ObAddr &member_addr, obrpc::ObCopyLSInfo &ls_info); int get_local_ls_checkpoint_scn_(share::SCN &local_checkpoint_scn); diff --git a/src/storage/high_availability/ob_transfer_backfill_tx.cpp b/src/storage/high_availability/ob_transfer_backfill_tx.cpp index 05acb89765..8e1ef838f7 100644 --- a/src/storage/high_availability/ob_transfer_backfill_tx.cpp +++ b/src/storage/high_availability/ob_transfer_backfill_tx.cpp @@ -85,6 +85,8 @@ int ObTransferWorkerMgr::get_need_backfill_tx_tablets_(ObTransferBackfillTXParam bool in_migration = false; ObLSRestoreStatus 
restore_status; + DEBUG_SYNC(TRANSFER_GET_BACKFILL_TABLETS_BEFORE); + if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("transfer work not init", K(ret)); diff --git a/src/storage/restore/ob_ls_restore_handler.cpp b/src/storage/restore/ob_ls_restore_handler.cpp index 600fe7dbc1..0c42e99ae4 100644 --- a/src/storage/restore/ob_ls_restore_handler.cpp +++ b/src/storage/restore/ob_ls_restore_handler.cpp @@ -425,7 +425,7 @@ int ObLSRestoreHandler::update_state_handle_() LOG_WARN("fail to get_restore_status", K(ret), KPC(ls_)); } else if (nullptr != state_handler_ && new_status == state_handler_->get_restore_status()) { // no need update state handler - } else if (OB_FAIL(fill_restore_arg_())) { + } else if (OB_FAIL(fill_restore_arg())) { LOG_WARN("fail to fill restore arg", K(ret)); } else { lib::ObMutexGuard guard(mtx_); @@ -1602,7 +1602,7 @@ int ObLSRestoreStartState::inc_need_restore_ls_cnt_() return ret; } -int ObLSRestoreHandler::fill_restore_arg_() +int ObLSRestoreHandler::fill_restore_arg() { int ret = OB_SUCCESS; common::ObMySQLProxy *sql_proxy_ = GCTX.sql_proxy_; @@ -1620,6 +1620,7 @@ int ObLSRestoreHandler::fill_restore_arg_() tenant_id, job_info))) { LOG_WARN("fail to get restore job", K(ret), K(tenant_id)); } else { + lib::ObMutexGuard guard(mtx_); ls_restore_arg_.job_id_ = job_info.get_job_id(); ls_restore_arg_.restore_type_ = share::ObRestoreType::NORMAL_RESTORE; // quick restore or normal restore ls_restore_arg_.tenant_id_ = tenant_id; diff --git a/src/storage/restore/ob_ls_restore_handler.h b/src/storage/restore/ob_ls_restore_handler.h index af8242f514..ee51cf5363 100644 --- a/src/storage/restore/ob_ls_restore_handler.h +++ b/src/storage/restore/ob_ls_restore_handler.h @@ -93,6 +93,7 @@ public: bool is_stop() { return is_stop_; } int update_rebuild_seq(); int64_t get_rebuild_seq(); + int fill_restore_arg(); private: int cancel_task_(); int check_before_do_restore_(bool &can_do_restore); @@ -104,7 +105,6 @@ private: template int construct_state_handler_(T 
*&new_handler); int deal_failed_restore_(); - int fill_restore_arg_(); private: bool is_inited_; bool is_stop_; // used by ls destory diff --git a/src/storage/tablet/ob_tablet_meta.cpp b/src/storage/tablet/ob_tablet_meta.cpp index 18bdf6e256..593643c6ac 100644 --- a/src/storage/tablet/ob_tablet_meta.cpp +++ b/src/storage/tablet/ob_tablet_meta.cpp @@ -361,34 +361,51 @@ int ObTabletMeta::init( table_store_flag = OB_ISNULL(tablet_meta) ? table_store_flag : tablet_meta->table_store_flag_; } - version_ = TABLET_META_VERSION; - ls_id_ = old_tablet_meta.ls_id_; - tablet_id_ = old_tablet_meta.tablet_id_; - data_tablet_id_ = old_tablet_meta.data_tablet_id_; - ref_tablet_id_ = old_tablet_meta.ref_tablet_id_; - create_scn_ = old_tablet_meta.create_scn_; - create_schema_version_ = old_tablet_meta.create_schema_version_; - start_scn_ = old_tablet_meta.start_scn_; - clog_checkpoint_scn_ = clog_checkpoint_scn; - snapshot_version_ = snapshot_version; - multi_version_start_ = multi_version_start; - compat_mode_ = old_tablet_meta.compat_mode_; - ha_status_ = old_tablet_meta.ha_status_; - report_status_ = old_tablet_meta.report_status_; //old tablet meta report status already reset - table_store_flag_ = table_store_flag; - ddl_checkpoint_scn_ = old_tablet_meta.ddl_checkpoint_scn_; - ddl_start_scn_ = old_tablet_meta.ddl_start_scn_; - ddl_commit_scn_ = old_tablet_meta.ddl_commit_scn_; - ddl_snapshot_version_ = old_tablet_meta.ddl_snapshot_version_; - max_sync_storage_schema_version_ = max_sync_storage_schema_version; - max_serialized_medium_scn_ = MAX(old_tablet_meta.max_serialized_medium_scn_, - OB_ISNULL(tablet_meta) ? 0 : tablet_meta->max_serialized_medium_scn_); - ddl_execution_id_ = old_tablet_meta.ddl_execution_id_; - ddl_data_format_version_ = old_tablet_meta.ddl_data_format_version_; - transfer_info_ = transfer_info; - mds_checkpoint_scn_ = old_tablet_meta.mds_checkpoint_scn_; + // fuse restore status during migration, consider the following timeline + // 1. 
SOURCE: tablet P0 was created with restore status FULL by replaying start transfer in. + // 2. TARGET: rebuild was triggered, then P0 was created with restore status FULL and data status INCOMPLETE. + // 3. SOURCE: transfer handler modified the restore status of P0 to EMPTY. + // 4. SOURCE: the minor of P0 was restored by restore handler, then set the restore status to MINOR_AND_MAJOR_META. + // 5. TARGET: the minor of P0 was restored by migration, then set data status COMPLETE. + // The result is that P0 is marked FULL but has only minor sstables and no major sstable. + ObTabletHAStatus new_ha_status = old_tablet_meta.ha_status_; + if (!old_tablet_meta.ha_status_.is_data_status_complete() && OB_NOT_NULL(tablet_meta)) { + ObTabletRestoreStatus::STATUS src_restore_status; + if (OB_FAIL(tablet_meta->ha_status_.get_restore_status(src_restore_status))) { + LOG_WARN("failed to get restore status", K(ret), KPC(tablet_meta)); + } else if (OB_FAIL(new_ha_status.set_restore_status(src_restore_status))) { + LOG_WARN("failed to set new restore status", K(ret), K(new_ha_status), K(src_restore_status)); + } + } if (OB_SUCC(ret)) { + version_ = TABLET_META_VERSION; + ls_id_ = old_tablet_meta.ls_id_; + tablet_id_ = old_tablet_meta.tablet_id_; + data_tablet_id_ = old_tablet_meta.data_tablet_id_; + ref_tablet_id_ = old_tablet_meta.ref_tablet_id_; + create_scn_ = old_tablet_meta.create_scn_; + create_schema_version_ = old_tablet_meta.create_schema_version_; + start_scn_ = old_tablet_meta.start_scn_; + clog_checkpoint_scn_ = clog_checkpoint_scn; + snapshot_version_ = snapshot_version; + multi_version_start_ = multi_version_start; + compat_mode_ = old_tablet_meta.compat_mode_; + ha_status_ = new_ha_status; + report_status_ = old_tablet_meta.report_status_; //old tablet meta report status already reset + table_store_flag_ = table_store_flag; + ddl_checkpoint_scn_ = old_tablet_meta.ddl_checkpoint_scn_; + ddl_start_scn_ = old_tablet_meta.ddl_start_scn_; + ddl_commit_scn_ = old_tablet_meta.ddl_commit_scn_; + 
ddl_snapshot_version_ = old_tablet_meta.ddl_snapshot_version_; + max_sync_storage_schema_version_ = max_sync_storage_schema_version; + max_serialized_medium_scn_ = MAX(old_tablet_meta.max_serialized_medium_scn_, + OB_ISNULL(tablet_meta) ? 0 : tablet_meta->max_serialized_medium_scn_); + ddl_execution_id_ = old_tablet_meta.ddl_execution_id_; + ddl_data_format_version_ = old_tablet_meta.ddl_data_format_version_; + transfer_info_ = transfer_info; + mds_checkpoint_scn_ = old_tablet_meta.mds_checkpoint_scn_; + is_inited_ = true; } }