restore status needs to be merged during migration

This commit is contained in:
wxhwang
2023-08-14 15:42:30 +00:00
committed by ob-robot
parent 9b3f07d4ad
commit ddabe52cdb
7 changed files with 80 additions and 29 deletions

View File

@ -546,6 +546,7 @@ class ObString;
ACT(AFTER_JOIN_LEARNER_LIST,)\
ACT(BEFORE_TRANSFER_START_COMMIT,)\
ACT(STOP_PRIMARY_LS_THREAD,)\
ACT(TRANSFER_GET_BACKFILL_TABLETS_BEFORE,)\
ACT(MAX_DEBUG_SYNC_POINT,)
DECLARE_ENUM(ObDebugSyncPoint, debug_sync_point, OB_DEBUG_SYNC_POINT_DEF);

View File

@ -965,6 +965,8 @@ int ObStartMigrationTask::process()
LOG_WARN("failed to choose src", K(ret), KPC(ctx_));
} else if (OB_FAIL(build_ls_())) {
LOG_WARN("failed to build ls", K(ret), KPC(ctx_));
} else if (OB_FAIL(fill_restore_arg_if_needed_())) {
LOG_WARN("failed to fill restore arg", K(ret), KPC(ctx_));
} else {
#ifdef ERRSIM
if (OB_SUCC(ret)) {
@ -1544,6 +1546,33 @@ int ObStartMigrationTask::create_all_tablets_(
return ret;
}
int ObStartMigrationTask::fill_restore_arg_if_needed_()
{
// As the source log stream status can be ignored during transfer when log scn
// is before restore consistent scn. So, we should ensure consistent scn is
// valid when replaying transfer log during migration.
int ret = OB_SUCCESS;
ObLSHandle ls_handle;
ObLS *ls = nullptr;
ObLSRestoreStatus restore_status;
if (OB_FAIL(ObStorageHADagUtils::get_ls(ctx_->arg_.ls_id_, ls_handle))) {
LOG_WARN("failed to get ls", K(ret), KPC(ctx_));
} else if (OB_ISNULL(ls = ls_handle.get_ls())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ls should not be NULL", K(ret), KP(ls), KPC(ctx_));
} else if (OB_FAIL(ls->get_restore_status(restore_status))) {
LOG_WARN("failed to get restore status", K(ret), KPC(ls), KPC(ctx_));
} else if (!restore_status.is_in_restore()) {
// do nothing
} else if (OB_FAIL(ls->get_ls_restore_handler()->fill_restore_arg())) {
LOG_WARN("failed to fill restore arg", K(ret), KPC(ls), KPC(ctx_));
} else {
LOG_INFO("succeed fill restore arg during migration", "ls_id", ctx_->arg_.ls_id_, K(restore_status));
}
return ret;
}
int ObStartMigrationTask::inner_build_ls_with_old_rpc_()
{
int ret = OB_SUCCESS;

View File

@ -238,6 +238,7 @@ private:
int generate_tablets_migration_dag_();
int report_ls_meta_table_();
int choose_src_();
int fill_restore_arg_if_needed_();
int fetch_ls_info_(const uint64_t tenant_id, const share::ObLSID &ls_id,
const common::ObAddr &member_addr, obrpc::ObCopyLSInfo &ls_info);
int get_local_ls_checkpoint_scn_(share::SCN &local_checkpoint_scn);

View File

@ -85,6 +85,8 @@ int ObTransferWorkerMgr::get_need_backfill_tx_tablets_(ObTransferBackfillTXParam
bool in_migration = false;
ObLSRestoreStatus restore_status;
DEBUG_SYNC(TRANSFER_GET_BACKFILL_TABLETS_BEFORE);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("transfer work not init", K(ret));

View File

@ -425,7 +425,7 @@ int ObLSRestoreHandler::update_state_handle_()
LOG_WARN("fail to get_restore_status", K(ret), KPC(ls_));
} else if (nullptr != state_handler_
&& new_status == state_handler_->get_restore_status()) { // no need update state handler
} else if (OB_FAIL(fill_restore_arg_())) {
} else if (OB_FAIL(fill_restore_arg())) {
LOG_WARN("fail to fill restore arg", K(ret));
} else {
lib::ObMutexGuard guard(mtx_);
@ -1602,7 +1602,7 @@ int ObLSRestoreStartState::inc_need_restore_ls_cnt_()
return ret;
}
int ObLSRestoreHandler::fill_restore_arg_()
int ObLSRestoreHandler::fill_restore_arg()
{
int ret = OB_SUCCESS;
common::ObMySQLProxy *sql_proxy_ = GCTX.sql_proxy_;
@ -1620,6 +1620,7 @@ int ObLSRestoreHandler::fill_restore_arg_()
tenant_id, job_info))) {
LOG_WARN("fail to get restore job", K(ret), K(tenant_id));
} else {
lib::ObMutexGuard guard(mtx_);
ls_restore_arg_.job_id_ = job_info.get_job_id();
ls_restore_arg_.restore_type_ = share::ObRestoreType::NORMAL_RESTORE; // quick restore or normal restore
ls_restore_arg_.tenant_id_ = tenant_id;

View File

@ -93,6 +93,7 @@ public:
bool is_stop() { return is_stop_; }
int update_rebuild_seq();
int64_t get_rebuild_seq();
int fill_restore_arg();
private:
int cancel_task_();
int check_before_do_restore_(bool &can_do_restore);
@ -104,7 +105,6 @@ private:
template <typename T>
int construct_state_handler_(T *&new_handler);
int deal_failed_restore_();
int fill_restore_arg_();
private:
bool is_inited_;
bool is_stop_; // used by ls destory

View File

@ -361,34 +361,51 @@ int ObTabletMeta::init(
table_store_flag = OB_ISNULL(tablet_meta) ? table_store_flag : tablet_meta->table_store_flag_;
}
version_ = TABLET_META_VERSION;
ls_id_ = old_tablet_meta.ls_id_;
tablet_id_ = old_tablet_meta.tablet_id_;
data_tablet_id_ = old_tablet_meta.data_tablet_id_;
ref_tablet_id_ = old_tablet_meta.ref_tablet_id_;
create_scn_ = old_tablet_meta.create_scn_;
create_schema_version_ = old_tablet_meta.create_schema_version_;
start_scn_ = old_tablet_meta.start_scn_;
clog_checkpoint_scn_ = clog_checkpoint_scn;
snapshot_version_ = snapshot_version;
multi_version_start_ = multi_version_start;
compat_mode_ = old_tablet_meta.compat_mode_;
ha_status_ = old_tablet_meta.ha_status_;
report_status_ = old_tablet_meta.report_status_; //old tablet meta report status already reset
table_store_flag_ = table_store_flag;
ddl_checkpoint_scn_ = old_tablet_meta.ddl_checkpoint_scn_;
ddl_start_scn_ = old_tablet_meta.ddl_start_scn_;
ddl_commit_scn_ = old_tablet_meta.ddl_commit_scn_;
ddl_snapshot_version_ = old_tablet_meta.ddl_snapshot_version_;
max_sync_storage_schema_version_ = max_sync_storage_schema_version;
max_serialized_medium_scn_ = MAX(old_tablet_meta.max_serialized_medium_scn_,
OB_ISNULL(tablet_meta) ? 0 : tablet_meta->max_serialized_medium_scn_);
ddl_execution_id_ = old_tablet_meta.ddl_execution_id_;
ddl_data_format_version_ = old_tablet_meta.ddl_data_format_version_;
transfer_info_ = transfer_info;
mds_checkpoint_scn_ = old_tablet_meta.mds_checkpoint_scn_;
// Fuse the restore status during migration. Consider the following timeline:
// 1. SOURCE: tablet P0 is created with restore status FULL by replaying start transfer in.
// 2. TARGET: a rebuild is triggered, which creates P0 with restore status FULL and data status INCOMPLETE.
// 3. SOURCE: the transfer handler changes the restore status of P0 to EMPTY.
// 4. SOURCE: the minor sstables of P0 are restored by the restore handler, which then sets the restore status to MINOR_AND_MAJOR_META.
// 5. TARGET: the minor sstables of P0 are restored by migration, which then sets the data status to COMPLETE.
// The result is that P0 has restore status FULL, but only minor sstables exist, with no major sstable.
ObTabletHAStatus new_ha_status = old_tablet_meta.ha_status_;
if (!old_tablet_meta.ha_status_.is_data_status_complete() && OB_NOT_NULL(tablet_meta)) {
ObTabletRestoreStatus::STATUS src_restore_status;
if (OB_FAIL(tablet_meta->ha_status_.get_restore_status(src_restore_status))) {
LOG_WARN("failed to get restore status", K(ret), KPC(tablet_meta));
} else if (OB_FAIL(new_ha_status.set_restore_status(src_restore_status))) {
LOG_WARN("failed to set new restore status", K(ret), K(new_ha_status), K(src_restore_status));
}
}
if (OB_SUCC(ret)) {
version_ = TABLET_META_VERSION;
ls_id_ = old_tablet_meta.ls_id_;
tablet_id_ = old_tablet_meta.tablet_id_;
data_tablet_id_ = old_tablet_meta.data_tablet_id_;
ref_tablet_id_ = old_tablet_meta.ref_tablet_id_;
create_scn_ = old_tablet_meta.create_scn_;
create_schema_version_ = old_tablet_meta.create_schema_version_;
start_scn_ = old_tablet_meta.start_scn_;
clog_checkpoint_scn_ = clog_checkpoint_scn;
snapshot_version_ = snapshot_version;
multi_version_start_ = multi_version_start;
compat_mode_ = old_tablet_meta.compat_mode_;
ha_status_ = new_ha_status;
report_status_ = old_tablet_meta.report_status_; //old tablet meta report status already reset
table_store_flag_ = table_store_flag;
ddl_checkpoint_scn_ = old_tablet_meta.ddl_checkpoint_scn_;
ddl_start_scn_ = old_tablet_meta.ddl_start_scn_;
ddl_commit_scn_ = old_tablet_meta.ddl_commit_scn_;
ddl_snapshot_version_ = old_tablet_meta.ddl_snapshot_version_;
max_sync_storage_schema_version_ = max_sync_storage_schema_version;
max_serialized_medium_scn_ = MAX(old_tablet_meta.max_serialized_medium_scn_,
OB_ISNULL(tablet_meta) ? 0 : tablet_meta->max_serialized_medium_scn_);
ddl_execution_id_ = old_tablet_meta.ddl_execution_id_;
ddl_data_format_version_ = old_tablet_meta.ddl_data_format_version_;
transfer_info_ = transfer_info;
mds_checkpoint_scn_ = old_tablet_meta.mds_checkpoint_scn_;
is_inited_ = true;
}
}