From f5b64d55eb29a8a4a685a615d0a5033a599d9054 Mon Sep 17 00:00:00 2001 From: wxhwang Date: Fri, 17 Nov 2023 06:45:20 +0000 Subject: [PATCH] [CP] fix some tablets have not restored after tenant has restored successfully --- src/storage/restore/ob_ls_restore_handler.cpp | 20 ++++++-- .../restore/ob_ls_restore_task_mgr.cpp | 47 +++++++++++++------ 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/src/storage/restore/ob_ls_restore_handler.cpp b/src/storage/restore/ob_ls_restore_handler.cpp index be85fc480f..f4e2f125ee 100644 --- a/src/storage/restore/ob_ls_restore_handler.cpp +++ b/src/storage/restore/ob_ls_restore_handler.cpp @@ -1516,7 +1516,7 @@ int ObLSRestoreStartState::do_restore() LOG_WARN("fail to check ls created", K(ret), KPC(ls_)); } else if (!is_created) { if (OB_FAIL(do_with_uncreated_ls_())) { - LOG_WARN("fail to do with uncreadted ls", K(ret), KPC(ls_)); + LOG_WARN("fail to do with uncreated ls", K(ret), KPC(ls_)); } } else if (OB_FAIL(check_ls_leader_ready_(is_ready))) { LOG_WARN("fail to check is ls leader ready", K(ret), KPC(ls_)); @@ -1573,11 +1573,21 @@ int ObLSRestoreStartState::do_with_no_ls_meta_() int ret = OB_SUCCESS; // ls with no ls meta means it created after backup ls_attr_infos. // this ls doesn't have ls meta and tablet in backup, it only needs to replay clog. - // so just advance to restore to consistent_scn and start replay clog. - ObLSRestoreStatus next_status(ObLSRestoreStatus::Status::RESTORE_TO_CONSISTENT_SCN); + ObLSRestoreStatus next_status; + bool is_finish = false; if (OB_FAIL(online_())) { - LOG_WARN("fail to enable log", K(ret)); - } else if (OB_FAIL(report_start_replay_clog_lsn_())) { + LOG_WARN("fail to online ls", K(ret), KPC_(ls)); + } else if (OB_FAIL(check_replay_to_target_scn_(ls_restore_arg_->get_consistent_scn(), is_finish))) { + LOG_WARN("failed to check clog replay to consistent scn", K(ret)); + } else if (!is_finish) { + // the ls is created before consistent scn + next_status = ObLSRestoreStatus::Status::RESTORE_TO_CONSISTENT_SCN; + } else { + // the ls is created after consistent scn + next_status = ObLSRestoreStatus::Status::WAIT_RESTORE_TO_CONSISTENT_SCN; + } + + if (FAILEDx(report_start_replay_clog_lsn_())) { LOG_WARN("fail to report start replay clog lsn", K(ret)); } else if (OB_FAIL(advance_status_(*ls_, next_status))) { LOG_WARN("fail to advance status", K(ret), K(*ls_), K(next_status)); diff --git a/src/storage/restore/ob_ls_restore_task_mgr.cpp b/src/storage/restore/ob_ls_restore_task_mgr.cpp index 21f38dccf4..11ab575412 100644 --- a/src/storage/restore/ob_ls_restore_task_mgr.cpp +++ b/src/storage/restore/ob_ls_restore_task_mgr.cpp @@ -840,28 +840,45 @@ int ObLSRestoreTaskMgr::is_tablet_restore_finish_( case ObLSRestoreStatus::QUICK_RESTORE: case ObLSRestoreStatus::WAIT_QUICK_RESTORE: case ObLSRestoreStatus::QUICK_RESTORE_FINISH: { - is_finish = ha_status.is_restore_status_minor_and_major_meta() - || ha_status.is_restore_status_undefined(); - - if (!ha_status.is_restore_status_full()) { - } else if (!tablet_meta.has_transfer_table()) { - is_finish = true; - } else if (OB_FAIL(check_need_discard_transfer_tablet_(tablet_handle, discard))) { - LOG_WARN("failed to check tablet need discard", K(ret), K_(ls_id), K(tablet_meta)); - } else if (discard) { - // uncommitted tablet created by transfer, but log has been recovered. - is_finish = true; + if (ha_status.is_restore_status_undefined()) { + bool is_deleted = true; + // UNDEFINED should be deleted after log has recovered. + if (ls_restore_status.is_quick_restore()) { + is_finish = true; + } else if (OB_FAIL(check_tablet_is_deleted_(tablet_handle, is_deleted))) { + LOG_WARN("failed to check tablet is deleted", K(ret), K_(ls_id), K(tablet_meta)); + } else if (is_deleted) { + is_finish = true; + LOG_INFO("UNDEFINED tablet is deleted", K_(ls_id), K(tablet_meta)); + } else { + is_finish = false; + LOG_INFO("UNDEFINED tablet is not deleted", K_(ls_id), K(tablet_meta)); + } } else { - // FULL tablet with transfer table, need wait the table be replaced. - is_finish = false; + is_finish = ha_status.is_restore_status_minor_and_major_meta(); + if (!ha_status.is_restore_status_full()) { + } else if (!tablet_meta.has_transfer_table()) { + is_finish = true; + } else if (OB_FAIL(check_need_discard_transfer_tablet_(tablet_handle, discard))) { + LOG_WARN("failed to check tablet need discard", K(ret), K_(ls_id), K(tablet_meta)); + } else if (discard) { + // uncommitted tablet created by transfer, but log has been recovered. + is_finish = true; + } else { + // FULL tablet with transfer table, need wait the table be replaced. + is_finish = false; + } } break; } case ObLSRestoreStatus::RESTORE_MAJOR_DATA : case ObLSRestoreStatus::WAIT_RESTORE_MAJOR_DATA : { - is_finish = ha_status.is_restore_status_full() - || ha_status.is_restore_status_undefined(); + is_finish = ha_status.is_restore_status_full(); + if (ls_restore_status.is_restore_major_data()) { + is_finish |= ha_status.is_restore_status_undefined(); + LOG_INFO("skip UNDEFINED tablet, whose major need not to be restored", K_(ls_id), K(tablet_meta)); + } if (!is_finish) { // If tablet is deleted, major is no need to be restored.