[CP] fix some tablets not being restored after the tenant has been restored successfully

This commit is contained in:
wxhwang
2023-11-17 06:45:20 +00:00
committed by ob-robot
parent b4f14d7926
commit f5b64d55eb
2 changed files with 47 additions and 20 deletions

View File

@ -1516,7 +1516,7 @@ int ObLSRestoreStartState::do_restore()
LOG_WARN("fail to check ls created", K(ret), KPC(ls_));
} else if (!is_created) {
if (OB_FAIL(do_with_uncreated_ls_())) {
LOG_WARN("fail to do with uncreadted ls", K(ret), KPC(ls_));
LOG_WARN("fail to do with uncreated ls", K(ret), KPC(ls_));
}
} else if (OB_FAIL(check_ls_leader_ready_(is_ready))) {
LOG_WARN("fail to check is ls leader ready", K(ret), KPC(ls_));
@ -1573,11 +1573,21 @@ int ObLSRestoreStartState::do_with_no_ls_meta_()
int ret = OB_SUCCESS;
// an ls with no ls meta was created after ls_attr_infos was backed up.
// this ls doesn't have ls meta and tablet in backup, it only needs to replay clog.
// so just advance to restore to consistent_scn and start replay clog.
ObLSRestoreStatus next_status(ObLSRestoreStatus::Status::RESTORE_TO_CONSISTENT_SCN);
ObLSRestoreStatus next_status;
bool is_finish = false;
if (OB_FAIL(online_())) {
LOG_WARN("fail to enable log", K(ret));
} else if (OB_FAIL(report_start_replay_clog_lsn_())) {
LOG_WARN("fail to online ls", K(ret), KPC_(ls));
} else if (OB_FAIL(check_replay_to_target_scn_(ls_restore_arg_->get_consistent_scn(), is_finish))) {
LOG_WARN("failed to check clog replay to consistent scn", K(ret));
} else if (!is_finish) {
// the ls is created before consistent scn
next_status = ObLSRestoreStatus::Status::RESTORE_TO_CONSISTENT_SCN;
} else {
// the ls is created after consistent scn
next_status = ObLSRestoreStatus::Status::WAIT_RESTORE_TO_CONSISTENT_SCN;
}
if (FAILEDx(report_start_replay_clog_lsn_())) {
LOG_WARN("fail to report start replay clog lsn", K(ret));
} else if (OB_FAIL(advance_status_(*ls_, next_status))) {
LOG_WARN("fail to advance status", K(ret), K(*ls_), K(next_status));

View File

@ -840,9 +840,22 @@ int ObLSRestoreTaskMgr::is_tablet_restore_finish_(
case ObLSRestoreStatus::QUICK_RESTORE:
case ObLSRestoreStatus::WAIT_QUICK_RESTORE:
case ObLSRestoreStatus::QUICK_RESTORE_FINISH: {
is_finish = ha_status.is_restore_status_minor_and_major_meta()
|| ha_status.is_restore_status_undefined();
if (ha_status.is_restore_status_undefined()) {
bool is_deleted = true;
// UNDEFINED should be deleted after log has recovered.
if (ls_restore_status.is_quick_restore()) {
is_finish = true;
} else if (OB_FAIL(check_tablet_is_deleted_(tablet_handle, is_deleted))) {
LOG_WARN("failed to check tablet is deleted", K(ret), K_(ls_id), K(tablet_meta));
} else if (is_deleted) {
is_finish = true;
LOG_INFO("UNDEFINED tablet is deleted", K_(ls_id), K(tablet_meta));
} else {
is_finish = false;
LOG_INFO("UNDEFINED tablet is not deleted", K_(ls_id), K(tablet_meta));
}
} else {
is_finish = ha_status.is_restore_status_minor_and_major_meta();
if (!ha_status.is_restore_status_full()) {
} else if (!tablet_meta.has_transfer_table()) {
is_finish = true;
@ -855,13 +868,17 @@ int ObLSRestoreTaskMgr::is_tablet_restore_finish_(
// FULL tablet with transfer table, need wait the table be replaced.
is_finish = false;
}
}
break;
}
case ObLSRestoreStatus::RESTORE_MAJOR_DATA :
case ObLSRestoreStatus::WAIT_RESTORE_MAJOR_DATA : {
is_finish = ha_status.is_restore_status_full()
|| ha_status.is_restore_status_undefined();
is_finish = ha_status.is_restore_status_full();
if (ls_restore_status.is_restore_major_data()) {
is_finish |= ha_status.is_restore_status_undefined();
LOG_INFO("skip UNDEFINED tablet, whose major need not to be restored", K_(ls_id), K(tablet_meta));
}
if (!is_finish) {
// If the tablet is deleted, its major data does not need to be restored.