Fix ret = -4016 when migration and restore run concurrently

This commit is contained in:
hamstersox
2023-02-08 20:04:01 +08:00
committed by ob-robot
parent eb6c0e1eac
commit 838cce3c5d
4 changed files with 33 additions and 12 deletions

View File

@ -78,10 +78,11 @@ int ObLSRestoreTaskMgr::check_task_exist_(const share::ObTaskId &task_id,
return OB_SUCCESS;
}
// File-level flag read by the stub below; its value is what the stub reports
// through is_restored.
bool global_restored = false;
// Stub definition of ObLSRestoreTaskMgr::check_tablet_deleted_or_restored_.
// It does not inspect ls or tablet_id: is_deleted is always set to false,
// is_restored ends up equal to global_restored, and the call always returns
// OB_SUCCESS.
int ObLSRestoreTaskMgr::check_tablet_deleted_or_restored_(storage::ObLS &ls, const common::ObTabletID &tablet_id, bool &is_deleted, bool &is_restored)
{
is_deleted = false;
// Immediately overwritten by the assignment on the next line.
is_restored = true;
is_restored = global_restored;
return OB_SUCCESS;
}
@ -176,7 +177,7 @@ TEST_F(TestRestoreTaskMgr, taskMgr)
FakeLS ls(1001);
ObSArray<ObTabletID> tablets_need_to_restore;
ObSArray<ObTabletID> tablets_restored;
ASSERT_EQ(OB_SUCCESS, task_mgr_.pop_need_restore_tablets(tablets_need_to_restore));
ASSERT_EQ(OB_SUCCESS, task_mgr_.pop_need_restore_tablets(ls, tablets_need_to_restore));
ASSERT_EQ(OB_SUCCESS, task_mgr_.pop_restored_tablets(ls, tablets_restored));
ASSERT_EQ(1024, tablets_need_to_restore.count());
ASSERT_EQ(0, tablets_restored.count());
@ -191,6 +192,7 @@ TEST_F(TestRestoreTaskMgr, taskMgr)
ASSERT_EQ(1, task_mgr_.tablet_map_.size());
// pop restored tablets
global_restored = true;
ASSERT_EQ(OB_SUCCESS, task_mgr_.pop_restored_tablets(ls, tablets_restored));
ASSERT_EQ(1024, task_mgr_.wait_tablet_set_.size());
ASSERT_EQ(1024, task_mgr_.schedule_tablet_set_.size());

View File

@ -557,7 +557,7 @@ int ObLSRestoreHandler::check_tablet_restore_finish_(
} else {
switch (ls_restore_status.get_status()) {
case ObLSRestoreStatus::RESTORE_TABLETS_META : {
if (ObTabletRestoreStatus::is_undefined(restore_status) || ObTabletRestoreStatus::is_empty(restore_status)) {
if (!ObTabletRestoreStatus::is_pending(restore_status)) {
is_finish = true;
}
break;
@ -1976,7 +1976,7 @@ int ObLSRestoreCreateUserTabletState::leader_create_user_tablet_()
LOG_INFO("ready to create leader user tablet", K(ls_restore_status_), KPC(ls_));
if (OB_FAIL(tablet_mgr_.pop_restored_tablets(*ls_, restored_tablets))) {
LOG_WARN("fail to pop restored tablets", K(ret), KPC(ls_));
} else if (OB_FAIL(tablet_mgr_.pop_need_restore_tablets(tablet_need_restore))) {
} else if (OB_FAIL(tablet_mgr_.pop_need_restore_tablets(*ls_, tablet_need_restore))) {
LOG_WARN("fail to pop need restore tablets", K(ret), KPC(ls_));
} else if (tablet_need_restore.empty()) {
ObLSRestoreStatus next_status(ObLSRestoreStatus::Status::WAIT_RESTORE_TABLETS_META);
@ -2009,7 +2009,7 @@ int ObLSRestoreCreateUserTabletState::follower_create_user_tablet_()
LOG_INFO("ready to create follower user tablet", K(ls_restore_status_), KPC(ls_));
if (OB_FAIL(tablet_mgr_.pop_restored_tablets(*ls_, restored_tablets))) {
LOG_WARN("fail to pop restored tablets", K(ret), KPC(ls_));
} else if (OB_FAIL(tablet_mgr_.pop_need_restore_tablets(tablet_need_restore))) {
} else if (OB_FAIL(tablet_mgr_.pop_need_restore_tablets(*ls_, tablet_need_restore))) {
LOG_WARN("fail to pop need restore tablets", K(ret), KPC(ls_));
} else if (tablet_need_restore.empty()) {
ObLSRestoreStatus next_status(ObLSRestoreStatus::Status::WAIT_RESTORE_TABLETS_META);
@ -2109,7 +2109,7 @@ int ObLSQuickRestoreState::leader_quick_restore_()
LOG_WARN("log restore handle can't nullptr", K(ret), K(log_restore_handle));
} else if (OB_FAIL(tablet_mgr_.pop_restored_tablets(*ls_, restored_tablets))) {
LOG_WARN("fail to pop restored tablets", K(ret), KPC(ls_));
} else if (OB_FAIL(tablet_mgr_.pop_need_restore_tablets(tablet_need_restore))) {
} else if (OB_FAIL(tablet_mgr_.pop_need_restore_tablets(*ls_, tablet_need_restore))) {
LOG_WARN("fail to pop need restore tablets", K(ret), KPC(ls_));
} else if (tablet_need_restore.empty()) {
bool is_finish = false;
@ -2161,7 +2161,7 @@ int ObLSQuickRestoreState::follower_quick_restore_()
LOG_WARN("log restore handle can't nullptr", K(ret), K(log_restore_handle));
} else if (OB_FAIL(tablet_mgr_.pop_restored_tablets(*ls_, restored_tablets))) {
LOG_WARN("fail to pop restored tablets", K(ret), KPC(ls_));
} else if (OB_FAIL(tablet_mgr_.pop_need_restore_tablets(tablet_need_restore))) {
} else if (OB_FAIL(tablet_mgr_.pop_need_restore_tablets(*ls_, tablet_need_restore))) {
LOG_WARN("fail to pop need restore tablets", K(ret), KPC(ls_));
} else if (tablet_need_restore.empty()) {
ObLSRestoreStatus next_status(ObLSRestoreStatus::Status::WAIT_QUICK_RESTORE);
@ -2396,7 +2396,7 @@ int ObLSRestoreMajorState::leader_restore_major_data_()
LOG_INFO("ready to restore leader major data", K(ls_restore_status_), KPC(ls_));
if (OB_FAIL(tablet_mgr_.pop_restored_tablets(*ls_, restored_tablets))) {
LOG_WARN("fail to pop restored tablets", K(ret), KPC(ls_));
} else if (OB_FAIL(tablet_mgr_.pop_need_restore_tablets(tablet_need_restore))) {
} else if (OB_FAIL(tablet_mgr_.pop_need_restore_tablets(*ls_, tablet_need_restore))) {
LOG_WARN("fail to pop need restore tablets", K(ret), KPC(ls_));
} else if (tablet_need_restore.empty()) {
ObLSRestoreStatus next_status(ObLSRestoreStatus::Status::WAIT_RESTORE_MAJOR_DATA);
@ -2429,7 +2429,7 @@ int ObLSRestoreMajorState::follower_restore_major_data_()
LOG_INFO("ready to restore follower major data", K(ls_restore_status_), KPC(ls_));
if (OB_FAIL(tablet_mgr_.pop_restored_tablets(*ls_, restored_tablets))) {
LOG_WARN("fail to pop restored tablets", K(ret), KPC(ls_));
} else if (OB_FAIL(tablet_mgr_.pop_need_restore_tablets(tablet_need_restore))) {
} else if (OB_FAIL(tablet_mgr_.pop_need_restore_tablets(*ls_, tablet_need_restore))) {
LOG_WARN("fail to pop need restore tablets", K(ret), KPC(ls_));
} else if (tablet_need_restore.empty()) {
ObLSRestoreStatus next_status(ObLSRestoreStatus::Status::WAIT_RESTORE_MAJOR_DATA);

View File

@ -73,10 +73,12 @@ void ObLSRestoreTaskMgr::destroy()
schedule_tablet_set_.destroy();
}
int ObLSRestoreTaskMgr::pop_need_restore_tablets(ObIArray<ObTabletID> &tablet_need_restore)
int ObLSRestoreTaskMgr::pop_need_restore_tablets(
storage::ObLS &ls, ObIArray<ObTabletID> &tablet_need_restore)
{
int ret = OB_SUCCESS;
tablet_need_restore.reset();
ObArray<ObTabletID> need_remove_tablet;
lib::ObMutexGuard guard(mtx_);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
@ -86,7 +88,15 @@ int ObLSRestoreTaskMgr::pop_need_restore_tablets(ObIArray<ObTabletID> &tablet_ne
} else {
TabletSet::iterator iter = wait_tablet_set_.begin();
while (OB_SUCC(ret) && iter != wait_tablet_set_.end()) {
if (OB_FAIL(tablet_need_restore.push_back(iter->first))) {
bool is_deleted = false;
bool is_restored = false;
if (OB_FAIL(check_tablet_deleted_or_restored_(ls, iter->first, is_deleted, is_restored))) {
LOG_WARN("failed to check tablet deleted or restored", K(ret));
} else if (is_deleted || is_restored) {
if (OB_FAIL(need_remove_tablet.push_back(iter->first))) {
LOG_WARN("failed to push back tablet", K(ret));
}
} else if (OB_FAIL(tablet_need_restore.push_back(iter->first))) {
LOG_WARN("fail to push backup tablet", K(ret));
} else if (tablet_need_restore.count() >= OB_LS_RESOTRE_TABLET_DAG_NET_BATCH_NUM) {
break;
@ -94,6 +104,15 @@ int ObLSRestoreTaskMgr::pop_need_restore_tablets(ObIArray<ObTabletID> &tablet_ne
++iter;
}
}
if(!need_remove_tablet.empty()) {
ARRAY_FOREACH(need_remove_tablet, i) {
if (OB_FAIL(wait_tablet_set_.erase_refactored(need_remove_tablet.at(i)))) {
LOG_WARN("failed to erase from set", K(ret));
}
}
LOG_INFO("tablets may be deleted or restored and removed from wait set.", K(ls), K(need_remove_tablet));
}
if (OB_SUCC(ret)) {
LOG_INFO("succeed pop need restore tablets", K(tablet_need_restore));
}

View File

@ -39,7 +39,7 @@ public:
int add_tablet_in_schedule_set(const ObIArray<common::ObTabletID> &tablet_ids);
int schedule_tablet(const share::ObTaskId &task_id, const ObSArray<common::ObTabletID> &tablet_need_restore, bool &reach_dag_limit);
int pop_need_restore_tablets(ObIArray<common::ObTabletID> &need_restore_tablets);
int pop_need_restore_tablets(storage::ObLS &ls, ObIArray<common::ObTabletID> &need_restore_tablets);
int pop_restored_tablets(storage::ObLS &ls, ObIArray<common::ObTabletID> &tablet_send_to_follower);
int cancel_task();