diff --git a/src/share/ob_debug_sync_point.h b/src/share/ob_debug_sync_point.h index 89a0b2a0f2..d09f9cc2ad 100755 --- a/src/share/ob_debug_sync_point.h +++ b/src/share/ob_debug_sync_point.h @@ -544,6 +544,7 @@ class ObString; ACT(BEFORE_TRANSFER_DOING,)\ ACT(BEFORE_BUILD_LS_MIGRATION_DAG_NET,)\ ACT(AFTER_JOIN_LEARNER_LIST,)\ + ACT(BEFORE_TRANSFER_START_COMMIT,)\ ACT(MAX_DEBUG_SYNC_POINT,) DECLARE_ENUM(ObDebugSyncPoint, debug_sync_point, OB_DEBUG_SYNC_POINT_DEF); diff --git a/src/storage/high_availability/ob_tablet_group_restore.cpp b/src/storage/high_availability/ob_tablet_group_restore.cpp index 82ecf11a0f..0960efe123 100644 --- a/src/storage/high_availability/ob_tablet_group_restore.cpp +++ b/src/storage/high_availability/ob_tablet_group_restore.cpp @@ -2728,8 +2728,6 @@ int ObTabletFinishRestoreTask::process() } } else if (OB_FAIL(update_restore_status_())) { LOG_WARN("failed to update restore status", K(ret), KPC(tablet_restore_ctx_)); - } else if (OB_FAIL(check_tablet_valid_())) { - LOG_WARN("failed to check tablet valid", K(ret), KPC(tablet_restore_ctx_)); } if (OB_SUCCESS != (tmp_ret = record_server_event_())) { @@ -2802,25 +2800,6 @@ int ObTabletFinishRestoreTask::update_restore_status_() return ret; } -int ObTabletFinishRestoreTask::check_tablet_valid_() -{ - int ret = OB_SUCCESS; - ObTabletHandle tablet_handle; - ObTablet *tablet = nullptr; - if (!is_inited_) { - ret = OB_NOT_INIT; - LOG_WARN("tablet finish restore task do not init", K(ret)); - } else if (OB_FAIL(ls_->ha_get_tablet(tablet_restore_ctx_->tablet_id_, tablet_handle))) { - LOG_WARN("failed to get tablet", K(ret), KPC(tablet_restore_ctx_)); - } else if (OB_ISNULL(tablet = tablet_handle.get_obj())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("tablet should not be NULL", K(ret), KP(tablet), KPC(tablet_restore_ctx_)); - } else if (OB_FAIL(tablet->check_valid())) { - LOG_WARN("failed to check valid", K(ret), KPC(tablet)); - } - return ret; -} - int ObTabletFinishRestoreTask::record_server_event_() { int ret = OB_SUCCESS; diff --git a/src/storage/high_availability/ob_tablet_group_restore.h b/src/storage/high_availability/ob_tablet_group_restore.h index b340f82240..80b9167d18 100644 --- a/src/storage/high_availability/ob_tablet_group_restore.h +++ b/src/storage/high_availability/ob_tablet_group_restore.h @@ -452,7 +452,6 @@ public: VIRTUAL_TO_STRING_KV(K("ObTabletRestoreTask"), KP(this), KPC(ha_dag_net_ctx_), KPC(tablet_restore_ctx_)); private: int update_restore_status_(); - int check_tablet_valid_(); int record_server_event_(); private: diff --git a/src/storage/high_availability/ob_transfer_handler.cpp b/src/storage/high_availability/ob_transfer_handler.cpp index 1a587e6c09..4f070c8c65 100644 --- a/src/storage/high_availability/ob_transfer_handler.cpp +++ b/src/storage/high_availability/ob_transfer_handler.cpp @@ -465,6 +465,8 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta LOG_WARN("failed to reset timeout for trans", K(ret)); } else if (OB_FAIL(do_trans_transfer_start_(task_info, timeout_ctx, trans))) { LOG_WARN("failed to do trans transfer start", K(ret), K(task_info)); + } else { + DEBUG_SYNC(BEFORE_TRANSFER_START_COMMIT); } if (OB_TMP_FAIL(commit_trans_(ret, trans))) { LOG_WARN("failed to commit trans", K(tmp_ret), K(ret)); diff --git a/src/storage/ls/ob_ls_tablet_service.cpp b/src/storage/ls/ob_ls_tablet_service.cpp index 7c71c9be2c..211a125f73 100755 --- a/src/storage/ls/ob_ls_tablet_service.cpp +++ b/src/storage/ls/ob_ls_tablet_service.cpp @@ -1614,7 +1614,10 @@ int ObLSTabletService::update_tablet_restore_status( } else if (restore_status == ObTabletRestoreStatus::UNDEFINED && OB_FALSE_IT((void)tablet->tablet_meta_.reset_transfer_table())) { } else { - if (OB_FAIL(ObTabletPersister::persist_and_transform_tablet(*tablet, new_tablet_handle))) { + // TODO(jiahua.cjh) move check valid to tablet init after generate new version tablet. + if (OB_FAIL(tablet->check_valid())) { + LOG_WARN("failed to check tablet valid", K(ret), K(restore_status), KPC(tablet)); + } else if (OB_FAIL(ObTabletPersister::persist_and_transform_tablet(*tablet, new_tablet_handle))) { LOG_WARN("fail to persist and transform tablet", K(ret), KPC(tablet), K(new_tablet_handle)); } else if (FALSE_IT(time_guard.click("Persist"))) { } else if (FALSE_IT(disk_addr = new_tablet_handle.get_obj()->tablet_addr_)) { diff --git a/src/storage/restore/ob_ls_restore_handler.cpp b/src/storage/restore/ob_ls_restore_handler.cpp index 17a337bc2f..0d50d277f4 100644 --- a/src/storage/restore/ob_ls_restore_handler.cpp +++ b/src/storage/restore/ob_ls_restore_handler.cpp @@ -2044,6 +2044,7 @@ int ObLSRestoreConsistentScnState::set_empty_for_transfer_tablets_() ObTabletHandle tablet_handle; ObTablet *tablet = nullptr; ObTabletCreateDeleteMdsUserData user_data; + bool is_commited = false; if (OB_FAIL(iterator.get_next_tablet(tablet_handle))) { if (OB_ITER_END == ret) { ret = OB_SUCCESS; @@ -2058,13 +2059,10 @@ int ObLSRestoreConsistentScnState::set_empty_for_transfer_tablets_() } else if (tablet->is_empty_shell()) { LOG_INFO("skip empty shell", "tablet_id", tablet->get_tablet_meta().tablet_id_); } else if (!tablet->get_tablet_meta().has_transfer_table()) { - } else if (OB_FAIL(ObTXTransferUtils::get_tablet_status(true/*get_commit*/, tablet, user_data))) { - if (OB_EMPTY_RESULT == ret) { - LOG_INFO("skip tablet which transfer in not commit", KPC(tablet)); - ret = OB_SUCCESS; - } else { - LOG_WARN("failed to get tablet status", K(ret), KPC(tablet)); - } + } else if (OB_FAIL(tablet->get_latest_tablet_status(user_data, is_commited))) { + LOG_WARN("failed to get tablet status", K(ret), KPC(tablet)); + } else if (!is_commited && ObTabletStatus::TRANSFER_IN == user_data.tablet_status_.get_status()) { + LOG_INFO("skip tablet which transfer in not commit", "tablet_id", tablet->get_tablet_meta().tablet_id_, K(user_data)); } else if (!tablet->get_tablet_meta().ha_status_.is_restore_status_full()) { LOG_INFO("skip tablet which restore status is not full", "tablet_id", tablet->get_tablet_meta().tablet_id_, @@ -2450,6 +2448,9 @@ int ObLSRestoreMajorState::leader_restore_major_data_() LOG_WARN("fail to do restore major", K(ret), K(tablet_need_restore), KPC(ls_)); } +#if 0 + // TODO(wangxiaohui.wxh): 4.3, let leader restore from backup and follower restore from leader. + int tmp_ret = OB_SUCCESS; // try rpc's best if (restored_tablets.empty()) { } else if (OB_SUCCESS != (tmp_ret = notify_follower_restore_tablet_(restored_tablets))) { @@ -2457,6 +2458,8 @@ int ObLSRestoreMajorState::leader_restore_major_data_() } else { LOG_INFO("success send tablets to follower for restore", K(restored_tablets)); } +#endif + return ret; } @@ -2493,11 +2496,21 @@ int ObLSRestoreMajorState::do_restore_major_( ObTabletGroupRestoreArg arg; bool reach_dag_limit = false; bool is_new_election = false; + // No matter is leader or follower, always restore data from backup. + if (OB_FAIL(leader_fill_tablet_group_restore_arg_(tablet_need_restore.get_tablet_list(), tablet_need_restore.action(), arg))) { + LOG_WARN("fail to fill leader ls restore arg", K(ret)); + } + +#if 0 + // TODO(wangxiaohui.wxh): 4.3, let leader restore from backup and follower restore from leader. if (!is_follower(role_) && OB_FAIL(leader_fill_tablet_group_restore_arg_(tablet_need_restore.get_tablet_list(), tablet_need_restore.action(), arg))) { LOG_WARN("fail to fill ls restore arg", K(ret)); } else if (is_follower(role_) && OB_FAIL(follower_fill_tablet_group_restore_arg_(tablet_need_restore.get_tablet_list(), tablet_need_restore.action(), arg))) { LOG_WARN("fail to fill ls restore arg", K(ret)); - } else if (OB_FAIL(check_new_election_(is_new_election))) { + } +#endif + + if (FAILEDx(check_new_election_(is_new_election))) { LOG_WARN("fail to check change role", K(ret)); } else if (is_new_election) { ret = OB_EAGAIN; diff --git a/src/storage/restore/ob_ls_restore_handler.h b/src/storage/restore/ob_ls_restore_handler.h index f0985853d8..fc72244489 100644 --- a/src/storage/restore/ob_ls_restore_handler.h +++ b/src/storage/restore/ob_ls_restore_handler.h @@ -137,11 +137,12 @@ public: int check_leader_restore_finish(bool &finish); storage::ObLS *get_ls() const { return ls_; } - // Check if log has been recovered to consistent_scn. - virtual int check_recover_to_consistent_scn_finish(bool &is_finish) const { return false; } - // Check if log has been recovered to restore_scn. - virtual int check_recover_finish(bool &is_finish) const { return false; } + virtual int check_recover_finish(bool &is_finish) const + { + is_finish = false; + return OB_SUCCESS; + } TO_STRING_KV(K_(*ls), K_(ls_restore_status)); protected: @@ -274,7 +275,7 @@ public: virtual int do_restore() override; // Check if log has recovered to consistent_scn. - virtual int check_recover_to_consistent_scn_finish(bool &is_finish) const override; + int check_recover_to_consistent_scn_finish(bool &is_finish) const; private: // Set restore status to EMPTY for those committed tablets whose restore status is FULL, @@ -293,9 +294,6 @@ public: virtual ~ObLSQuickRestoreState(); virtual int do_restore() override; - // Check if log has been recovered to consistent_scn. - virtual int check_recover_to_consistent_scn_finish(bool &is_finish) const override { return true; } - // Check if log has been recovered to restore_scn. virtual int check_recover_finish(bool &is_finish) const override; @@ -318,11 +316,12 @@ public: virtual ~ObLSQuickRestoreFinishState(); virtual int do_restore() override; - // Check if log has been recovered to consistent_scn. - virtual int check_recover_to_consistent_scn_finish(bool &is_finish) const override { return true; } - // Check if log has been recovered to restore_scn. - virtual int check_recover_finish(bool &is_finish) const override { return true; } + virtual int check_recover_finish(bool &is_finish) const override + { + is_finish = true; + return OB_SUCCESS; + } private: int leader_quick_restore_finish_(); int follower_quick_restore_finish_(); @@ -335,11 +334,13 @@ public: ObLSRestoreMajorState(); virtual ~ObLSRestoreMajorState(); virtual int do_restore() override; - // Check if log has been recovered to consistent_scn. - virtual int check_recover_to_consistent_scn_finish(bool &is_finish) const override { return true; } // Check if log has been recovered to restore_scn. - virtual int check_recover_finish(bool &is_finish) const override { return true; } + virtual int check_recover_finish(bool &is_finish) const override + { + is_finish = true; + return OB_SUCCESS; + } private: int leader_restore_major_data_(); int follower_restore_major_data_(); @@ -354,11 +355,13 @@ class ObLSRestoreFinishState final : public ObILSRestoreState ObLSRestoreFinishState(); virtual ~ObLSRestoreFinishState(); virtual int do_restore() override; - // Check if log has been recovered to consistent_scn. - virtual int check_recover_to_consistent_scn_finish(bool &is_finish) const override { return true; } // Check if log has been recovered to restore_scn. - virtual int check_recover_finish(bool &is_finish) const override { return true; } + virtual int check_recover_finish(bool &is_finish) const override + { + is_finish = true; + return OB_SUCCESS; + } private: int restore_finish_(); DISALLOW_COPY_AND_ASSIGN(ObLSRestoreFinishState); @@ -413,11 +416,12 @@ public: : ObLSRestoreWaitState(ObLSRestoreStatus::Status::WAIT_RESTORE_TO_CONSISTENT_SCN) {} virtual ~ObLSWaitRestoreConsistentScnState() {} - // Check if log has been recovered to consistent_scn. - virtual int check_recover_to_consistent_scn_finish(bool &is_finish) const override { return true; } - // Check if log has been recovered to restore_scn. - virtual int check_recover_finish(bool &is_finish) const override { return false; } + virtual int check_recover_finish(bool &is_finish) const override + { + is_finish = false; + return OB_SUCCESS; + } protected: int check_can_advance_status_(bool &can) const override; @@ -434,11 +438,12 @@ public: : ObLSRestoreWaitState(share::ObLSRestoreStatus::Status::WAIT_QUICK_RESTORE) {} virtual ~ObLSRestoreWaitQuickRestoreState() {} - // Check if log has been recovered to consistent_scn. - virtual int check_recover_to_consistent_scn_finish(bool &is_finish) const override { return true; } - // Check if log has been recovered to restore_scn. - virtual int check_recover_finish(bool &is_finish) const override { return true; } + virtual int check_recover_finish(bool &is_finish) const override + { + is_finish = true; + return OB_SUCCESS; + } private: DISALLOW_COPY_AND_ASSIGN(ObLSRestoreWaitQuickRestoreState); }; @@ -450,11 +455,12 @@ public: : ObLSRestoreWaitState(share::ObLSRestoreStatus::Status::WAIT_RESTORE_MAJOR_DATA) {} virtual ~ObLSRestoreWaitRestoreMajorDataState() {} - // Check if log has been recovered to consistent_scn. - virtual int check_recover_to_consistent_scn_finish(bool &is_finish) const override { return true; } - // Check if log has been recovered to restore_scn. - virtual int check_recover_finish(bool &is_finish) const override { return true; } + virtual int check_recover_finish(bool &is_finish) const override + { + is_finish = true; + return OB_SUCCESS; + } private: DISALLOW_COPY_AND_ASSIGN(ObLSRestoreWaitRestoreMajorDataState); }; diff --git a/src/storage/restore/ob_ls_restore_task_mgr.cpp b/src/storage/restore/ob_ls_restore_task_mgr.cpp index fd9247c819..eb7836b898 100644 --- a/src/storage/restore/ob_ls_restore_task_mgr.cpp +++ b/src/storage/restore/ob_ls_restore_task_mgr.cpp @@ -667,9 +667,10 @@ int ObLSRestoreTaskMgr::check_need_reload_tablets_(bool &reload) } else if (final_reload_) { LOG_DEBUG("final reload is set, need not reload", K_(ls_id), K(ls_restore_status), "is_follower", is_follower_()); } else if (is_follower_()) { - // follower can reload tablets only if leader has been restored except at QUICK_RESTORE. + // follower can reload tablets only if leader has been restored except at QUICK_RESTORE or RESTORE_MAJOR. bool finish = true; if (!ls_restore_status.is_quick_restore() + && !ls_restore_status.is_restore_major_data() && OB_FAIL(restore_state_handler_->check_leader_restore_finish(finish))) { LOG_WARN("fail to check leader restore finish", K(ret), KPC_(restore_state_handler)); } else if (!finish) { @@ -723,6 +724,7 @@ int ObLSRestoreTaskMgr::check_tablet_need_discard_when_reload_( discard = true; LOG_DEBUG("skip restored tablet", K(tablet_id), K(ls_restore_status), "ha_status", tablet_meta.ha_status_); } else if (ls_restore_status.is_quick_restore()) { + } else if (ls_restore_status.is_restore_major_data()) { } else if (is_follower && !has_checked_leader_done_) { // The follower does not load tablets to restore before leader has been restored. discard = true;