From 1b6966fef1db15be97c974bb9a6b0d50669b6aeb Mon Sep 17 00:00:00 2001 From: oceanoverflow Date: Tue, 19 Dec 2023 20:48:09 +0000 Subject: [PATCH] fix rs schedule backup state machine --- .../backup/ob_backup_data_scheduler.cpp | 12 +- .../backup/ob_backup_data_set_task_mgr.cpp | 117 ++++++++++++------ .../backup/ob_backup_data_set_task_mgr.h | 5 +- .../backup/ob_backup_schedule_task.cpp | 36 ++++++ .../backup/ob_backup_schedule_task.h | 12 ++ src/share/backup/ob_backup_struct.cpp | 4 + src/share/backup/ob_backup_struct.h | 18 +-- src/share/ob_debug_sync_point.h | 1 + 8 files changed, 160 insertions(+), 45 deletions(-) diff --git a/src/rootserver/backup/ob_backup_data_scheduler.cpp b/src/rootserver/backup/ob_backup_data_scheduler.cpp index c3bb86f853..33e235bbdd 100644 --- a/src/rootserver/backup/ob_backup_data_scheduler.cpp +++ b/src/rootserver/backup/ob_backup_data_scheduler.cpp @@ -141,7 +141,9 @@ int ObBackupDataScheduler::do_get_need_reload_task_( ObBackupLSTaskAttr &ls_task = ls_tasks.at(i); ObBackupScheduleTask *task = nullptr; bool is_dropped = false; - if (!(job.plus_archivelog_ && set_task_attr.status_.is_backup_log()) + if (ObBackupTaskStatus::Status::FINISH == ls_task.status_.status_ && OB_SUCCESS == ls_task.result_) { + // do nothing + } else if (!(job.plus_archivelog_ && set_task_attr.status_.is_backup_log()) && OB_FAIL(ObBackupDataLSTaskMgr::check_ls_is_dropped(ls_task, *sql_proxy_, is_dropped))) { LOG_WARN("failed to check ls is dropped", K(ret), K(ls_task)); } else if (is_dropped) { @@ -216,6 +218,14 @@ int ObBackupDataScheduler::build_task_( } break; } + case ObBackupDataTaskType::Type::BACKUP_META_FINISH: { + HEAP_VAR(ObBackupDataLSMetaFinishTask, tmp_task) { + if (OB_FAIL(do_build_task_(job, set_task_attr, ls_task, allocator, tmp_task, task))) { + LOG_WARN("[DATA_BACKUP]failed to do build task", K(ret), K(job), K(ls_task)); + } + } + break; + } default: break; } diff --git a/src/rootserver/backup/ob_backup_data_set_task_mgr.cpp b/src/rootserver/backup/ob_backup_data_set_task_mgr.cpp index 547ae7d951..f341ba8625 100644 --- a/src/rootserver/backup/ob_backup_data_set_task_mgr.cpp +++ b/src/rootserver/backup/ob_backup_data_set_task_mgr.cpp @@ -117,7 +117,15 @@ int ObBackupSetTaskMgr::advance_status_( LOG_WARN("[DATA_BACKUP]failed to check leader", K(ret)); } else if (OB_FAIL(ObBackupTaskOperator::advance_task_status(trans, set_task_attr_, next_status, result, scn, end_ts))) { LOG_WARN("[DATA_BACKUP]failed to advance set status", K(ret), K(set_task_attr_), K(next_status)); - } + } else { + ROOTSERVICE_EVENT_ADD("backup_data", "advance_status", + "tenant_id", job_attr_->tenant_id_, + "job_id", job_attr_->job_id_, + "backup_set_id", job_attr_->backup_set_id_, + "curr_status", set_task_attr_.status_.get_str(), + "next_status", next_status.get_str(), + "result", result); + } return ret; } @@ -150,6 +158,12 @@ int ObBackupSetTaskMgr::process() } break; } + case ObBackupStatus::Status::BACKUP_META_FINISH: { + if (OB_FAIL(backup_meta_finish_())) { + LOG_WARN("[DATA_BACKUP]failed to backup meta finish", K(ret), K(set_task_attr_)); + } + break; + } case ObBackupStatus::Status::BACKUP_DATA_MINOR: case ObBackupStatus::Status::BACKUP_DATA_MAJOR: { if (OB_FAIL(backup_data_())) { @@ -508,22 +522,17 @@ int ObBackupSetTaskMgr::backup_user_meta_() if (OB_FAIL(change_meta_turn_(sys_ls_task))) { LOG_WARN("failed to change meta turn", K(ret)); } - } else if (OB_FAIL(calc_consistent_scn_(ls_task, consistent_scn))) { - LOG_WARN("failed to calc consistent scn", K(ret), K(ls_task)); - } else if (OB_FAIL(merge_ls_meta_infos_(ls_task))) { - LOG_WARN("fail to merge ls meta infos", K(ret), K(ls_task)); - } else if (OB_FAIL(merge_tablet_to_ls_info_(consistent_scn, ls_task))) { - LOG_WARN("[DATA_BACKUP]failed to merge tablet to ls info", K(ret), K(ls_task)); - } else if (OB_FALSE_IT(DEBUG_SYNC(BEFORE_BACKUP_DATA))) { } else if (OB_FAIL(trans_.start(sql_proxy_, meta_tenant_id_))) { LOG_WARN("fail to start trans", K(ret), K(meta_tenant_id_)); } else { - ObBackupStatus next_status = ObBackupStatus::BACKUP_DATA_MINOR; + ObBackupStatus next_status = ObBackupStatus::BACKUP_META_FINISH; if (OB_FAIL(convert_task_type_(ls_task))) { LOG_WARN("[DATA_BACKUP]fail to update task type to backup data", K(ret)); } else if (OB_FAIL(advance_status_(trans_, next_status))) { LOG_WARN("[DATA_BACKUP]failed to advance status to BACKUP_DATA_MINOR", K(ret), K(next_status)); - } + } else { + ROOTSERVICE_EVENT_ADD("backup_data", "after_backup_consistent_scn"); + } if (OB_SUCC(ret)) { if (OB_FAIL(trans_.end(true))) { @@ -545,6 +554,58 @@ int ObBackupSetTaskMgr::backup_user_meta_() return ret; } +int ObBackupSetTaskMgr::backup_meta_finish_() +{ + int ret = OB_SUCCESS; + ObArray ls_task; + ObArray ls_ids; + share::SCN consistent_scn; + + DEBUG_SYNC(BEFORE_BACKUP_META_FINISH); + + if (OB_FAIL(ObBackupLSTaskOperator::get_ls_tasks(*sql_proxy_, job_attr_->job_id_, job_attr_->tenant_id_, false/*update*/, ls_task))) { + LOG_WARN("[DATA_BACKUP]failed to get log stream tasks", K(ret), "job_id", job_attr_->job_id_, "tenant_id", job_attr_->tenant_id_); + } else if (ls_task.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("[DATA_BACKUP]no logstream task", K(ret), "job_id", job_attr_->job_id_, "tenant_id", job_attr_->tenant_id_); + } else if (OB_FAIL(calc_consistent_scn_(ls_task, consistent_scn))) { + LOG_WARN("failed to calc consistent scn", K(ret), K(ls_task)); + } else if (OB_FAIL(merge_ls_meta_infos_(ls_task))) { + LOG_WARN("fail to merge ls meta infos", K(ret), K(ls_task)); + } else if (OB_FAIL(merge_tablet_to_ls_info_(consistent_scn, ls_task, ls_ids))) { + LOG_WARN("[DATA_BACKUP]failed to merge tablet to ls info", K(ret), K(ls_task)); + } else if (OB_FALSE_IT(DEBUG_SYNC(BEFORE_BACKUP_DATA))) { + } else if (OB_FAIL(trans_.start(sql_proxy_, meta_tenant_id_))) { + LOG_WARN("fail to start trans", K(ret), K(meta_tenant_id_)); + } else { + ObBackupStatus next_status = ObBackupStatus::BACKUP_DATA_MINOR; + share::ObBackupDataTaskType type(share::ObBackupDataTaskType::Type::BACKUP_DATA_MINOR); + if (OB_FAIL(convert_task_type_(ls_task))) { + LOG_WARN("[DATA_BACKUP]fail to update task type to backup data", K(ret)); + } else if (OB_FAIL(advance_status_(trans_, next_status))) { + LOG_WARN("[DATA_BACKUP]failed to advance status to BACKUP_DATA_MINOR", K(ret), K(next_status)); + } else if (OB_FAIL(generate_ls_tasks_(ls_ids, type))) { + LOG_WARN("failed to generate ls tasks", K(ret), K(ls_ids), K(type)); + } else { + ROOTSERVICE_EVENT_ADD("backup_data", "after_backup_consistent_scn"); + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(trans_.end(true))) { + LOG_WARN("failed to commit trans", KR(ret)); + } else { + backup_service_->wakeup(); + } + } else { + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = trans_.end(false))) { + LOG_WARN("failed to rollback", KR(ret), K(tmp_ret)); + } + } + } + return ret; +} + int ObBackupSetTaskMgr::calc_consistent_scn_(ObIArray &ls_tasks, share::SCN &consistent_scn) { int ret = OB_SUCCESS; @@ -675,12 +736,13 @@ int ObBackupSetTaskMgr::merge_ls_meta_infos_( return ret; } -int ObBackupSetTaskMgr::merge_tablet_to_ls_info_(const share::SCN &consistent_scn, const ObIArray &ls_tasks) +int ObBackupSetTaskMgr::merge_tablet_to_ls_info_(const share::SCN &consistent_scn, + const ObIArray &ls_tasks, common::ObIArray &ls_ids) { int ret = OB_SUCCESS; + ls_ids.reset(); ObHashMap> latest_ls_tablet_map; ObHashMap backup_ls_map; // the ls task persisted in __all_backup_ls_task - ObArray ls_ids; const int64_t OB_BACKUP_MAX_LS_BUCKET = 1024; SCN max_backup_scn; if (ls_tasks.empty() || !consistent_scn.is_valid()) { @@ -738,29 +800,6 @@ int ObBackupSetTaskMgr::merge_tablet_to_ls_info_(const share::SCN &consistent_sc } } } - share::ObBackupDataTaskType type(share::ObBackupDataTaskType::Type::BACKUP_DATA_MINOR); - if (OB_FAIL(ret)) { - } else if (OB_FAIL(trans_.start(sql_proxy_, meta_tenant_id_))) { - LOG_WARN("fail to start trans", K(ret), K(meta_tenant_id_)); - } else if (OB_FAIL(generate_ls_tasks_(ls_ids, type))) { - LOG_WARN("failed to generate ls tasks", K(ret), K(ls_ids), K(type)); - } else { - ROOTSERVICE_EVENT_ADD("backup_data", - "after_backup_consistent_scn", - "tenant_id", - job_attr_->tenant_id_, - "job_id", - job_attr_->job_id_, - "task_id", - set_task_attr_.task_id_); - } - if (trans_.is_started()) { - int tmp_ret = OB_SUCCESS; - if (OB_TMP_FAIL(trans_.end(OB_SUCC(ret)))) { - ret = OB_SUCC(ret) ? tmp_ret : ret; - LOG_WARN("failed to end trans", K(ret), K(tmp_ret)); - } - } return ret; } @@ -977,6 +1016,10 @@ int ObBackupSetTaskMgr::get_next_status_(const share::ObBackupStatus &cur_status break; } case ObBackupStatus::Status::BACKUP_USER_META: { + next_status = ObBackupStatus::Status::BACKUP_META_FINISH; + break; + } + case ObBackupStatus::Status::BACKUP_META_FINISH: { next_status = ObBackupStatus::Status::BACKUP_DATA_MINOR; break; } @@ -1525,6 +1568,10 @@ int ObBackupSetTaskMgr::convert_task_type_(const ObIArray &l ObBackupDataTaskType type; switch(set_task_attr_.status_.status_) { case ObBackupStatus::Status::BACKUP_USER_META: { + type.type_ = ObBackupDataTaskType::Type::BACKUP_META_FINISH; + break; + } + case ObBackupStatus::Status::BACKUP_META_FINISH: { type.type_ = ObBackupDataTaskType::Type::BACKUP_DATA_MINOR; break; } diff --git a/src/rootserver/backup/ob_backup_data_set_task_mgr.h b/src/rootserver/backup/ob_backup_data_set_task_mgr.h index 3efd953146..6f3bb83526 100644 --- a/src/rootserver/backup/ob_backup_data_set_task_mgr.h +++ b/src/rootserver/backup/ob_backup_data_set_task_mgr.h @@ -57,11 +57,14 @@ private: int backup_sys_meta_(); int do_backup_meta_(ObIArray &ls_task, int64_t &finish_cnt); int backup_user_meta_(); + int backup_meta_finish_(); int calc_consistent_scn_(ObIArray &ls_tasks, share::SCN &consistent_scn); int check_need_change_meta_turn_(ObIArray &ls_tasks, bool &need_change_turn); int change_meta_turn_(const share::ObBackupLSTaskAttr &sys_ls_task); int get_backup_user_meta_task_(ObIArray &ls_task); - int merge_tablet_to_ls_info_(const share::SCN &consistent_scn, const ObIArray &ls_tasks); + int merge_tablet_to_ls_info_(const share::SCN &consistent_scn, + const ObIArray &ls_tasks, + common::ObIArray &ls_ids); int get_tablet_list_by_snapshot( const share::SCN &consistent_scn, common::hash::ObHashMap> &latest_ls_tablet_map); int fill_map_with_sys_tablets_(common::hash::ObHashMap> &latest_ls_tablet_map); diff --git a/src/rootserver/backup/ob_backup_schedule_task.cpp b/src/rootserver/backup/ob_backup_schedule_task.cpp index 95cdb5f897..37a49315ab 100644 --- a/src/rootserver/backup/ob_backup_schedule_task.cpp +++ b/src/rootserver/backup/ob_backup_schedule_task.cpp @@ -971,5 +971,41 @@ int ObBackupDataLSMetaTask::execute(obrpc::ObSrvRpcProxy &rpc_proxy) const return ret; } +int ObBackupDataLSMetaFinishTask::clone(void *input_ptr, ObBackupScheduleTask *&out_task) const +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(input_ptr)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(input_ptr)); + } else { + ObBackupDataLSMetaFinishTask *my_task = new (input_ptr) ObBackupDataLSMetaFinishTask(); + if (OB_ISNULL(my_task)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("taks is nullptr", K(ret)); + } else if (OB_FAIL(my_task->ObBackupDataBaseTask::deep_copy(*this))) { + LOG_WARN("fail to deep copy base task", K(ret)); + } + if (OB_SUCC(ret)) { + out_task = my_task; + } else if (OB_NOT_NULL(my_task)) { + my_task->~ObBackupDataLSMetaFinishTask(); + my_task = nullptr; + } + } + return ret; +} + +int64_t ObBackupDataLSMetaFinishTask::get_deep_copy_size() const +{ + return sizeof(ObBackupDataLSMetaFinishTask); +} + +int ObBackupDataLSMetaFinishTask::execute(obrpc::ObSrvRpcProxy &rpc_proxy) const +{ + int ret = OB_SUCCESS; + UNUSED(rpc_proxy); + return ret; +} + } // namespace rootserver } // namespace oceanbase diff --git a/src/rootserver/backup/ob_backup_schedule_task.h b/src/rootserver/backup/ob_backup_schedule_task.h index 00d50366e4..684eabe105 100644 --- a/src/rootserver/backup/ob_backup_schedule_task.h +++ b/src/rootserver/backup/ob_backup_schedule_task.h @@ -350,6 +350,18 @@ private: DISALLOW_COPY_AND_ASSIGN(ObBackupDataLSMetaTask); }; +class ObBackupDataLSMetaFinishTask final : public ObBackupDataLSTask +{ +public: + ObBackupDataLSMetaFinishTask() {} + virtual ~ObBackupDataLSMetaFinishTask() {} + virtual int clone(void *input_ptr, ObBackupScheduleTask *&out_task) const override; + virtual int64_t get_deep_copy_size() const override; + virtual int execute(obrpc::ObSrvRpcProxy &rpc_proxy) const override; +private: + DISALLOW_COPY_AND_ASSIGN(ObBackupDataLSMetaFinishTask); +}; + class ObBackupCleanLSTask : public ObBackupScheduleTask { public: diff --git a/src/share/backup/ob_backup_struct.cpp b/src/share/backup/ob_backup_struct.cpp index 642c6c9e40..eae370057e 100644 --- a/src/share/backup/ob_backup_struct.cpp +++ b/src/share/backup/ob_backup_struct.cpp @@ -2918,6 +2918,7 @@ const char* ObBackupStatus::get_str() const "CANCELED", "BACKUP_SYS_META", "BACKUP_USER_META", + "BACKUP_META_FINISH", "BACKUP_DATA_SYS", "BACKUP_DATA_MINOR", "BACKUP_DATA_MAJOR", @@ -2946,6 +2947,7 @@ int ObBackupStatus::set_status(const char *str) "CANCELED", "BACKUP_SYS_META", "BACKUP_USER_META", + "BACKUP_META_FINISH", "BACKUP_DATA_SYS", "BACKUP_DATA_MINOR", "BACKUP_DATA_MAJOR", @@ -3398,6 +3400,7 @@ const char* ObBackupDataTaskType::get_str() const const char *str = "UNKNOWN"; const char *type_strs[] = { "BACKUP_META", + "BACKUP_META_FINISH", "BACKUP_DATA_MINOR", "BACKUP_DATA_MAJOR", "PLUS_ARCHIVE_LOG", @@ -3417,6 +3420,7 @@ int ObBackupDataTaskType::set_type(const char *buf) ObString s(buf); const char *type_strs[] = { "BACKUP_META", + "BACKUP_META_FINISH", "BACKUP_DATA_MINOR", "BACKUP_DATA_MAJOR", "PLUS_ARCHIVE_LOG", diff --git a/src/share/backup/ob_backup_struct.h b/src/share/backup/ob_backup_struct.h index bca2b94898..60e178846d 100644 --- a/src/share/backup/ob_backup_struct.h +++ b/src/share/backup/ob_backup_struct.h @@ -1220,10 +1220,11 @@ public: CANCELED = 5, BACKUP_SYS_META = 6, BACKUP_USER_META = 7, - BACKUP_DATA_SYS = 8, - BACKUP_DATA_MINOR = 9, - BACKUP_DATA_MAJOR = 10, - BACKUP_LOG = 11, + BACKUP_META_FINISH = 8, + BACKUP_DATA_SYS = 9, + BACKUP_DATA_MINOR = 10, + BACKUP_DATA_MAJOR = 11, + BACKUP_LOG = 12, MAX_STATUS }; ObBackupStatus(): status_(MAX_STATUS) {} @@ -1434,10 +1435,11 @@ struct ObBackupDataTaskType final enum Type { BACKUP_META = 0, // backup ls, tablet meta and inner tablet sstable - BACKUP_DATA_MINOR = 1, - BACKUP_DATA_MAJOR = 2, - BACKUP_PLUS_ARCHIVE_LOG = 3, - BACKUP_BUILD_INDEX = 4, + BACKUP_META_FINISH = 1, + BACKUP_DATA_MINOR = 2, + BACKUP_DATA_MAJOR = 3, + BACKUP_PLUS_ARCHIVE_LOG = 4, + BACKUP_BUILD_INDEX = 5, BACKUP_MAX }; ObBackupDataTaskType() : type_(Type::BACKUP_MAX) {} diff --git a/src/share/ob_debug_sync_point.h b/src/share/ob_debug_sync_point.h index bbd3a52e5b..a20ffbf5b2 100755 --- a/src/share/ob_debug_sync_point.h +++ b/src/share/ob_debug_sync_point.h @@ -382,6 +382,7 @@ class ObString; ACT(BEFORE_REPLAY_DDL_PREPRARE,)\ ACT(BEFORE_REPLAY_DDL_COMMIT,)\ ACT(BEFORE_BACKUP_UESR_META,)\ + ACT(BEFORE_BACKUP_META_FINISH,)\ ACT(BEFORE_INSERT_UERR_RECOVER_TABLE_JOB,)\ ACT(BEFORE_GENERATE_IMPORT_TABLE_TASK,)\ ACT(BEFORE_RECOVER_UESR_RECOVER_TABLE_JOB,)\