fix rs schedule backup state machine

This commit is contained in:
oceanoverflow
2023-12-19 20:48:09 +00:00
committed by ob-robot
parent 64b11adfb4
commit 1b6966fef1
8 changed files with 160 additions and 45 deletions

View File

@ -141,7 +141,9 @@ int ObBackupDataScheduler::do_get_need_reload_task_(
ObBackupLSTaskAttr &ls_task = ls_tasks.at(i); ObBackupLSTaskAttr &ls_task = ls_tasks.at(i);
ObBackupScheduleTask *task = nullptr; ObBackupScheduleTask *task = nullptr;
bool is_dropped = false; bool is_dropped = false;
if (!(job.plus_archivelog_ && set_task_attr.status_.is_backup_log()) if (ObBackupTaskStatus::Status::FINISH == ls_task.status_.status_ && OB_SUCCESS == ls_task.result_) {
// do nothing
} else if (!(job.plus_archivelog_ && set_task_attr.status_.is_backup_log())
&& OB_FAIL(ObBackupDataLSTaskMgr::check_ls_is_dropped(ls_task, *sql_proxy_, is_dropped))) { && OB_FAIL(ObBackupDataLSTaskMgr::check_ls_is_dropped(ls_task, *sql_proxy_, is_dropped))) {
LOG_WARN("failed to check ls is dropped", K(ret), K(ls_task)); LOG_WARN("failed to check ls is dropped", K(ret), K(ls_task));
} else if (is_dropped) { } else if (is_dropped) {
@ -216,6 +218,14 @@ int ObBackupDataScheduler::build_task_(
} }
break; break;
} }
case ObBackupDataTaskType::Type::BACKUP_META_FINISH: {
HEAP_VAR(ObBackupDataLSMetaFinishTask, tmp_task) {
if (OB_FAIL(do_build_task_(job, set_task_attr, ls_task, allocator, tmp_task, task))) {
LOG_WARN("[DATA_BACKUP]failed to do build task", K(ret), K(job), K(ls_task));
}
}
break;
}
default: default:
break; break;
} }

View File

@ -117,7 +117,15 @@ int ObBackupSetTaskMgr::advance_status_(
LOG_WARN("[DATA_BACKUP]failed to check leader", K(ret)); LOG_WARN("[DATA_BACKUP]failed to check leader", K(ret));
} else if (OB_FAIL(ObBackupTaskOperator::advance_task_status(trans, set_task_attr_, next_status, result, scn, end_ts))) { } else if (OB_FAIL(ObBackupTaskOperator::advance_task_status(trans, set_task_attr_, next_status, result, scn, end_ts))) {
LOG_WARN("[DATA_BACKUP]failed to advance set status", K(ret), K(set_task_attr_), K(next_status)); LOG_WARN("[DATA_BACKUP]failed to advance set status", K(ret), K(set_task_attr_), K(next_status));
} } else {
ROOTSERVICE_EVENT_ADD("backup_data", "advance_status",
"tenant_id", job_attr_->tenant_id_,
"job_id", job_attr_->job_id_,
"backup_set_id", job_attr_->backup_set_id_,
"curr_status", set_task_attr_.status_.get_str(),
"next_status", next_status.get_str(),
"result", result);
}
return ret; return ret;
} }
@ -150,6 +158,12 @@ int ObBackupSetTaskMgr::process()
} }
break; break;
} }
case ObBackupStatus::Status::BACKUP_META_FINISH: {
if (OB_FAIL(backup_meta_finish_())) {
LOG_WARN("[DATA_BACKUP]failed to backup meta finish", K(ret), K(set_task_attr_));
}
break;
}
case ObBackupStatus::Status::BACKUP_DATA_MINOR: case ObBackupStatus::Status::BACKUP_DATA_MINOR:
case ObBackupStatus::Status::BACKUP_DATA_MAJOR: { case ObBackupStatus::Status::BACKUP_DATA_MAJOR: {
if (OB_FAIL(backup_data_())) { if (OB_FAIL(backup_data_())) {
@ -508,22 +522,17 @@ int ObBackupSetTaskMgr::backup_user_meta_()
if (OB_FAIL(change_meta_turn_(sys_ls_task))) { if (OB_FAIL(change_meta_turn_(sys_ls_task))) {
LOG_WARN("failed to change meta turn", K(ret)); LOG_WARN("failed to change meta turn", K(ret));
} }
} else if (OB_FAIL(calc_consistent_scn_(ls_task, consistent_scn))) {
LOG_WARN("failed to calc consistent scn", K(ret), K(ls_task));
} else if (OB_FAIL(merge_ls_meta_infos_(ls_task))) {
LOG_WARN("fail to merge ls meta infos", K(ret), K(ls_task));
} else if (OB_FAIL(merge_tablet_to_ls_info_(consistent_scn, ls_task))) {
LOG_WARN("[DATA_BACKUP]failed to merge tablet to ls info", K(ret), K(ls_task));
} else if (OB_FALSE_IT(DEBUG_SYNC(BEFORE_BACKUP_DATA))) {
} else if (OB_FAIL(trans_.start(sql_proxy_, meta_tenant_id_))) { } else if (OB_FAIL(trans_.start(sql_proxy_, meta_tenant_id_))) {
LOG_WARN("fail to start trans", K(ret), K(meta_tenant_id_)); LOG_WARN("fail to start trans", K(ret), K(meta_tenant_id_));
} else { } else {
ObBackupStatus next_status = ObBackupStatus::BACKUP_DATA_MINOR; ObBackupStatus next_status = ObBackupStatus::BACKUP_META_FINISH;
if (OB_FAIL(convert_task_type_(ls_task))) { if (OB_FAIL(convert_task_type_(ls_task))) {
LOG_WARN("[DATA_BACKUP]fail to update task type to backup data", K(ret)); LOG_WARN("[DATA_BACKUP]fail to update task type to backup data", K(ret));
} else if (OB_FAIL(advance_status_(trans_, next_status))) { } else if (OB_FAIL(advance_status_(trans_, next_status))) {
LOG_WARN("[DATA_BACKUP]failed to advance status to BACKUP_DATA_MINOR", K(ret), K(next_status)); LOG_WARN("[DATA_BACKUP]failed to advance status to BACKUP_DATA_MINOR", K(ret), K(next_status));
} } else {
ROOTSERVICE_EVENT_ADD("backup_data", "after_backup_consistent_scn");
}
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
if (OB_FAIL(trans_.end(true))) { if (OB_FAIL(trans_.end(true))) {
@ -545,6 +554,58 @@ int ObBackupSetTaskMgr::backup_user_meta_()
return ret; return ret;
} }
// Handler for the BACKUP_META_FINISH stage of the backup set task.
//
// Flow, as written below:
//   1. load the job's log stream tasks;
//   2. compute the consistent scn from them, merge the ls meta infos, and merge
//      the tablet-to-ls info (this call also fills `ls_ids`);
//   3. start trans_, then convert the ls task type, advance the set task status
//      to BACKUP_DATA_MINOR and generate BACKUP_DATA_MINOR ls tasks for `ls_ids`;
//   4. commit and wake up the backup service on success, roll back otherwise.
//
// @return OB_SUCCESS on success; OB_ERR_UNEXPECTED when no log stream task is
//         found; otherwise the error code of the first failing step.
int ObBackupSetTaskMgr::backup_meta_finish_()
{
int ret = OB_SUCCESS;
ObArray<ObBackupLSTaskAttr> ls_task;
ObArray<ObLSID> ls_ids;
share::SCN consistent_scn;
// Debug sync point hit on entry, before any task is read.
DEBUG_SYNC(BEFORE_BACKUP_META_FINISH);
if (OB_FAIL(ObBackupLSTaskOperator::get_ls_tasks(*sql_proxy_, job_attr_->job_id_, job_attr_->tenant_id_, false/*update*/, ls_task))) {
LOG_WARN("[DATA_BACKUP]failed to get log stream tasks", K(ret), "job_id", job_attr_->job_id_, "tenant_id", job_attr_->tenant_id_);
} else if (ls_task.empty()) {
// An empty task list is reported as an unexpected error.
ret = OB_ERR_UNEXPECTED;
LOG_WARN("[DATA_BACKUP]no logstream task", K(ret), "job_id", job_attr_->job_id_, "tenant_id", job_attr_->tenant_id_);
} else if (OB_FAIL(calc_consistent_scn_(ls_task, consistent_scn))) {
LOG_WARN("failed to calc consistent scn", K(ret), K(ls_task));
} else if (OB_FAIL(merge_ls_meta_infos_(ls_task))) {
LOG_WARN("fail to merge ls meta infos", K(ret), K(ls_task));
} else if (OB_FAIL(merge_tablet_to_ls_info_(consistent_scn, ls_task, ls_ids))) {
LOG_WARN("[DATA_BACKUP]failed to merge tablet to ls info", K(ret), K(ls_task));
// NOTE(review): presumably OB_FALSE_IT evaluates the sync point and yields
// false so the chain falls through to the next branch -- confirm.
} else if (OB_FALSE_IT(DEBUG_SYNC(BEFORE_BACKUP_DATA))) {
} else if (OB_FAIL(trans_.start(sql_proxy_, meta_tenant_id_))) {
LOG_WARN("fail to start trans", K(ret), K(meta_tenant_id_));
} else {
// trans_ is started from here on; every path below must end it.
ObBackupStatus next_status = ObBackupStatus::BACKUP_DATA_MINOR;
share::ObBackupDataTaskType type(share::ObBackupDataTaskType::Type::BACKUP_DATA_MINOR);
if (OB_FAIL(convert_task_type_(ls_task))) {
LOG_WARN("[DATA_BACKUP]fail to update task type to backup data", K(ret));
} else if (OB_FAIL(advance_status_(trans_, next_status))) {
LOG_WARN("[DATA_BACKUP]failed to advance status to BACKUP_DATA_MINOR", K(ret), K(next_status));
} else if (OB_FAIL(generate_ls_tasks_(ls_ids, type))) {
LOG_WARN("failed to generate ls tasks", K(ret), K(ls_ids), K(type));
} else {
// The event is recorded before the commit below is attempted.
ROOTSERVICE_EVENT_ADD("backup_data", "after_backup_consistent_scn");
}
if (OB_SUCC(ret)) {
if (OB_FAIL(trans_.end(true))) {
LOG_WARN("failed to commit trans", KR(ret));
} else {
// Wake the backup service only after a successful commit.
backup_service_->wakeup();
}
} else {
// A rollback failure is only logged; ret keeps the original error code.
int tmp_ret = OB_SUCCESS;
if (OB_SUCCESS != (tmp_ret = trans_.end(false))) {
LOG_WARN("failed to rollback", KR(ret), K(tmp_ret));
}
}
}
return ret;
}
int ObBackupSetTaskMgr::calc_consistent_scn_(ObIArray<share::ObBackupLSTaskAttr> &ls_tasks, share::SCN &consistent_scn) int ObBackupSetTaskMgr::calc_consistent_scn_(ObIArray<share::ObBackupLSTaskAttr> &ls_tasks, share::SCN &consistent_scn)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
@ -675,12 +736,13 @@ int ObBackupSetTaskMgr::merge_ls_meta_infos_(
return ret; return ret;
} }
int ObBackupSetTaskMgr::merge_tablet_to_ls_info_(const share::SCN &consistent_scn, const ObIArray<ObBackupLSTaskAttr> &ls_tasks) int ObBackupSetTaskMgr::merge_tablet_to_ls_info_(const share::SCN &consistent_scn,
const ObIArray<ObBackupLSTaskAttr> &ls_tasks, common::ObIArray<share::ObLSID> &ls_ids)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
ls_ids.reset();
ObHashMap<ObLSID, ObArray<ObTabletID>> latest_ls_tablet_map; ObHashMap<ObLSID, ObArray<ObTabletID>> latest_ls_tablet_map;
ObHashMap<ObLSID, const ObBackupLSTaskAttr *> backup_ls_map; // the ls task persisted in __all_backup_ls_task ObHashMap<ObLSID, const ObBackupLSTaskAttr *> backup_ls_map; // the ls task persisted in __all_backup_ls_task
ObArray<share::ObLSID> ls_ids;
const int64_t OB_BACKUP_MAX_LS_BUCKET = 1024; const int64_t OB_BACKUP_MAX_LS_BUCKET = 1024;
SCN max_backup_scn; SCN max_backup_scn;
if (ls_tasks.empty() || !consistent_scn.is_valid()) { if (ls_tasks.empty() || !consistent_scn.is_valid()) {
@ -738,29 +800,6 @@ int ObBackupSetTaskMgr::merge_tablet_to_ls_info_(const share::SCN &consistent_sc
} }
} }
} }
share::ObBackupDataTaskType type(share::ObBackupDataTaskType::Type::BACKUP_DATA_MINOR);
if (OB_FAIL(ret)) {
} else if (OB_FAIL(trans_.start(sql_proxy_, meta_tenant_id_))) {
LOG_WARN("fail to start trans", K(ret), K(meta_tenant_id_));
} else if (OB_FAIL(generate_ls_tasks_(ls_ids, type))) {
LOG_WARN("failed to generate ls tasks", K(ret), K(ls_ids), K(type));
} else {
ROOTSERVICE_EVENT_ADD("backup_data",
"after_backup_consistent_scn",
"tenant_id",
job_attr_->tenant_id_,
"job_id",
job_attr_->job_id_,
"task_id",
set_task_attr_.task_id_);
}
if (trans_.is_started()) {
int tmp_ret = OB_SUCCESS;
if (OB_TMP_FAIL(trans_.end(OB_SUCC(ret)))) {
ret = OB_SUCC(ret) ? tmp_ret : ret;
LOG_WARN("failed to end trans", K(ret), K(tmp_ret));
}
}
return ret; return ret;
} }
@ -977,6 +1016,10 @@ int ObBackupSetTaskMgr::get_next_status_(const share::ObBackupStatus &cur_status
break; break;
} }
case ObBackupStatus::Status::BACKUP_USER_META: { case ObBackupStatus::Status::BACKUP_USER_META: {
next_status = ObBackupStatus::Status::BACKUP_META_FINISH;
break;
}
case ObBackupStatus::Status::BACKUP_META_FINISH: {
next_status = ObBackupStatus::Status::BACKUP_DATA_MINOR; next_status = ObBackupStatus::Status::BACKUP_DATA_MINOR;
break; break;
} }
@ -1525,6 +1568,10 @@ int ObBackupSetTaskMgr::convert_task_type_(const ObIArray<ObBackupLSTaskAttr> &l
ObBackupDataTaskType type; ObBackupDataTaskType type;
switch(set_task_attr_.status_.status_) { switch(set_task_attr_.status_.status_) {
case ObBackupStatus::Status::BACKUP_USER_META: { case ObBackupStatus::Status::BACKUP_USER_META: {
type.type_ = ObBackupDataTaskType::Type::BACKUP_META_FINISH;
break;
}
case ObBackupStatus::Status::BACKUP_META_FINISH: {
type.type_ = ObBackupDataTaskType::Type::BACKUP_DATA_MINOR; type.type_ = ObBackupDataTaskType::Type::BACKUP_DATA_MINOR;
break; break;
} }

View File

@ -57,11 +57,14 @@ private:
int backup_sys_meta_(); int backup_sys_meta_();
int do_backup_meta_(ObIArray<share::ObBackupLSTaskAttr> &ls_task, int64_t &finish_cnt); int do_backup_meta_(ObIArray<share::ObBackupLSTaskAttr> &ls_task, int64_t &finish_cnt);
int backup_user_meta_(); int backup_user_meta_();
int backup_meta_finish_();
int calc_consistent_scn_(ObIArray<share::ObBackupLSTaskAttr> &ls_tasks, share::SCN &consistent_scn); int calc_consistent_scn_(ObIArray<share::ObBackupLSTaskAttr> &ls_tasks, share::SCN &consistent_scn);
int check_need_change_meta_turn_(ObIArray<share::ObBackupLSTaskAttr> &ls_tasks, bool &need_change_turn); int check_need_change_meta_turn_(ObIArray<share::ObBackupLSTaskAttr> &ls_tasks, bool &need_change_turn);
int change_meta_turn_(const share::ObBackupLSTaskAttr &sys_ls_task); int change_meta_turn_(const share::ObBackupLSTaskAttr &sys_ls_task);
int get_backup_user_meta_task_(ObIArray<share::ObBackupLSTaskAttr> &ls_task); int get_backup_user_meta_task_(ObIArray<share::ObBackupLSTaskAttr> &ls_task);
int merge_tablet_to_ls_info_(const share::SCN &consistent_scn, const ObIArray<share::ObBackupLSTaskAttr> &ls_tasks); int merge_tablet_to_ls_info_(const share::SCN &consistent_scn,
const ObIArray<share::ObBackupLSTaskAttr> &ls_tasks,
common::ObIArray<share::ObLSID> &ls_ids);
int get_tablet_list_by_snapshot( int get_tablet_list_by_snapshot(
const share::SCN &consistent_scn, common::hash::ObHashMap<share::ObLSID, ObArray<ObTabletID>> &latest_ls_tablet_map); const share::SCN &consistent_scn, common::hash::ObHashMap<share::ObLSID, ObArray<ObTabletID>> &latest_ls_tablet_map);
int fill_map_with_sys_tablets_(common::hash::ObHashMap<share::ObLSID, ObArray<ObTabletID>> &latest_ls_tablet_map); int fill_map_with_sys_tablets_(common::hash::ObHashMap<share::ObLSID, ObArray<ObTabletID>> &latest_ls_tablet_map);

View File

@ -971,5 +971,41 @@ int ObBackupDataLSMetaTask::execute(obrpc::ObSrvRpcProxy &rpc_proxy) const
return ret; return ret;
} }
int ObBackupDataLSMetaFinishTask::clone(void *input_ptr, ObBackupScheduleTask *&out_task) const
{
int ret = OB_SUCCESS;
if (OB_ISNULL(input_ptr)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), KP(input_ptr));
} else {
ObBackupDataLSMetaFinishTask *my_task = new (input_ptr) ObBackupDataLSMetaFinishTask();
if (OB_ISNULL(my_task)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("taks is nullptr", K(ret));
} else if (OB_FAIL(my_task->ObBackupDataBaseTask::deep_copy(*this))) {
LOG_WARN("fail to deep copy base task", K(ret));
}
if (OB_SUCC(ret)) {
out_task = my_task;
} else if (OB_NOT_NULL(my_task)) {
my_task->~ObBackupDataLSMetaFinishTask();
my_task = nullptr;
}
}
return ret;
}
// Number of bytes a buffer must provide to hold a copy of this task: the size
// of the task object itself.
int64_t ObBackupDataLSMetaFinishTask::get_deep_copy_size() const
{
  const int64_t task_size = sizeof(ObBackupDataLSMetaFinishTask);
  return task_size;
}
// Does no work: the RPC proxy is ignored and OB_SUCCESS is always returned.
int ObBackupDataLSMetaFinishTask::execute(obrpc::ObSrvRpcProxy &rpc_proxy) const
{
  UNUSED(rpc_proxy);
  return OB_SUCCESS;
}
} // namespace rootserver } // namespace rootserver
} // namespace oceanbase } // namespace oceanbase

View File

@ -350,6 +350,18 @@ private:
DISALLOW_COPY_AND_ASSIGN(ObBackupDataLSMetaTask); DISALLOW_COPY_AND_ASSIGN(ObBackupDataLSMetaTask);
}; };
class ObBackupDataLSMetaFinishTask final : public ObBackupDataLSTask
{
public:
ObBackupDataLSMetaFinishTask() {}
virtual ~ObBackupDataLSMetaFinishTask() {}
virtual int clone(void *input_ptr, ObBackupScheduleTask *&out_task) const override;
virtual int64_t get_deep_copy_size() const override;
virtual int execute(obrpc::ObSrvRpcProxy &rpc_proxy) const override;
private:
DISALLOW_COPY_AND_ASSIGN(ObBackupDataLSMetaFinishTask);
};
class ObBackupCleanLSTask : public ObBackupScheduleTask class ObBackupCleanLSTask : public ObBackupScheduleTask
{ {
public: public:

View File

@ -2918,6 +2918,7 @@ const char* ObBackupStatus::get_str() const
"CANCELED", "CANCELED",
"BACKUP_SYS_META", "BACKUP_SYS_META",
"BACKUP_USER_META", "BACKUP_USER_META",
"BACKUP_META_FINISH",
"BACKUP_DATA_SYS", "BACKUP_DATA_SYS",
"BACKUP_DATA_MINOR", "BACKUP_DATA_MINOR",
"BACKUP_DATA_MAJOR", "BACKUP_DATA_MAJOR",
@ -2946,6 +2947,7 @@ int ObBackupStatus::set_status(const char *str)
"CANCELED", "CANCELED",
"BACKUP_SYS_META", "BACKUP_SYS_META",
"BACKUP_USER_META", "BACKUP_USER_META",
"BACKUP_META_FINISH",
"BACKUP_DATA_SYS", "BACKUP_DATA_SYS",
"BACKUP_DATA_MINOR", "BACKUP_DATA_MINOR",
"BACKUP_DATA_MAJOR", "BACKUP_DATA_MAJOR",
@ -3398,6 +3400,7 @@ const char* ObBackupDataTaskType::get_str() const
const char *str = "UNKNOWN"; const char *str = "UNKNOWN";
const char *type_strs[] = { const char *type_strs[] = {
"BACKUP_META", "BACKUP_META",
"BACKUP_META_FINISH",
"BACKUP_DATA_MINOR", "BACKUP_DATA_MINOR",
"BACKUP_DATA_MAJOR", "BACKUP_DATA_MAJOR",
"PLUS_ARCHIVE_LOG", "PLUS_ARCHIVE_LOG",
@ -3417,6 +3420,7 @@ int ObBackupDataTaskType::set_type(const char *buf)
ObString s(buf); ObString s(buf);
const char *type_strs[] = { const char *type_strs[] = {
"BACKUP_META", "BACKUP_META",
"BACKUP_META_FINISH",
"BACKUP_DATA_MINOR", "BACKUP_DATA_MINOR",
"BACKUP_DATA_MAJOR", "BACKUP_DATA_MAJOR",
"PLUS_ARCHIVE_LOG", "PLUS_ARCHIVE_LOG",

View File

@ -1220,10 +1220,11 @@ public:
CANCELED = 5, CANCELED = 5,
BACKUP_SYS_META = 6, BACKUP_SYS_META = 6,
BACKUP_USER_META = 7, BACKUP_USER_META = 7,
BACKUP_DATA_SYS = 8, BACKUP_META_FINISH = 8,
BACKUP_DATA_MINOR = 9, BACKUP_DATA_SYS = 9,
BACKUP_DATA_MAJOR = 10, BACKUP_DATA_MINOR = 10,
BACKUP_LOG = 11, BACKUP_DATA_MAJOR = 11,
BACKUP_LOG = 12,
MAX_STATUS MAX_STATUS
}; };
ObBackupStatus(): status_(MAX_STATUS) {} ObBackupStatus(): status_(MAX_STATUS) {}
@ -1434,10 +1435,11 @@ struct ObBackupDataTaskType final
enum Type enum Type
{ {
BACKUP_META = 0, // backup ls, tablet meta and inner tablet sstable BACKUP_META = 0, // backup ls, tablet meta and inner tablet sstable
BACKUP_DATA_MINOR = 1, BACKUP_META_FINISH = 1,
BACKUP_DATA_MAJOR = 2, BACKUP_DATA_MINOR = 2,
BACKUP_PLUS_ARCHIVE_LOG = 3, BACKUP_DATA_MAJOR = 3,
BACKUP_BUILD_INDEX = 4, BACKUP_PLUS_ARCHIVE_LOG = 4,
BACKUP_BUILD_INDEX = 5,
BACKUP_MAX BACKUP_MAX
}; };
ObBackupDataTaskType() : type_(Type::BACKUP_MAX) {} ObBackupDataTaskType() : type_(Type::BACKUP_MAX) {}

View File

@ -382,6 +382,7 @@ class ObString;
ACT(BEFORE_REPLAY_DDL_PREPRARE,)\ ACT(BEFORE_REPLAY_DDL_PREPRARE,)\
ACT(BEFORE_REPLAY_DDL_COMMIT,)\ ACT(BEFORE_REPLAY_DDL_COMMIT,)\
ACT(BEFORE_BACKUP_UESR_META,)\ ACT(BEFORE_BACKUP_UESR_META,)\
ACT(BEFORE_BACKUP_META_FINISH,)\
ACT(BEFORE_INSERT_UERR_RECOVER_TABLE_JOB,)\ ACT(BEFORE_INSERT_UERR_RECOVER_TABLE_JOB,)\
ACT(BEFORE_GENERATE_IMPORT_TABLE_TASK,)\ ACT(BEFORE_GENERATE_IMPORT_TABLE_TASK,)\
ACT(BEFORE_RECOVER_UESR_RECOVER_TABLE_JOB,)\ ACT(BEFORE_RECOVER_UESR_RECOVER_TABLE_JOB,)\