From b165cdd50732054d0809b5b3d91b3f56448c5597 Mon Sep 17 00:00:00 2001 From: lalalafeier Date: Mon, 18 Dec 2023 23:17:38 +0000 Subject: [PATCH] Fix check_all_ls_restore_finish_ in clone scheduler --- src/rootserver/restore/ob_clone_scheduler.cpp | 47 +++++++++++-------- src/rootserver/restore/ob_clone_scheduler.h | 10 ++-- .../ob_tenant_snapshot_util.cpp | 10 ++-- src/sql/engine/cmd/ob_clone_executor.cpp | 3 +- 4 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/rootserver/restore/ob_clone_scheduler.cpp b/src/rootserver/restore/ob_clone_scheduler.cpp index 9b8bc82f31..f93bcff00a 100644 --- a/src/rootserver/restore/ob_clone_scheduler.cpp +++ b/src/rootserver/restore/ob_clone_scheduler.cpp @@ -890,28 +890,35 @@ int ObCloneScheduler::get_tenant_snap_ls_replica_simple_items_( return ret; } -int ObCloneScheduler::check_one_ls_replica_restore_finish_( +int ObCloneScheduler::check_one_ls_restore_finish_( const share::ObCloneJob& job, - const ObLSInfo& ls_info, - const ObLSStatusInfoArray& ls_array, + const ObLSStatusInfo& ls_status_info, + const ObArray& ls_info_array, const ObArray& ls_snapshot_array, TenantRestoreStatus &tenant_restore_status) /*a valid value in the outer func, do not reset it*/ { int ret = OB_SUCCESS; - bool found_in_ls_status = false; - for (int64_t i = 0; i < ls_array.count(); ++i) { - if (ls_array.at(i).get_ls_id() == ls_info.get_ls_id()) { - found_in_ls_status = true; + bool found_in_ls_meta_table = false; + const ObLSInfo *ls_info_ptr = nullptr; + for (int64_t i = 0; i < ls_info_array.count(); ++i) { + if (ls_info_array.at(i).get_ls_id() == ls_status_info.get_ls_id()) { + ls_info_ptr = &ls_info_array.at(i); + found_in_ls_meta_table = true; break; } } - if (!found_in_ls_status) { - LOG_INFO("ls in __all_ls_meta_table does not appear in __all_ls_status", K(ls_info), K(ls_array)); + if (!found_in_ls_meta_table) { + ret = OB_NEED_WAIT; + LOG_WARN("ls in __all_ls_status does not appear in __all_ls_meta_table", KR(ret), + K(ls_status_info), K(ls_info_array)); + } else if (OB_ISNULL(ls_info_ptr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null ls_info_ptr", KR(ret)); } else { - for (int64_t i = 0; OB_SUCC(ret) && !tenant_restore_status.is_failed() && i < ls_info.get_replicas().count(); ++i) { - const ObLSReplica &replica = ls_info.get_replicas().at(i); + for (int64_t i = 0; OB_SUCC(ret) && !tenant_restore_status.is_failed() && i < ls_info_ptr->get_replicas().count(); ++i) { + const ObLSReplica &replica = ls_info_ptr->get_replicas().at(i); bool found_in_ls_snapshot = false; share::ObLSRestoreStatus ls_restore_status; @@ -979,15 +986,15 @@ int ObCloneScheduler::check_all_ls_restore_finish_(const share::ObCloneJob &job, } else if (OB_FAIL(get_tenant_snap_ls_replica_simple_items_(job, ls_snapshot_array))) { LOG_WARN("fail to get_tenant_snap_ls_replica_simple_items_", KR(ret), K(job)); } else { - for (int64_t i = 0; OB_SUCC(ret) && !tenant_restore_status.is_failed() && i < ls_info_array.count(); ++i) { - const ObLSInfo& ls_info = ls_info_array.at(i); - if (OB_FAIL(check_one_ls_replica_restore_finish_(job, - ls_info, - ls_array, - ls_snapshot_array, - tenant_restore_status))) { - LOG_WARN("fail to check_one_ls_replica_restore_finish_", - KR(ret), K(ls_info), K(ls_array), K(ls_snapshot_array), K(tenant_restore_status), K(job)); + for (int64_t i = 0; OB_SUCC(ret) && !tenant_restore_status.is_failed() && i < ls_array.count(); ++i) { + const ObLSStatusInfo& ls_status_info = ls_array.at(i); + if (OB_FAIL(check_one_ls_restore_finish_(job, + ls_status_info, + ls_info_array, + ls_snapshot_array, + tenant_restore_status))) { + LOG_WARN("fail to check_one_ls_restore_finish_", + KR(ret), K(ls_status_info), K(ls_info_array), K(ls_snapshot_array), K(tenant_restore_status), K(job)); } } if (!tenant_restore_status.is_success()) { diff --git a/src/rootserver/restore/ob_clone_scheduler.h b/src/rootserver/restore/ob_clone_scheduler.h index cfcd4cc3e8..514dc8fb04 100644 --- a/src/rootserver/restore/ob_clone_scheduler.h +++ b/src/rootserver/restore/ob_clone_scheduler.h @@ -126,11 +126,11 @@ private: int check_all_ls_restore_finish_(const share::ObCloneJob &job, TenantRestoreStatus &tenant_restore_status); - int check_one_ls_replica_restore_finish_(const share::ObCloneJob& job, - const share::ObLSInfo& ls_info, - const share::ObLSStatusInfoArray& ls_array, - const ObArray& ls_snapshot_array, - TenantRestoreStatus &tenant_restore_status); + int check_one_ls_restore_finish_(const share::ObCloneJob& job, + const share::ObLSStatusInfo& ls_status_info, + const ObArray& ls_info_array, + const ObArray& ls_snapshot_array, + TenantRestoreStatus &tenant_restore_status); private: static const int32_t MAX_RETRY_CNT = 5; static const int64_t DEFAULT_TIMEOUT = 10 * 1000 * 1000L; diff --git a/src/rootserver/tenant_snapshot/ob_tenant_snapshot_util.cpp b/src/rootserver/tenant_snapshot/ob_tenant_snapshot_util.cpp index a38e81037c..c6d7f10bb6 100644 --- a/src/rootserver/tenant_snapshot/ob_tenant_snapshot_util.cpp +++ b/src/rootserver/tenant_snapshot/ob_tenant_snapshot_util.cpp @@ -533,8 +533,8 @@ int ObTenantSnapshotUtil::unlock_tenant_snapshot_simulated_mutex_from_snapshot_t const int64_t owner_job_id = OB_INVALID_ID; bool is_conflicted_owner_job_id = true; - if (OB_UNLIKELY(!is_user_tenant(tenant_id) || ObTenantSnapStatus::MAX == old_status || - !snapshot_scn.is_valid())) { + //NOTE: snapshot_scn may be invalid in case of failure to create snapshot. + if (OB_UNLIKELY(!is_user_tenant(tenant_id) || ObTenantSnapStatus::MAX == old_status)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(old_status), K(snapshot_scn)); } else if (OB_FAIL(unlock_(trans, tenant_id, owner_job_id, old_status, snapshot_scn, is_conflicted_owner_job_id))) { @@ -706,7 +706,6 @@ int ObTenantSnapshotUtil::add_create_tenant_snapshot_task(ObMySQLTransaction &tr { int ret = OB_SUCCESS; uint64_t data_version = 0; - SCN gts_scn = SCN::invalid_scn(); int64_t create_time = OB_INVALID_TIMESTAMP; ObTenantSnapshotTableOperator table_op; ObTenantSnapItem item; @@ -717,9 +716,6 @@ int ObTenantSnapshotUtil::add_create_tenant_snapshot_task(ObMySQLTransaction &tr || !tenant_snapshot_id.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(snapshot_name), K(tenant_snapshot_id)); - } else if (OB_ISNULL(GCTX.sql_proxy_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("sql proxy is null", KR(ret), KP(GCTX.sql_proxy_)); } else if (OB_FAIL(check_and_get_data_version(tenant_id, data_version))) { LOG_WARN("fail to check and get data version or tenant is in upgrading procedure", KR(ret), K(tenant_id)); } else if (FALSE_IT(create_time = ObTimeUtility::current_time())) { @@ -877,7 +873,7 @@ int ObTenantSnapshotUtil::add_restore_tenant_task(ObMySQLTransaction &trans, ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(snap_item)); } else if (ObTenantSnapStatus::NORMAL != snap_item.get_status()) { - ret = OB_INVALID_ARGUMENT; + ret = OB_OP_NOT_ALLOW; LOG_WARN("not allowed for current snapshot operation", KR(ret), K(snap_item)); LOG_USER_ERROR(OB_OP_NOT_ALLOW, "there may be other operation on the same tenant snapshot, restore tenant"); } else if (OB_FAIL(table_op.init(snap_item.get_tenant_id(), &trans))) { diff --git a/src/sql/engine/cmd/ob_clone_executor.cpp b/src/sql/engine/cmd/ob_clone_executor.cpp index cce07af095..6a15e7fb9c 100644 --- a/src/sql/engine/cmd/ob_clone_executor.cpp +++ b/src/sql/engine/cmd/ob_clone_executor.cpp @@ -176,8 +176,9 @@ int ObCloneTenantExecutor::wait_clone_tenant_finished_(ObExecContext &ctx, "DBA_OB_CLONE_PROGRESS or DBA_OB_CLONE_HISTORY", failed_status.length(), failed_status.ptr()))) { LOG_WARN("fail to append format", KR(tmp_ret), K(job_id), K(MTL_ID())); + } else { + LOG_USER_ERROR(OB_ERR_CLONE_TENANT, format_msg.string().length(), format_msg.ptr()); } - LOG_USER_ERROR(OB_ERR_CLONE_TENANT, format_msg.string().length(), format_msg.ptr()); } } }