diff --git a/src/rootserver/ob_ls_recovery_reportor.cpp b/src/rootserver/ob_ls_recovery_reportor.cpp index e8df7a164c..ba7e209276 100755 --- a/src/rootserver/ob_ls_recovery_reportor.cpp +++ b/src/rootserver/ob_ls_recovery_reportor.cpp @@ -125,6 +125,7 @@ void ObLSRecoveryReportor::run2() ObThreadCondGuard guard(get_cond()); const uint64_t meta_tenant_id = gen_meta_tenant_id(tenant_id_); while (!stop_) { + DEBUG_SYNC(STOP_LS_RECOVERY_THREAD); ObCurTraceId::init(GCONF.self_addr_); if (OB_ISNULL(GCTX.schema_service_)) { ret = OB_ERR_UNEXPECTED; diff --git a/src/share/ls/ob_ls_status_operator.cpp b/src/share/ls/ob_ls_status_operator.cpp index 0df91185b4..6ce5c84f9b 100755 --- a/src/share/ls/ob_ls_status_operator.cpp +++ b/src/share/ls/ob_ls_status_operator.cpp @@ -1361,6 +1361,8 @@ int ObLSStatusOperator::check_ls_log_stat_info_( // special case: support stop 1F in 2F1A need_retry = true; LOG_WARN("fail to generate arb replica num", KR(ret), KPC(tenant_schema), K(ls_log_stat_info)); + ret = OB_OP_NOT_ALLOW; + // a retry is possible only when ret is OB_OP_NOT_ALLOW and need_retry is set } else if (valid_servers.count() + arb_replica_num < rootserver::majority(leader.get_paxos_replica_num())) { ret = OB_OP_NOT_ALLOW; LOG_WARN("ls doesn't have enough valid paxos member when checking ls_log_stat_info", diff --git a/src/share/ob_debug_sync_point.h b/src/share/ob_debug_sync_point.h index 5a518a3d39..53f8a42e4a 100755 --- a/src/share/ob_debug_sync_point.h +++ b/src/share/ob_debug_sync_point.h @@ -547,6 +547,7 @@ class ObString; ACT(BEFORE_TRANSFER_START_COMMIT,)\ ACT(STOP_PRIMARY_LS_THREAD,)\ ACT(TRANSFER_GET_BACKFILL_TABLETS_BEFORE,)\ + ACT(STOP_LS_RECOVERY_THREAD,)\ ACT(MAX_DEBUG_SYNC_POINT,) DECLARE_ENUM(ObDebugSyncPoint, debug_sync_point, OB_DEBUG_SYNC_POINT_DEF); diff --git a/src/share/ob_primary_standby_service.cpp b/src/share/ob_primary_standby_service.cpp index e57de925ff..75e0224029 100644 --- a/src/share/ob_primary_standby_service.cpp +++ b/src/share/ob_primary_standby_service.cpp @@ -330,11 
+330,12 @@ int ObPrimaryStandbyService::do_recover_tenant( { int ret = OB_SUCCESS; ObAllTenantInfo tenant_info; + uint64_t tenant_version = 0; const uint64_t exec_tenant_id = gen_meta_tenant_id(tenant_id); common::ObMySQLTransaction trans; + ObTenantStatus tenant_status = TENANT_STATUS_MAX; ObLSRecoveryStatOperator ls_recovery_operator; ObLSRecoveryStat sys_ls_recovery; - ObTenantStatus tenant_status = TENANT_STATUS_MAX; if (OB_FAIL(check_inner_stat_())) { LOG_WARN("inner stat error", KR(ret), K_(inited)); } else if (!obrpc::ObRecoverTenantArg::is_valid(recover_type, recovery_until_scn) @@ -360,19 +361,30 @@ int ObPrimaryStandbyService::do_recover_tenant( } else if (tenant_info.get_switchover_status() != working_sw_status) { ret = OB_OP_NOT_ALLOW; LOG_WARN("unexpected tenant switchover status", KR(ret), K(working_sw_status), K(tenant_info)); + } else if (obrpc::ObRecoverTenantArg::RecoverType::UNTIL == recover_type + && recovery_until_scn < tenant_info.get_sync_scn()) { + ret = OB_OP_NOT_ALLOW; + LOG_WARN("recover before tenant sync_scn is not allow", KR(ret), K(tenant_info), + K(tenant_id), K(recover_type), K(recovery_until_scn)); + LOG_USER_ERROR(OB_OP_NOT_ALLOW, "recover before tenant sync_scn is"); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_version))) { + LOG_WARN("failed to get tenant min version", KR(ret), K(tenant_id)); + } else if (tenant_version >= DATA_VERSION_4_2_1_0) { + // nothing to do: at tenant data version 4.2.1.0 or above the SYS LS recovery stat check below is skipped } else if (OB_FAIL(ls_recovery_operator.get_ls_recovery_stat(tenant_id, share::SYS_LS, true /*for_update*/, sys_ls_recovery, trans))) { LOG_WARN("failed to get ls recovery stat", KR(ret), K(tenant_id)); } else if (obrpc::ObRecoverTenantArg::RecoverType::UNTIL == recover_type - && (recovery_until_scn < tenant_info.get_sync_scn() - || recovery_until_scn < sys_ls_recovery.get_sync_scn())) { + && recovery_until_scn < sys_ls_recovery.get_sync_scn()) { ret = OB_OP_NOT_ALLOW; - LOG_WARN("recover before tenant sync_scn or 
SYS LS sync_scn is not allow", KR(ret), K(tenant_info), + LOG_WARN("recover before SYS LS sync_scn is not allow", KR(ret), K(tenant_info), K(tenant_id), K(recover_type), K(recovery_until_scn), K(sys_ls_recovery)); - LOG_USER_ERROR(OB_OP_NOT_ALLOW, "recover before tenant sync_scn or SYS LS sync_scn is"); + LOG_USER_ERROR(OB_OP_NOT_ALLOW, "recover before tenant SYS LS sync_scn is"); + } + if (OB_FAIL(ret)) { } else if (is_tenant_normal(tenant_status)) { const SCN &recovery_until_scn_to_set = obrpc::ObRecoverTenantArg::RecoverType::UNTIL == recover_type ? - recovery_until_scn : SCN::max(tenant_info.get_sync_scn(), sys_ls_recovery.get_sync_scn()); + recovery_until_scn : SCN::max(tenant_info.get_sync_scn(), sys_ls_recovery.get_sync_scn()); if (tenant_info.get_recovery_until_scn() == recovery_until_scn_to_set) { LOG_WARN("recovery_until_scn is same with original", KR(ret), K(tenant_info), K(tenant_id), K(recover_type), K(recovery_until_scn)); diff --git a/src/share/ob_primary_standby_service.h b/src/share/ob_primary_standby_service.h index fb589cdd02..bd171c332e 100644 --- a/src/share/ob_primary_standby_service.h +++ b/src/share/ob_primary_standby_service.h @@ -211,7 +211,6 @@ private: * @return return code */ int check_ls_restore_status_(const uint64_t tenant_id); - private: const static int64_t SEC_UNIT = 1000L * 1000L; const static int64_t PRINT_INTERVAL = 10 * 1000 * 1000L; diff --git a/src/share/ob_tenant_info_proxy.cpp b/src/share/ob_tenant_info_proxy.cpp index 6f5197232c..b449a4c824 100755 --- a/src/share/ob_tenant_info_proxy.cpp +++ b/src/share/ob_tenant_info_proxy.cpp @@ -700,8 +700,6 @@ int ObAllTenantInfoProxy::update_tenant_recovery_until_scn( ObSqlString sql; int64_t affected_rows = 0; ObTimeoutCtx ctx; - ObLSRecoveryStatOperator ls_recovery_operator; - ObLSRecoveryStat sys_ls_recovery; ObLogRestoreSourceMgr restore_source_mgr; uint64_t compat_version = 0; @@ -717,13 +715,6 @@ int ObAllTenantInfoProxy::update_tenant_recovery_until_scn( } else if 
(OB_UNLIKELY(!recovery_until_scn.is_valid_and_not_min())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("recovery_until_scn invalid", KR(ret), K(recovery_until_scn)); - } else if (OB_FAIL(ls_recovery_operator.get_ls_recovery_stat(tenant_id, share::SYS_LS, - true /*for_update*/, sys_ls_recovery, trans))) { - LOG_WARN("failed to get ls recovery stat", KR(ret), K(tenant_id)); - } else if (recovery_until_scn < sys_ls_recovery.get_sync_scn()) { - ret = OB_OP_NOT_ALLOW; - LOG_WARN("recover before SYS LS sync_scn is not allowed", KR(ret), K(tenant_id), K(recovery_until_scn), K(sys_ls_recovery)); - LOG_USER_ERROR(OB_OP_NOT_ALLOW, "recover before SYS LS sync_scn is"); } else if (OB_FAIL(rootserver::ObRootUtils::get_rs_default_timeout_ctx(ctx))) { LOG_WARN("fail to get timeout ctx", KR(ret), K(ctx)); } else if (OB_FAIL(sql.assign_fmt( @@ -750,10 +741,10 @@ int ObAllTenantInfoProxy::update_tenant_recovery_until_scn( } int64_t cost = ObTimeUtility::current_time() - begin_time; - LOG_INFO("update_recovery_until_scn finish", KR(ret), K(tenant_id), K(sys_ls_recovery), + LOG_INFO("update_recovery_until_scn finish", KR(ret), K(tenant_id), K(recovery_until_scn), K(affected_rows), K(switchover_epoch), K(sql), K(cost)); ROOTSERVICE_EVENT_ADD("tenant_info", "update_recovery_until_scn", K(ret), K(tenant_id), - K(recovery_until_scn), K(affected_rows), K(switchover_epoch), K(sys_ls_recovery)); + K(recovery_until_scn), K(affected_rows), K(switchover_epoch)); return ret; }