Support canceling recover immediately

This commit is contained in:
maosy
2023-08-28 04:44:09 +00:00
committed by ob-robot
parent 1a7878549a
commit 5bd76e3a6f
6 changed files with 24 additions and 18 deletions

View File

@ -125,6 +125,7 @@ void ObLSRecoveryReportor::run2()
ObThreadCondGuard guard(get_cond());
const uint64_t meta_tenant_id = gen_meta_tenant_id(tenant_id_);
while (!stop_) {
DEBUG_SYNC(STOP_LS_RECOVERY_THREAD);
ObCurTraceId::init(GCONF.self_addr_);
if (OB_ISNULL(GCTX.schema_service_)) {
ret = OB_ERR_UNEXPECTED;

View File

@ -1361,6 +1361,8 @@ int ObLSStatusOperator::check_ls_log_stat_info_(
// special case: support stop 1F in 2F1A
need_retry = true;
LOG_WARN("fail to generate arb replica num", KR(ret), KPC(tenant_schema), K(ls_log_stat_info));
ret = OB_OP_NOT_ALLOW;
// ret must be OB_OP_NOT_ALLOW and need_retry must be true for the check to be retried
} else if (valid_servers.count() + arb_replica_num < rootserver::majority(leader.get_paxos_replica_num())) {
ret = OB_OP_NOT_ALLOW;
LOG_WARN("ls doesn't have enough valid paxos member when checking ls_log_stat_info",

View File

@ -547,6 +547,7 @@ class ObString;
ACT(BEFORE_TRANSFER_START_COMMIT,)\
ACT(STOP_PRIMARY_LS_THREAD,)\
ACT(TRANSFER_GET_BACKFILL_TABLETS_BEFORE,)\
ACT(STOP_LS_RECOVERY_THREAD,)\
ACT(MAX_DEBUG_SYNC_POINT,)
DECLARE_ENUM(ObDebugSyncPoint, debug_sync_point, OB_DEBUG_SYNC_POINT_DEF);

View File

@ -330,11 +330,12 @@ int ObPrimaryStandbyService::do_recover_tenant(
{
int ret = OB_SUCCESS;
ObAllTenantInfo tenant_info;
uint64_t tenant_version = 0;
const uint64_t exec_tenant_id = gen_meta_tenant_id(tenant_id);
common::ObMySQLTransaction trans;
ObTenantStatus tenant_status = TENANT_STATUS_MAX;
ObLSRecoveryStatOperator ls_recovery_operator;
ObLSRecoveryStat sys_ls_recovery;
ObTenantStatus tenant_status = TENANT_STATUS_MAX;
if (OB_FAIL(check_inner_stat_())) {
LOG_WARN("inner stat error", KR(ret), K_(inited));
} else if (!obrpc::ObRecoverTenantArg::is_valid(recover_type, recovery_until_scn)
@ -360,19 +361,30 @@ int ObPrimaryStandbyService::do_recover_tenant(
} else if (tenant_info.get_switchover_status() != working_sw_status) {
ret = OB_OP_NOT_ALLOW;
LOG_WARN("unexpected tenant switchover status", KR(ret), K(working_sw_status), K(tenant_info));
} else if (obrpc::ObRecoverTenantArg::RecoverType::UNTIL == recover_type
&& recovery_until_scn < tenant_info.get_sync_scn()) {
ret = OB_OP_NOT_ALLOW;
LOG_WARN("recover before tenant sync_scn is not allow", KR(ret), K(tenant_info),
K(tenant_id), K(recover_type), K(recovery_until_scn));
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "recover before tenant sync_scn sync_scn is");
} else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_version))) {
LOG_WARN("failed to get tenant min version", KR(ret), K(tenant_id));
} else if (tenant_version >= DATA_VERSION_4_2_1_0) {
//nothing to do, no need to check sys ls recovery stat
} else if (OB_FAIL(ls_recovery_operator.get_ls_recovery_stat(tenant_id, share::SYS_LS,
true /*for_update*/, sys_ls_recovery, trans))) {
LOG_WARN("failed to get ls recovery stat", KR(ret), K(tenant_id));
} else if (obrpc::ObRecoverTenantArg::RecoverType::UNTIL == recover_type
&& (recovery_until_scn < tenant_info.get_sync_scn()
|| recovery_until_scn < sys_ls_recovery.get_sync_scn())) {
&& recovery_until_scn < sys_ls_recovery.get_sync_scn()) {
ret = OB_OP_NOT_ALLOW;
LOG_WARN("recover before tenant sync_scn or SYS LS sync_scn is not allow", KR(ret), K(tenant_info),
LOG_WARN("recover before SYS LS sync_scn is not allow", KR(ret), K(tenant_info),
K(tenant_id), K(recover_type), K(recovery_until_scn), K(sys_ls_recovery));
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "recover before tenant sync_scn or SYS LS sync_scn is");
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "recover before tenant SYS LS sync_scn is");
}
if (OB_FAIL(ret)) {
} else if (is_tenant_normal(tenant_status)) {
const SCN &recovery_until_scn_to_set = obrpc::ObRecoverTenantArg::RecoverType::UNTIL == recover_type ?
recovery_until_scn : SCN::max(tenant_info.get_sync_scn(), sys_ls_recovery.get_sync_scn());
recovery_until_scn : SCN::max(tenant_info.get_sync_scn(), sys_ls_recovery.get_sync_scn());
if (tenant_info.get_recovery_until_scn() == recovery_until_scn_to_set) {
LOG_WARN("recovery_until_scn is same with original", KR(ret), K(tenant_info), K(tenant_id),
K(recover_type), K(recovery_until_scn));

View File

@ -211,7 +211,6 @@ private:
* @return return code
*/
int check_ls_restore_status_(const uint64_t tenant_id);
private:
const static int64_t SEC_UNIT = 1000L * 1000L;
const static int64_t PRINT_INTERVAL = 10 * 1000 * 1000L;

View File

@ -700,8 +700,6 @@ int ObAllTenantInfoProxy::update_tenant_recovery_until_scn(
ObSqlString sql;
int64_t affected_rows = 0;
ObTimeoutCtx ctx;
ObLSRecoveryStatOperator ls_recovery_operator;
ObLSRecoveryStat sys_ls_recovery;
ObLogRestoreSourceMgr restore_source_mgr;
uint64_t compat_version = 0;
@ -717,13 +715,6 @@ int ObAllTenantInfoProxy::update_tenant_recovery_until_scn(
} else if (OB_UNLIKELY(!recovery_until_scn.is_valid_and_not_min())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("recovery_until_scn invalid", KR(ret), K(recovery_until_scn));
} else if (OB_FAIL(ls_recovery_operator.get_ls_recovery_stat(tenant_id, share::SYS_LS,
true /*for_update*/, sys_ls_recovery, trans))) {
LOG_WARN("failed to get ls recovery stat", KR(ret), K(tenant_id));
} else if (recovery_until_scn < sys_ls_recovery.get_sync_scn()) {
ret = OB_OP_NOT_ALLOW;
LOG_WARN("recover before SYS LS sync_scn is not allowed", KR(ret), K(tenant_id), K(recovery_until_scn), K(sys_ls_recovery));
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "recover before SYS LS sync_scn is");
} else if (OB_FAIL(rootserver::ObRootUtils::get_rs_default_timeout_ctx(ctx))) {
LOG_WARN("fail to get timeout ctx", KR(ret), K(ctx));
} else if (OB_FAIL(sql.assign_fmt(
@ -750,10 +741,10 @@ int ObAllTenantInfoProxy::update_tenant_recovery_until_scn(
}
int64_t cost = ObTimeUtility::current_time() - begin_time;
LOG_INFO("update_recovery_until_scn finish", KR(ret), K(tenant_id), K(sys_ls_recovery),
LOG_INFO("update_recovery_until_scn finish", KR(ret), K(tenant_id),
K(recovery_until_scn), K(affected_rows), K(switchover_epoch), K(sql), K(cost));
ROOTSERVICE_EVENT_ADD("tenant_info", "update_recovery_until_scn", K(ret), K(tenant_id),
K(recovery_until_scn), K(affected_rows), K(switchover_epoch), K(sys_ls_recovery));
K(recovery_until_scn), K(affected_rows), K(switchover_epoch));
return ret;
}