Fix OB_EAGAIN retry handling in daily_major_freeze_launcher

This commit is contained in:
LiefB 2023-07-03 11:42:35 +00:00 committed by ob-robot
parent 92f96f39d8
commit c8fe3d8eb0
3 changed files with 22 additions and 4 deletions

View File

@ -144,13 +144,21 @@ int ObDailyMajorFreezeLauncher::try_launch_major_freeze()
LOG_WARN("fail to get localtime", KR(ret), K(errno));
} else if ((human_time_ptr->tm_hour == hour) && (human_time_ptr->tm_min == minute)) {
if (!already_launch_) {
const int64_t start_us = ObTimeUtility::current_time();
const int64_t RETRY_TIME_LIMIT = 2 * 3600 * 1000 * 1000L; // 2h
do {
ObMajorFreezeParam param;
param.transport_ = GCTX.net_frame_->get_req_transport();
if (OB_FAIL(param.add_freeze_info(tenant_id_))) {
LOG_WARN("fail to push_back", KR(ret), K_(tenant_id));
} else if (OB_FAIL(ObMajorFreezeHelper::major_freeze(param))) {
LOG_WARN("fail to major freeze", K(param), KR(ret));
if ((OB_TIMEOUT == ret)) {
ret = OB_EAGAIN; // in order to try launch major freeze again, set ret = OB_EAGAIN here
LOG_WARN("may be ddl confilict, will try to launch major freeze again", KR(ret), K(param),
"sleep_us", MAJOR_FREEZE_RETRY_INTERVAL_US * MAJOR_FREEZE_RETRY_LIMIT);
} else {
LOG_WARN("fail to major freeze", K(param), KR(ret));
}
} else {
already_launch_ = true;
LOG_INFO("launch major freeze by duty time", K_(tenant_id),
@ -169,7 +177,12 @@ int ObDailyMajorFreezeLauncher::try_launch_major_freeze()
ob_usleep(MAJOR_FREEZE_RETRY_INTERVAL_US);
}
}
} while (!stop_ && (OB_EAGAIN == ret));
} while (!stop_ && (OB_EAGAIN == ret) && ((ObTimeUtility::current_time() - start_us) < RETRY_TIME_LIMIT));
if (!already_launch_ && !stop_ && (OB_EAGAIN == ret)
&& ((ObTimeUtility::current_time() - start_us) > RETRY_TIME_LIMIT)) {
LOG_ERROR("daily major freeze is not launched due to ddl conflict, and reaches retry "
"time limit", KR(ret), K(start_us), "now", ObTimeUtility::current_time());
}
} else {
LOG_INFO("major_freeze has been already launched, no need to do again", K_(tenant_id));
}

View File

@ -55,7 +55,7 @@ private:
int try_gc_tablet_checksum();
private:
static const int64_t MAJOR_FREEZE_RETRY_LIMIT = 300;
static const int64_t MAJOR_FREEZE_RETRY_LIMIT = 120;
static const int64_t MAJOR_FREEZE_LAUNCHER_THREAD_CNT = 1;
static const int64_t LAUNCHER_INTERVAL_US = 5 * 1000 * 1000; // 5s
static const int64_t MAJOR_FREEZE_RETRY_INTERVAL_US = 1000 * 1000; // 1s

View File

@ -293,7 +293,12 @@ int ObFreezeInfoManager::set_freeze_info()
// In 'ddl_sql_transaction.start()', it implements the semantics of 'lock_all_ddl_operation'.
if (OB_FAIL(trans.start(sql_proxy_, tenant_id_, fake_schema_version))) {
LOG_WARN("fail to start transaction", KR(ret), K_(tenant_id), K(fake_schema_version));
if ((OB_TRANS_TIMEOUT == ret) || (OB_ERR_EXCLUSIVE_LOCK_CONFLICT == ret)) {
ret = OB_EAGAIN; // in order to try launch major freeze again, set ret = OB_EAGAIN here
LOG_WARN("ddl conflict, will try to launch major freeze again", KR(ret), K_(tenant_id));
} else {
LOG_WARN("fail to start transaction", KR(ret), K_(tenant_id), K(fake_schema_version));
}
// 1. lock snapshot_gc_ts in __all_global_stat
} else if (OB_FAIL(ObGlobalStatProxy::select_snapshot_gc_scn_for_update(
trans, tenant_id_, remote_snapshot_gc_scn))) {