From c8fe3d8eb05fc9ea4f0365014756ea71f26f00d7 Mon Sep 17 00:00:00 2001 From: LiefB <954800091@qq.com> Date: Mon, 3 Jul 2023 11:42:35 +0000 Subject: [PATCH] fix daily_major_freeze_launcher about OB_EAGAIN --- .../freeze/ob_daily_major_freeze_launcher.cpp | 17 +++++++++++++++-- .../freeze/ob_daily_major_freeze_launcher.h | 2 +- .../freeze/ob_freeze_info_manager.cpp | 7 ++++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/rootserver/freeze/ob_daily_major_freeze_launcher.cpp b/src/rootserver/freeze/ob_daily_major_freeze_launcher.cpp index af539e3fcc..ae13db5873 100644 --- a/src/rootserver/freeze/ob_daily_major_freeze_launcher.cpp +++ b/src/rootserver/freeze/ob_daily_major_freeze_launcher.cpp @@ -144,13 +144,21 @@ int ObDailyMajorFreezeLauncher::try_launch_major_freeze() LOG_WARN("fail to get localtime", KR(ret), K(errno)); } else if ((human_time_ptr->tm_hour == hour) && (human_time_ptr->tm_min == minute)) { if (!already_launch_) { + const int64_t start_us = ObTimeUtility::current_time(); + const int64_t RETRY_TIME_LIMIT = 2 * 3600 * 1000 * 1000L; // 2h do { ObMajorFreezeParam param; param.transport_ = GCTX.net_frame_->get_req_transport(); if (OB_FAIL(param.add_freeze_info(tenant_id_))) { LOG_WARN("fail to push_back", KR(ret), K_(tenant_id)); } else if (OB_FAIL(ObMajorFreezeHelper::major_freeze(param))) { - LOG_WARN("fail to major freeze", K(param), KR(ret)); + if ((OB_TIMEOUT == ret)) { + ret = OB_EAGAIN; // in order to try launch major freeze again, set ret = OB_EAGAIN here + LOG_WARN("may be ddl confilict, will try to launch major freeze again", KR(ret), K(param), + "sleep_us", MAJOR_FREEZE_RETRY_INTERVAL_US * MAJOR_FREEZE_RETRY_LIMIT); + } else { + LOG_WARN("fail to major freeze", K(param), KR(ret)); + } } else { already_launch_ = true; LOG_INFO("launch major freeze by duty time", K_(tenant_id), @@ -169,7 +177,12 @@ int ObDailyMajorFreezeLauncher::try_launch_major_freeze() ob_usleep(MAJOR_FREEZE_RETRY_INTERVAL_US); } } - } while (!stop_ && (OB_EAGAIN == ret)); + } while (!stop_ && (OB_EAGAIN == ret) && ((ObTimeUtility::current_time() - start_us) < RETRY_TIME_LIMIT)); + if (!already_launch_ && !stop_ && (OB_EAGAIN == ret) + && ((ObTimeUtility::current_time() - start_us) > RETRY_TIME_LIMIT)) { + LOG_ERROR("daily major freeze is not launched due to ddl conflict, and reaches retry " + "time limit", KR(ret), K(start_us), "now", ObTimeUtility::current_time()); + } } else { LOG_INFO("major_freeze has been already launched, no need to do again", K_(tenant_id)); } diff --git a/src/rootserver/freeze/ob_daily_major_freeze_launcher.h b/src/rootserver/freeze/ob_daily_major_freeze_launcher.h index 40f9a3ae71..83a7b1133c 100644 --- a/src/rootserver/freeze/ob_daily_major_freeze_launcher.h +++ b/src/rootserver/freeze/ob_daily_major_freeze_launcher.h @@ -55,7 +55,7 @@ private: int try_gc_tablet_checksum(); private: - static const int64_t MAJOR_FREEZE_RETRY_LIMIT = 300; + static const int64_t MAJOR_FREEZE_RETRY_LIMIT = 120; static const int64_t MAJOR_FREEZE_LAUNCHER_THREAD_CNT = 1; static const int64_t LAUNCHER_INTERVAL_US = 5 * 1000 * 1000; // 5s static const int64_t MAJOR_FREEZE_RETRY_INTERVAL_US = 1000 * 1000; // 1s diff --git a/src/rootserver/freeze/ob_freeze_info_manager.cpp b/src/rootserver/freeze/ob_freeze_info_manager.cpp index 66b954c7f1..c19778fddc 100644 --- a/src/rootserver/freeze/ob_freeze_info_manager.cpp +++ b/src/rootserver/freeze/ob_freeze_info_manager.cpp @@ -293,7 +293,12 @@ int ObFreezeInfoManager::set_freeze_info() // In 'ddl_sql_transaction.start()', it implements the semantics of 'lock_all_ddl_operation'. if (OB_FAIL(trans.start(sql_proxy_, tenant_id_, fake_schema_version))) { - LOG_WARN("fail to start transaction", KR(ret), K_(tenant_id), K(fake_schema_version)); + if ((OB_TRANS_TIMEOUT == ret) || (OB_ERR_EXCLUSIVE_LOCK_CONFLICT == ret)) { + ret = OB_EAGAIN; // in order to try launch major freeze again, set ret = OB_EAGAIN here + LOG_WARN("ddl conflict, will try to launch major freeze again", KR(ret), K_(tenant_id)); + } else { + LOG_WARN("fail to start transaction", KR(ret), K_(tenant_id), K(fake_schema_version)); + } // 1. lock snapshot_gc_ts in __all_global_stat } else if (OB_FAIL(ObGlobalStatProxy::select_snapshot_gc_scn_for_update( trans, tenant_id_, remote_snapshot_gc_scn))) {