From 9f46967d9064f8699e6d6f616796777f1a0ba925 Mon Sep 17 00:00:00 2001 From: obdev Date: Wed, 11 Jan 2023 04:08:20 +0000 Subject: [PATCH] [CP] fix tenant config deadlock --- deps/oblib/src/lib/lock/ob_drw_lock.h | 34 ++++++++++++++++++++++++--- src/observer/omt/ob_tenant_config.cpp | 6 ++--- src/observer/omt/ob_tenant_config.h | 2 +- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/deps/oblib/src/lib/lock/ob_drw_lock.h b/deps/oblib/src/lib/lock/ob_drw_lock.h index ad9b74a753..4667cbe23a 100644 --- a/deps/oblib/src/lib/lock/ob_drw_lock.h +++ b/deps/oblib/src/lib/lock/ob_drw_lock.h @@ -15,6 +15,7 @@ #include "lib/lock/ob_latch.h" #include "lib/stat/ob_latch_define.h" #include "lib/thread_local/ob_tsi_utils.h" +#include "lib/utility/utility.h" namespace oceanbase { @@ -28,7 +29,7 @@ public: inline int rdlock(); inline int try_rdlock(); inline int rdunlock(); - inline int wrlock(); + inline int wrlock(int64_t timeout=INT64_MAX); inline int wrunlock(); inline int try_wrlock(); class RDLockGuard @@ -79,6 +80,33 @@ public: private: DISALLOW_COPY_AND_ASSIGN(WRLockGuard); }; + class WRLockGuardRetryTimeout + { + public: + [[nodiscard]] explicit WRLockGuardRetryTimeout(DRWLock &rwlock, int64_t timeout): rwlock_(rwlock), ret_(OB_SUCCESS) + { + while(OB_UNLIKELY(OB_SUCCESS != (ret_ = rwlock_.wrlock(timeout)))) { + if (REACH_TIME_INTERVAL(10 * 1000 * 1000)) { + COMMON_LOG(WARN, "Fail to write lock for 10s, ", K_(ret)); + } + ob_usleep(timeout); + } + } + ~WRLockGuardRetryTimeout() + { + if (OB_LIKELY(OB_SUCCESS == ret_)) { + if (OB_UNLIKELY(OB_SUCCESS != (ret_ = rwlock_.wrunlock()))) { + COMMON_LOG(WARN, "Fail to write unlock, ", K_(ret)); + } + } + } + inline int get_ret() const { return ret_; } + private: + DRWLock &rwlock_; + int ret_; + private: + DISALLOW_COPY_AND_ASSIGN(WRLockGuardRetryTimeout); + }; private: struct AlignedLatch { @@ -103,13 +131,13 @@ inline int DRWLock::rdunlock() return latches_[get_itid() % OB_MAX_CPU_NUM].latch_.unlock(); } -inline int DRWLock::wrlock() +inline int DRWLock::wrlock(int64_t timeout) { int ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS; int64_t i = 0; for (i = 0; i < OB_MAX_CPU_NUM; ++i) { - if (OB_FAIL(latches_[i].latch_.wrlock(latch_id_))) { + if (OB_FAIL(latches_[i].latch_.wrlock(latch_id_, timeout))) { COMMON_LOG(WARN, "Fail to lock latch, ", K(i), K(ret)); break; } diff --git a/src/observer/omt/ob_tenant_config.cpp b/src/observer/omt/ob_tenant_config.cpp index 6f390f0fc8..af9c1fa83d 100644 --- a/src/observer/omt/ob_tenant_config.cpp +++ b/src/observer/omt/ob_tenant_config.cpp @@ -125,7 +125,7 @@ int ObTenantConfig::read_config() ObAddr server; char local_ip[OB_MAX_SERVER_ADDR_SIZE] = ""; DRWLock::RDLockGuard lguard(ObConfigManager::get_serialize_lock()); - DRWLock::WRLockGuard guard(lock_); + DRWLock::WRLockGuardRetryTimeout guard(lock_, LOCK_TIMEOUT); server = GCTX.self_addr(); if (OB_UNLIKELY(true != server.ip_to_string(local_ip, sizeof(local_ip)))) { ret = OB_CONVERT_ERROR; @@ -380,7 +380,7 @@ int ObTenantConfig::add_extra_config(const char *config_str, MEMCPY(buf, config_str, config_str_length); buf[config_str_length] = '\0'; DRWLock::RDLockGuard lguard(ObConfigManager::get_serialize_lock()); - DRWLock::WRLockGuard guard(lock_); + DRWLock::WRLockGuardRetryTimeout guard(lock_, LOCK_TIMEOUT); token = STRTOK_R(buf, ",\n", &saveptr); while (OB_SUCC(ret) && OB_LIKELY(NULL != token)) { char *saveptr_one = NULL; @@ -461,7 +461,7 @@ OB_DEF_SERIALIZE(ObTenantConfig) OB_DEF_DESERIALIZE(ObTenantConfig) { int ret = OB_SUCCESS; - DRWLock::WRLockGuard guard(lock_); + DRWLock::WRLockGuardRetryTimeout guard(lock_, LOCK_TIMEOUT); if ('[' != *(buf + pos)) { ret = OB_INVALID_DATA; LOG_ERROR("invalid tenant config", K(ret)); diff --git a/src/observer/omt/ob_tenant_config.h b/src/observer/omt/ob_tenant_config.h index 9e6476cbf1..f6eb5ddf18 100644 --- a/src/observer/omt/ob_tenant_config.h +++ b/src/observer/omt/ob_tenant_config.h @@ -63,7 +63,7 @@ public: volatile int64_t running_task_count_; }; friend class TenantConfigUpdateTask; - + static const int64_t LOCK_TIMEOUT = 1 * 1000 * 1000; public: ObTenantConfig(); ObTenantConfig(uint64_t tenant_id);