From 160ff54fe602feace198b76e2ea92ee93be565d2 Mon Sep 17 00:00:00 2001 From: nroskill Date: Wed, 7 Feb 2024 09:02:07 +0000 Subject: [PATCH] [CP] fix error log when kill -15 --- deps/oblib/src/lib/thread/ob_pthread.cpp | 2 +- src/observer/omt/ob_tenant.cpp | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/deps/oblib/src/lib/thread/ob_pthread.cpp b/deps/oblib/src/lib/thread/ob_pthread.cpp index 265ab0a3a9..bfb88d32d4 100644 --- a/deps/oblib/src/lib/thread/ob_pthread.cpp +++ b/deps/oblib/src/lib/thread/ob_pthread.cpp @@ -37,7 +37,7 @@ int ob_pthread_create(void **ptr, void *(*start_routine) (void *), void *arg) OB_DELETE(ObPThread, SET_USE_500("PThread"), thread); } } else { - *ptr = thread; + ATOMIC_STORE(ptr, thread); OB_LOG(INFO, "ob_pthread_create succeed", KP(thread)); } return ret; diff --git a/src/observer/omt/ob_tenant.cpp b/src/observer/omt/ob_tenant.cpp index 4ed2208f7b..49191b3441 100644 --- a/src/observer/omt/ob_tenant.cpp +++ b/src/observer/omt/ob_tenant.cpp @@ -869,7 +869,7 @@ void ObTenant::sleep_and_warn(ObTenant* tenant) { ob_usleep(10_ms); const int64_t ts = ObTimeUtility::current_time() - tenant->stopped_; - if (ts >= 3_min && TC_REACH_TIME_INTERVAL(3_min)) { + if (ts >= 3L * 60 * 1000 * 1000 && TC_REACH_TIME_INTERVAL(3L * 60 * 1000 * 1000)) { LOG_ERROR_RET(OB_SUCCESS, "tenant destructed for too long time.", K_(tenant->id), K(ts)); } } @@ -942,16 +942,18 @@ void* ObTenant::wait(void* t) int ObTenant::try_wait() { int ret = OB_SUCCESS; - if (nullptr == gc_thread_) { - if (has_created_) { - LOG_WARN("try_wait after wait successfully", K(id_), K(wait_mtl_finished_)); + if (OB_ISNULL(ATOMIC_LOAD(&gc_thread_))) { + if (!ATOMIC_BCAS(&has_created_, false, true)) { + // try_wait should not return OB_SUCCESS here, but we returned OB_SUCCESS for safety quit in main thread. + // ret = OB_ERR_UNEXPECTED; + LOG_ERROR("try_wait again after wait successfully, there may be `kill -15`", K(id_), K(wait_mtl_finished_)); } else { // it may takes too much time for killing session after remove_tenant, we should recalculate. - ATOMIC_STORE(&stopped_, ObTimeUtility::current_time()); + ATOMIC_STORE(&stopped_, ObTimeUtility::current_time()); // update, it is not 0 before here. if (OB_FAIL(ob_pthread_create(&gc_thread_, wait, this))) { + ATOMIC_STORE(&has_created_, false); LOG_ERROR("tenant gc thread create failed", K(ret), K(errno), K(id_)); } else { - has_created_ = true; ret = OB_EAGAIN; LOG_INFO("tenant pthread_create gc thread successfully", K(id_), K(gc_thread_)); } @@ -960,12 +962,12 @@ int ObTenant::try_wait() if (OB_FAIL(ob_pthread_tryjoin_np(gc_thread_))) { LOG_WARN("tenant pthread_tryjoin_np failed", K(errno), K(id_)); } else { - gc_thread_ = nullptr; // avoid try_wait again after wait success + ATOMIC_STORE(&gc_thread_, nullptr); // avoid try_wait again after wait success LOG_INFO("tenant pthread_tryjoin_np successfully", K(id_)); } const int64_t ts = ObTimeUtility::current_time() - stopped_; // only warn for one time in all tenant. - if (ts >= 3_min && REACH_TIME_INTERVAL(3_min)) { + if (ts >= 3L * 60 * 1000 * 1000 && REACH_TIME_INTERVAL(3L * 60 * 1000 * 1000)) { LOG_ERROR_RET(OB_SUCCESS, "tenant destructed for too long time.", K_(id), K(ts)); } }