diff --git a/deps/oblib/src/lib/literals/ob_literals.h b/deps/oblib/src/lib/literals/ob_literals.h index 6e855cd1c..53180d00a 100644 --- a/deps/oblib/src/lib/literals/ob_literals.h +++ b/deps/oblib/src/lib/literals/ob_literals.h @@ -197,4 +197,4 @@ struct ObTimeLiteralPrettyPrinter { }; }// oceanbase -#endif \ No newline at end of file +#endif diff --git a/src/storage/memtable/ob_memtable.cpp b/src/storage/memtable/ob_memtable.cpp index b255ab71f..2ed2482a7 100755 --- a/src/storage/memtable/ob_memtable.cpp +++ b/src/storage/memtable/ob_memtable.cpp @@ -10,6 +10,8 @@ * See the Mulan PubL v2 for more details. */ +#define USING_LOG_PREFIX STORAGE + #include "common/rowkey/ob_store_rowkey.h" #include "storage/memtable/ob_memtable.h" @@ -229,18 +231,13 @@ void ObMemtable::destroy() ObTimeGuard time_guard("ObMemtable::destroy()", 100 * 1000); int ret = OB_SUCCESS; if (is_inited_) { - const common::ObTabletID tablet_id = key_.tablet_id_; const int64_t cost_time = ObTimeUtility::current_time() - mt_stat_.release_time_; if (cost_time > 1 * 1000 * 1000) { - STORAGE_LOG(WARN, "it costs too much time from release to destroy", K(cost_time), K(tablet_id), KP(this)); + STORAGE_LOG(WARN, "it costs too much time from release to destroy", K(cost_time), KP(this)); } + set_allow_freeze(true); STORAGE_LOG(INFO, "memtable destroyed", K(*this)); time_guard.click(); - ObTenantFreezer *freezer = nullptr; - freezer = MTL(ObTenantFreezer *); - if (OB_SUCCESS != freezer->unset_tenant_slow_freeze(tablet_id)) { - TRANS_LOG(WARN, "unset tenant slow freeze failed.", K(*this)); - } } ObITable::reset(); ObFreezeCheckpoint::reset(); @@ -1486,6 +1483,28 @@ void ObMemtable::resolve_left_boundary_for_active_memtable() } } +void ObMemtable::set_allow_freeze(const bool allow_freeze) +{ + int ret = OB_SUCCESS; + if (allow_freeze_ != allow_freeze) { + const common::ObTabletID tablet_id = key_.tablet_id_; + const int64_t retire_clock = local_allocator_.get_retire_clock(); + ObTenantFreezer *freezer = 
nullptr; + freezer = MTL(ObTenantFreezer *); + + ATOMIC_STORE(&allow_freeze_, allow_freeze); + if (allow_freeze) { + if (OB_FAIL(freezer->unset_tenant_slow_freeze(tablet_id))) { + LOG_WARN("unset tenant slow freeze failed.", KPC(this)); + } + } else { + if (OB_FAIL(freezer->set_tenant_slow_freeze(tablet_id, retire_clock))) { + LOG_WARN("set tenant slow freeze failed.", KPC(this)); + } + } + } +} + int64_t ObMemtable::inc_write_ref_() { return ATOMIC_AAF(&write_ref_cnt_, 1); diff --git a/src/storage/memtable/ob_memtable.h b/src/storage/memtable/ob_memtable.h index b175f1992..6a76fa54d 100644 --- a/src/storage/memtable/ob_memtable.h +++ b/src/storage/memtable/ob_memtable.h @@ -16,6 +16,7 @@ #include "share/ob_tenant_mgr.h" #include "share/ob_cluster_version.h" +#include "lib/literals/ob_literals.h" #include "lib/worker.h" #include "storage/memtable/ob_memtable_interface.h" #include "storage/memtable/mvcc/ob_query_engine.h" @@ -459,7 +460,7 @@ public: int resolve_right_boundary_for_migration(); void unset_logging_blocked_for_active_memtable(); void resolve_left_boundary_for_active_memtable(); - inline void set_allow_freeze(const bool allow_freeze) { ATOMIC_STORE(&allow_freeze_, allow_freeze); } + void set_allow_freeze(const bool allow_freeze); inline bool allow_freeze() const { return ATOMIC_LOAD(&allow_freeze_); } /* multi source data operations */ diff --git a/src/storage/tx_storage/ob_tenant_freezer.cpp b/src/storage/tx_storage/ob_tenant_freezer.cpp index 2e3b0a9b2..f705020a6 100755 --- a/src/storage/tx_storage/ob_tenant_freezer.cpp +++ b/src/storage/tx_storage/ob_tenant_freezer.cpp @@ -320,7 +320,7 @@ int ObTenantFreezer::tenant_freeze_() ObLSService *ls_srv = MTL(ObLSService *); FLOG_INFO("[TenantFreezer] tenant_freeze start", KR(ret)); - ObTenantFreezeGuard freeze_guard(allocator_mgr_, ret); + ObTenantFreezeGuard freeze_guard(allocator_mgr_, ret, tenant_info_); if (OB_FAIL(ls_srv->get_ls_iter(iter, ObLSGetMod::TXSTORAGE_MOD))) { LOG_WARN("[TenantFreezer] fail to 
get log stream iterator", KR(ret)); } else { @@ -549,9 +549,6 @@ int ObTenantFreezer::check_and_freeze_normal_data_(ObTenantFreezeCtx &ctx) LOG_WARN("[TenantFreezer] fail to get mem usage", KR(ret)); } else { need_freeze = need_freeze_(ctx); - if (need_freeze && !is_minor_need_slow_(ctx)) { - unset_tenant_slow_freeze_(); - } log_frozen_memstore_info_if_need_(ctx); halt_prewarm_if_need_(ctx); } @@ -715,58 +712,14 @@ int ObTenantFreezer::unset_tenant_freezing_(const bool rollback_freeze_cnt) int ObTenantFreezer::set_tenant_slow_freeze( const common::ObTabletID &tablet_id, - const int64_t protect_clock) + const int64_t retire_clock) { int ret = OB_SUCCESS; if (!is_inited_) { ret = OB_NOT_INIT; LOG_WARN("[TenantFreezer] tenant manager not init", KR(ret)); } else { - const uint64_t tenant_id = tenant_info_.tenant_id_; - if (!tenant_info_.slow_freeze_) { - bool success = ATOMIC_BCAS(&tenant_info_.slow_freeze_, false, true); - if (success) { - tenant_info_.slow_freeze_timestamp_ = ObTimeUtility::fast_current_time(); - tenant_info_.slow_freeze_min_protect_clock_ = protect_clock; - tenant_info_.slow_tablet_ = tablet_id; - } - } else if (tenant_info_.slow_freeze_ && - tenant_info_.slow_freeze_min_protect_clock_ > protect_clock) { - tenant_info_.slow_freeze_timestamp_ = ObTimeUtility::fast_current_time(); - tenant_info_.slow_freeze_min_protect_clock_ = protect_clock; - tenant_info_.slow_tablet_ = tablet_id; - } - } - return ret; -} - -int ObTenantFreezer::unset_tenant_slow_freeze_() -{ - // NOTE: yuanyuan.cxf do not lock to prevent deadlock. 
- int ret = OB_SUCCESS; - const uint64_t tenant_id = tenant_info_.tenant_id_; - if (tenant_info_.slow_freeze_) { - bool success = ATOMIC_BCAS(&tenant_info_.slow_freeze_, true, false); - if (success) { - tenant_info_.slow_freeze_timestamp_ = 0; - tenant_info_.slow_freeze_min_protect_clock_ = INT64_MAX; - tenant_info_.slow_tablet_.reset(); - } else { - ret = OB_ERR_UNEXPECTED; - LOG_ERROR("[TenantFreezer] Unexpected error", K(tenant_id), KR(ret)); - } - } - return ret; -} - -int ObTenantFreezer::unset_tenant_slow_freeze() -{ - int ret = OB_SUCCESS; - if (!is_inited_) { - ret = OB_NOT_INIT; - LOG_WARN("[TenantFreezer] tenant manager not init", KR(ret)); - } else { - ret = unset_tenant_slow_freeze_(); + tenant_info_.set_slow_freeze(tablet_id, retire_clock, FREEZE_TRIGGER_INTERVAL); } return ret; } @@ -778,18 +731,7 @@ int ObTenantFreezer::unset_tenant_slow_freeze(const common::ObTabletID &tablet_i ret = OB_NOT_INIT; LOG_WARN("[TenantFreezer] tenant manager not init", KR(ret)); } else { - const uint64_t tenant_id = tenant_info_.tenant_id_; - if (tenant_info_.slow_freeze_ && tenant_info_.slow_tablet_ == tablet_id) { - bool success = ATOMIC_BCAS(&tenant_info_.slow_freeze_, true, false); - if (success) { - tenant_info_.slow_freeze_timestamp_ = 0; - tenant_info_.slow_freeze_min_protect_clock_ = INT64_MAX; - tenant_info_.slow_tablet_.reset(); - } else { - ret = OB_ERR_UNEXPECTED; - LOG_ERROR("[TenantFreezer] Unexpected error", K(tenant_id), K(tablet_id), KR(ret)); - } - } + tenant_info_.unset_slow_freeze(tablet_id); } return ret; } @@ -1416,6 +1358,16 @@ bool ObTenantFreezer::need_freeze_(const ObTenantFreezeCtx &ctx) // 1. trigger by active memstore used. if (ctx.freezable_active_memstore_used_ > ctx.memstore_freeze_trigger_) { need_freeze = true; + } + // 2. 
may be slowed + if (need_freeze && tenant_info_.is_freeze_need_slow()) { + need_freeze = false; + LOG_INFO("[TenantFreezer] A minor freeze is needed but slowed.", + K_(tenant_info), + K(ctx.active_memstore_used_), + K(ctx.memstore_freeze_trigger_), K(ctx.max_cached_memstore_size_)); + } + if (need_freeze) { LOG_INFO("[TenantFreezer] A minor freeze is needed by active memstore used.", K(ctx.freezable_active_memstore_used_), K(ctx.memstore_freeze_trigger_), K(ctx.max_cached_memstore_size_)); } @@ -1433,24 +1385,6 @@ bool ObTenantFreezer::is_major_freeze_turn_() return (major_compact_trigger != 0 && freeze_cnt >= major_compact_trigger); } -bool ObTenantFreezer::is_minor_need_slow_(const ObTenantFreezeCtx &ctx) -{ - int ret = OB_SUCCESS; - bool need_slow = false; - if (tenant_info_.slow_freeze_) { - need_slow = true; - int64_t now = ObTimeUtility::fast_current_time(); - if (ctx.total_memstore_hold_ <= ctx.memstore_freeze_trigger_) { - // no need minor freeze - } else if (now - tenant_info_.slow_freeze_timestamp_ >= SLOW_FREEZE_INTERVAL) { - need_slow = false; - } else { - // no need minor freeze - } - } - return need_slow; -} - int ObTenantFreezer::do_minor_freeze_(const ObTenantFreezeCtx &ctx) { int ret = OB_SUCCESS; @@ -1474,6 +1408,7 @@ int ObTenantFreezer::do_minor_freeze_(const ObTenantFreezeCtx &ctx) rollback_freeze_cnt = true; LOG_WARN("fail to minor freeze", K(ret)); } else { + tenant_info_.update_slow_freeze_interval(); LOG_INFO("finish tenant minor freeze", K(ret)); } // clear freezing mark for tenant diff --git a/src/storage/tx_storage/ob_tenant_freezer.h b/src/storage/tx_storage/ob_tenant_freezer.h index 2d58de359..1747bbf9f 100755 --- a/src/storage/tx_storage/ob_tenant_freezer.h +++ b/src/storage/tx_storage/ob_tenant_freezer.h @@ -15,6 +15,7 @@ #include "lib/atomic/ob_atomic.h" #include "lib/list/ob_list.h" +#include "lib/literals/ob_literals.h" #include "lib/lock/ob_tc_rwlock.h" #include "lib/thread/thread_mgr.h" #include 
"lib/thread/thread_mgr_interface.h" @@ -86,21 +87,18 @@ public: bool is_replay_pending_log_too_large(const int64_t pending_size); // If the tenant's freeze process is slowed, we will only freeze one time every // SLOW_FREEZE_INTERVAL. - // set the tenant freeze process slowed. used while the tablet's max memtablet + // set the tenant freeze process slowed. used while the tablet's max memtable // number meet. // @param[in] tablet_id, which tablet slow the freeze process. - // @param[in] protect_clock, the memtable's min protection clock. + // @param[in] retire_clock, the memtable's retire clock. int set_tenant_slow_freeze(const common::ObTabletID &tablet_id, - const int64_t protect_clock); + const int64_t retire_clock); // uset the slow freeze flag. // if the tenant freeze process is slowed by this tablet, then unset it. // @param[in] tablet_id, the tablet who want to unset the slow freeze flag. // unset success if the tablet is the one who slow the tenant. // else do nothing. int unset_tenant_slow_freeze(const common::ObTabletID &tablet_id); - // unset the slow freeze flag. - // if the tenant is slowed. unset it and reset the slow tablet. - int unset_tenant_slow_freeze(); // set tenant mem limit, both for min and max memory limit. // @param[in] lower_limit, the min memory limit will be set. // @param[in] upper_limit, the max memory limit will be set. 
@@ -181,15 +179,13 @@ private: int get_tenant_mem_usage_(ObTenantFreezeCtx &ctx); int get_tenant_mem_stat_(ObTenantStatistic &stat); static int get_freeze_trigger_(ObTenantFreezeCtx &ctx); - static bool need_freeze_(const ObTenantFreezeCtx &ctx); - bool is_minor_need_slow_(const ObTenantFreezeCtx &ctx); + bool need_freeze_(const ObTenantFreezeCtx &ctx); bool is_major_freeze_turn_(); int do_major_if_need_(const bool need_freeze); int do_minor_freeze_(const ObTenantFreezeCtx &ctx); int do_major_freeze_(const int64_t try_frozen_scn); void log_frozen_memstore_info_if_need_(const ObTenantFreezeCtx &ctx); void halt_prewarm_if_need_(const ObTenantFreezeCtx &ctx); - int unset_tenant_slow_freeze_(); int check_and_freeze_normal_data_(ObTenantFreezeCtx &ctx); int check_and_freeze_tx_data_(); int check_and_freeze_mds_table_(); diff --git a/src/storage/tx_storage/ob_tenant_freezer_common.cpp b/src/storage/tx_storage/ob_tenant_freezer_common.cpp index 57c710e8c..343722103 100644 --- a/src/storage/tx_storage/ob_tenant_freezer_common.cpp +++ b/src/storage/tx_storage/ob_tenant_freezer_common.cpp @@ -104,14 +104,14 @@ void ObTenantStatistic::reset() ObTenantInfo::ObTenantInfo() : tenant_id_(INT64_MAX), is_loaded_(false), - is_freezing_(false), - last_freeze_clock_(0), frozen_scn_(0), freeze_cnt_(0), last_halt_ts_(0), slow_freeze_(false), slow_freeze_timestamp_(0), - slow_freeze_min_protect_clock_(INT64_MAX), + slow_freeze_mt_retire_clock_(0), + freeze_interval_(0), + last_freeze_timestamp_(0), mem_lower_limit_(0), mem_upper_limit_(0), mem_memstore_limit_(0) @@ -122,13 +122,14 @@ void ObTenantInfo::reset() { tenant_id_ = OB_INVALID_TENANT_ID; // i64 max as invalid. 
is_loaded_ = false; - is_freezing_ = false; frozen_scn_ = 0; freeze_cnt_ = 0; last_halt_ts_ = 0; slow_freeze_ = false; slow_freeze_timestamp_ = 0; - slow_freeze_min_protect_clock_ = INT64_MAX; + slow_freeze_mt_retire_clock_ = 0; + freeze_interval_ = 0; + last_freeze_timestamp_ = 0; slow_tablet_.reset(); mem_memstore_limit_ = 0; mem_lower_limit_ = 0; @@ -189,10 +190,70 @@ void ObTenantInfo::get_freeze_ctx(ObTenantFreezeCtx &ctx) const ctx.mem_memstore_limit_ = mem_memstore_limit_; } +bool ObTenantInfo::is_freeze_need_slow() const +{ + bool need_slow = false; + SpinRLockGuard guard(lock_); + if (slow_freeze_) { + int64_t now = ObTimeUtility::fast_current_time(); + if (now - last_freeze_timestamp_ >= freeze_interval_) { + need_slow = false; + } else { + // the freeze interval has not elapsed since the last freeze, keep slowing + need_slow = true; + } + } + return need_slow; +} + +void ObTenantInfo::update_slow_freeze_interval() +{ + if (!slow_freeze_) { + } else { + SpinWLockGuard guard(lock_); + // if slow freeze is still set, double the freeze interval (capped at MAX_FREEZE_INTERVAL).
+ if (slow_freeze_) { + last_freeze_timestamp_ = ObTimeUtility::fast_current_time(); + freeze_interval_ = MIN(freeze_interval_ * 2, MAX_FREEZE_INTERVAL); + } + } +} + +void ObTenantInfo::set_slow_freeze( + const common::ObTabletID &tablet_id, + const int64_t retire_clock, + const int64_t default_interval) +{ + SpinWLockGuard guard(lock_); + if (!slow_freeze_) { + slow_freeze_ = true; + slow_freeze_timestamp_ = ObTimeUtility::fast_current_time(); + slow_freeze_mt_retire_clock_ = retire_clock; + slow_tablet_ = tablet_id; + last_freeze_timestamp_ = ObTimeUtility::fast_current_time(); + freeze_interval_ = default_interval; + } +} + +void ObTenantInfo::unset_slow_freeze(const common::ObTabletID &tablet_id) +{ + SpinWLockGuard guard(lock_); + if (slow_freeze_ && slow_tablet_ == tablet_id) { + slow_freeze_ = false; + slow_freeze_timestamp_ = 0; + slow_freeze_mt_retire_clock_ = 0; + last_freeze_timestamp_ = 0; + freeze_interval_ = 0; + slow_tablet_.reset(); + } +} + ObTenantFreezeGuard::ObTenantFreezeGuard(common::ObMemstoreAllocatorMgr *allocator_mgr, int &err_code, + const ObTenantInfo &tenant_info, const int64_t warn_threshold) : allocator_mgr_(nullptr), + tenant_info_(tenant_info), pre_retire_pos_(0), error_code_(err_code), time_guard_("FREEZE_CHECKER", warn_threshold) @@ -237,8 +298,13 @@ ObTenantFreezeGuard::~ObTenantFreezeGuard() const bool has_no_active_memtable = (curr_frozen_pos == 0); if (!(retired_mem_frozen || has_no_active_memtable)) { ret = OB_ERR_UNEXPECTED; - LOG_ERROR("[FREEZE_CHECKER]there may be frequent tenant freeze", KR(ret), K(curr_frozen_pos), - K_(pre_retire_pos), K(retired_mem_frozen), K(has_no_active_memtable)); + if (tenant_info_.is_freeze_slowed()) { + LOG_WARN("[FREEZE_CHECKER]there may be frequent tenant freeze, but slowed", KR(ret), K(curr_frozen_pos), + K_(pre_retire_pos), K(retired_mem_frozen), K(has_no_active_memtable), K_(tenant_info)); + } else { + LOG_ERROR("[FREEZE_CHECKER]there may be frequent tenant freeze", KR(ret), 
K(curr_frozen_pos), + K_(pre_retire_pos), K(retired_mem_frozen), K(has_no_active_memtable)); + } char active_mt_info[DEFAULT_BUF_LENGTH]; tenant_allocator->log_active_memstore_info(active_mt_info, sizeof(active_mt_info)); diff --git a/src/storage/tx_storage/ob_tenant_freezer_common.h b/src/storage/tx_storage/ob_tenant_freezer_common.h index 7d8c4cf8e..a8a722a77 100644 --- a/src/storage/tx_storage/ob_tenant_freezer_common.h +++ b/src/storage/tx_storage/ob_tenant_freezer_common.h @@ -10,12 +10,14 @@ * See the Mulan PubL v2 for more details. */ +#ifndef OCEABASE_STORAGE_TENANT_FREEZER_COMMON_ +#define OCEABASE_STORAGE_TENANT_FREEZER_COMMON_ + #include "share/ob_define.h" +#include "lib/literals/ob_literals.h" #include "common/ob_tablet_id.h" #include "common/storage/ob_freeze_define.h" -#ifndef OCEABASE_STORAGE_TENANT_FREEZER_COMMON_ -#define OCEABASE_STORAGE_TENANT_FREEZER_COMMON_ namespace oceanbase { namespace common @@ -112,6 +114,7 @@ private: // slow freeze flag, freezing flag and so on. class ObTenantInfo : public ObDLinkBase { + const static int64_t MAX_FREEZE_INTERVAL = 60_s; public: ObTenantInfo(); virtual ~ObTenantInfo() { reset(); } @@ -124,22 +127,37 @@ public: void update_memstore_limit(const int64_t memstore_limit_percentage); int64_t get_memstore_limit() const; void get_freeze_ctx(ObTenantFreezeCtx &ctx) const; + + // used by slow freeze. + bool is_freeze_need_slow() const; + bool is_freeze_slowed() const + { + return slow_freeze_; + } + void update_slow_freeze_interval(); + void set_slow_freeze(const common::ObTabletID &tablet_id, + const int64_t retire_clock, + const int64_t default_interval); + void unset_slow_freeze(const common::ObTabletID &tablet_id); + TO_STRING_KV(K_(slow_freeze), K_(slow_freeze_timestamp), K_(freeze_interval), + K_(last_freeze_timestamp), K_(slow_tablet)); public: uint64_t tenant_id_; bool is_loaded_; // whether the memory limit set or not. - bool is_freezing_; // is the tenant freezing now.
- int64_t last_freeze_clock_; int64_t frozen_scn_; // used by major, the timestamp of frozen. int64_t freeze_cnt_; // minor freeze times. int64_t last_halt_ts_; // Avoid frequent execution of abort preheating - bool slow_freeze_; // Avoid frequent freezing when abnormal - int64_t slow_freeze_timestamp_; // the last slow freeze time timestamp - int64_t slow_freeze_min_protect_clock_; - common::ObTabletID slow_tablet_; private: // protect mem_lower_limit_/mem_upper_limit_/mem_memstore_limit_ // to make sure it is consistency SpinRWLock lock_; + bool slow_freeze_; // Avoid frequent freezing when abnormal + int64_t slow_freeze_timestamp_; // the last slow freeze time timestamp + int64_t slow_freeze_mt_retire_clock_; + int64_t freeze_interval_; + int64_t last_freeze_timestamp_; + common::ObTabletID slow_tablet_; + int64_t mem_lower_limit_; // the min memory limit int64_t mem_upper_limit_; // the max memory limit // mem_memstore_limit will be checked when **leader** partitions @@ -154,10 +172,12 @@ class ObTenantFreezeGuard public: ObTenantFreezeGuard(common::ObMemstoreAllocatorMgr *allocator_mgr, int &ret, + const ObTenantInfo &tenant_info, const int64_t warn_threshold = 60 * 1000 * 1000 /* 1 min */); ~ObTenantFreezeGuard(); private: common::ObMemstoreAllocatorMgr *allocator_mgr_; + const ObTenantInfo &tenant_info_; int64_t pre_retire_pos_; int &error_code_; ObTimeGuard time_guard_;