BUGFIX: prevent frequent freeze while a tablet's memtable count reaches max

This commit is contained in:
obdev 2023-06-30 03:18:24 +00:00 committed by ob-robot
parent 65c2560d67
commit e35dc626a9
7 changed files with 150 additions and 113 deletions

View File

@ -197,4 +197,4 @@ struct ObTimeLiteralPrettyPrinter {
};
}// oceanbase
#endif
#endif

View File

@ -10,6 +10,8 @@
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX STORAGE
#include "common/rowkey/ob_store_rowkey.h"
#include "storage/memtable/ob_memtable.h"
@ -229,18 +231,13 @@ void ObMemtable::destroy()
ObTimeGuard time_guard("ObMemtable::destroy()", 100 * 1000);
int ret = OB_SUCCESS;
if (is_inited_) {
const common::ObTabletID tablet_id = key_.tablet_id_;
const int64_t cost_time = ObTimeUtility::current_time() - mt_stat_.release_time_;
if (cost_time > 1 * 1000 * 1000) {
STORAGE_LOG(WARN, "it costs too much time from release to destroy", K(cost_time), K(tablet_id), KP(this));
STORAGE_LOG(WARN, "it costs too much time from release to destroy", K(cost_time), KP(this));
}
set_allow_freeze(true);
STORAGE_LOG(INFO, "memtable destroyed", K(*this));
time_guard.click();
ObTenantFreezer *freezer = nullptr;
freezer = MTL(ObTenantFreezer *);
if (OB_SUCCESS != freezer->unset_tenant_slow_freeze(tablet_id)) {
TRANS_LOG(WARN, "unset tenant slow freeze failed.", K(*this));
}
}
ObITable::reset();
ObFreezeCheckpoint::reset();
@ -1486,6 +1483,28 @@ void ObMemtable::resolve_left_boundary_for_active_memtable()
}
}
// Change whether this memtable is allowed to be frozen and mirror the change
// into the tenant freezer's slow-freeze state.
// @param[in] allow_freeze, the new value of the allow-freeze flag.
//   - true:  asks the tenant freezer to unset the slow freeze for this tablet.
//   - false: asks the tenant freezer to set the slow freeze for this tablet,
//            passing the local allocator's retire clock.
// Nothing happens when the flag already holds the requested value.
// A failure reported by the freezer is only logged; it is not returned.
void ObMemtable::set_allow_freeze(const bool allow_freeze)
{
  int ret = OB_SUCCESS;
  if (allow_freeze_ == allow_freeze) {
    // flag unchanged, the tenant freezer is not touched
  } else {
    const common::ObTabletID tablet_id = key_.tablet_id_;
    const int64_t retire_clock = local_allocator_.get_retire_clock();
    ObTenantFreezer *freezer = MTL(ObTenantFreezer *);
    // publish the new flag first, then notify the tenant freezer
    ATOMIC_STORE(&allow_freeze_, allow_freeze);
    if (!allow_freeze) {
      if (OB_FAIL(freezer->set_tenant_slow_freeze(tablet_id, retire_clock))) {
        LOG_WARN("set tenant slow freeze failed.", KPC(this));
      }
    } else if (OB_FAIL(freezer->unset_tenant_slow_freeze(tablet_id))) {
      LOG_WARN("unset tenant slow freeze failed.", KPC(this));
    }
  }
}
int64_t ObMemtable::inc_write_ref_()
{
return ATOMIC_AAF(&write_ref_cnt_, 1);

View File

@ -16,6 +16,7 @@
#include "share/ob_tenant_mgr.h"
#include "share/ob_cluster_version.h"
#include "lib/literals/ob_literals.h"
#include "lib/worker.h"
#include "storage/memtable/ob_memtable_interface.h"
#include "storage/memtable/mvcc/ob_query_engine.h"
@ -459,7 +460,7 @@ public:
int resolve_right_boundary_for_migration();
void unset_logging_blocked_for_active_memtable();
void resolve_left_boundary_for_active_memtable();
inline void set_allow_freeze(const bool allow_freeze) { ATOMIC_STORE(&allow_freeze_, allow_freeze); }
void set_allow_freeze(const bool allow_freeze);
inline bool allow_freeze() const { return ATOMIC_LOAD(&allow_freeze_); }
/* multi source data operations */

View File

@ -320,7 +320,7 @@ int ObTenantFreezer::tenant_freeze_()
ObLSService *ls_srv = MTL(ObLSService *);
FLOG_INFO("[TenantFreezer] tenant_freeze start", KR(ret));
ObTenantFreezeGuard freeze_guard(allocator_mgr_, ret);
ObTenantFreezeGuard freeze_guard(allocator_mgr_, ret, tenant_info_);
if (OB_FAIL(ls_srv->get_ls_iter(iter, ObLSGetMod::TXSTORAGE_MOD))) {
LOG_WARN("[TenantFreezer] fail to get log stream iterator", KR(ret));
} else {
@ -549,9 +549,6 @@ int ObTenantFreezer::check_and_freeze_normal_data_(ObTenantFreezeCtx &ctx)
LOG_WARN("[TenantFreezer] fail to get mem usage", KR(ret));
} else {
need_freeze = need_freeze_(ctx);
if (need_freeze && !is_minor_need_slow_(ctx)) {
unset_tenant_slow_freeze_();
}
log_frozen_memstore_info_if_need_(ctx);
halt_prewarm_if_need_(ctx);
}
@ -715,58 +712,14 @@ int ObTenantFreezer::unset_tenant_freezing_(const bool rollback_freeze_cnt)
int ObTenantFreezer::set_tenant_slow_freeze(
const common::ObTabletID &tablet_id,
const int64_t protect_clock)
const int64_t retire_clock)
{
int ret = OB_SUCCESS;
if (!is_inited_) {
ret = OB_NOT_INIT;
LOG_WARN("[TenantFreezer] tenant manager not init", KR(ret));
} else {
const uint64_t tenant_id = tenant_info_.tenant_id_;
if (!tenant_info_.slow_freeze_) {
bool success = ATOMIC_BCAS(&tenant_info_.slow_freeze_, false, true);
if (success) {
tenant_info_.slow_freeze_timestamp_ = ObTimeUtility::fast_current_time();
tenant_info_.slow_freeze_min_protect_clock_ = protect_clock;
tenant_info_.slow_tablet_ = tablet_id;
}
} else if (tenant_info_.slow_freeze_ &&
tenant_info_.slow_freeze_min_protect_clock_ > protect_clock) {
tenant_info_.slow_freeze_timestamp_ = ObTimeUtility::fast_current_time();
tenant_info_.slow_freeze_min_protect_clock_ = protect_clock;
tenant_info_.slow_tablet_ = tablet_id;
}
}
return ret;
}
// Clear the tenant's slow-freeze flag regardless of which tablet set it.
// @return OB_SUCCESS if the flag was not set, or was cleared by this call;
//         OB_ERR_UNEXPECTED if the flag looked set but ATOMIC_BCAS reported
//         failure.
int ObTenantFreezer::unset_tenant_slow_freeze_()
{
// NOTE: yuanyuan.cxf do not lock to prevent deadlock.
int ret = OB_SUCCESS;
const uint64_t tenant_id = tenant_info_.tenant_id_;
if (tenant_info_.slow_freeze_) {
// NOTE(review): ATOMIC_BCAS is presumably a boolean compare-and-swap
// (expected true, new value false) -- confirm against its definition.
bool success = ATOMIC_BCAS(&tenant_info_.slow_freeze_, true, false);
if (success) {
// the flag is cleared; reset the bookkeeping fields that go with it.
// these writes are plain stores done after the swap, without a lock.
tenant_info_.slow_freeze_timestamp_ = 0;
tenant_info_.slow_freeze_min_protect_clock_ = INT64_MAX;
tenant_info_.slow_tablet_.reset();
} else {
// the flag was observed set just above but the swap did not succeed.
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("[TenantFreezer] Unexpected error", K(tenant_id), KR(ret));
}
}
return ret;
}
int ObTenantFreezer::unset_tenant_slow_freeze()
{
int ret = OB_SUCCESS;
if (!is_inited_) {
ret = OB_NOT_INIT;
LOG_WARN("[TenantFreezer] tenant manager not init", KR(ret));
} else {
ret = unset_tenant_slow_freeze_();
tenant_info_.set_slow_freeze(tablet_id, retire_clock, FREEZE_TRIGGER_INTERVAL);
}
return ret;
}
@ -778,18 +731,7 @@ int ObTenantFreezer::unset_tenant_slow_freeze(const common::ObTabletID &tablet_i
ret = OB_NOT_INIT;
LOG_WARN("[TenantFreezer] tenant manager not init", KR(ret));
} else {
const uint64_t tenant_id = tenant_info_.tenant_id_;
if (tenant_info_.slow_freeze_ && tenant_info_.slow_tablet_ == tablet_id) {
bool success = ATOMIC_BCAS(&tenant_info_.slow_freeze_, true, false);
if (success) {
tenant_info_.slow_freeze_timestamp_ = 0;
tenant_info_.slow_freeze_min_protect_clock_ = INT64_MAX;
tenant_info_.slow_tablet_.reset();
} else {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("[TenantFreezer] Unexpected error", K(tenant_id), K(tablet_id), KR(ret));
}
}
tenant_info_.unset_slow_freeze(tablet_id);
}
return ret;
}
@ -1416,6 +1358,16 @@ bool ObTenantFreezer::need_freeze_(const ObTenantFreezeCtx &ctx)
// 1. trigger by active memstore used.
if (ctx.freezable_active_memstore_used_ > ctx.memstore_freeze_trigger_) {
need_freeze = true;
}
// 2. may be slowed
if (need_freeze && tenant_info_.is_freeze_need_slow()) {
need_freeze = false;
LOG_INFO("[TenantFreezer] A minor freeze is needed but slowed.",
K_(tenant_info),
K(ctx.active_memstore_used_),
K(ctx.memstore_freeze_trigger_), K(ctx.max_cached_memstore_size_));
}
if (need_freeze) {
LOG_INFO("[TenantFreezer] A minor freeze is needed by active memstore used.",
K(ctx.freezable_active_memstore_used_), K(ctx.memstore_freeze_trigger_), K(ctx.max_cached_memstore_size_));
}
@ -1433,24 +1385,6 @@ bool ObTenantFreezer::is_major_freeze_turn_()
return (major_compact_trigger != 0 && freeze_cnt >= major_compact_trigger);
}
bool ObTenantFreezer::is_minor_need_slow_(const ObTenantFreezeCtx &ctx)
{
int ret = OB_SUCCESS;
bool need_slow = false;
if (tenant_info_.slow_freeze_) {
need_slow = true;
int64_t now = ObTimeUtility::fast_current_time();
if (ctx.total_memstore_hold_ <= ctx.memstore_freeze_trigger_) {
// no need minor freeze
} else if (now - tenant_info_.slow_freeze_timestamp_ >= SLOW_FREEZE_INTERVAL) {
need_slow = false;
} else {
// no need minor freeze
}
}
return need_slow;
}
int ObTenantFreezer::do_minor_freeze_(const ObTenantFreezeCtx &ctx)
{
int ret = OB_SUCCESS;
@ -1474,6 +1408,7 @@ int ObTenantFreezer::do_minor_freeze_(const ObTenantFreezeCtx &ctx)
rollback_freeze_cnt = true;
LOG_WARN("fail to minor freeze", K(ret));
} else {
tenant_info_.update_slow_freeze_interval();
LOG_INFO("finish tenant minor freeze", K(ret));
}
// clear freezing mark for tenant

View File

@ -15,6 +15,7 @@
#include "lib/atomic/ob_atomic.h"
#include "lib/list/ob_list.h"
#include "lib/literals/ob_literals.h"
#include "lib/lock/ob_tc_rwlock.h"
#include "lib/thread/thread_mgr.h"
#include "lib/thread/thread_mgr_interface.h"
@ -86,21 +87,18 @@ public:
bool is_replay_pending_log_too_large(const int64_t pending_size);
// If the tenant's freeze process is slowed, we will only freeze one time every
// SLOW_FREEZE_INTERVAL.
// set the tenant freeze process slowed. used while the tablet's max memtablet
// set the tenant freeze process slowed. used when the tablet's max memtable
// number is reached.
// @param[in] tablet_id, which tablet slow the freeze process.
// @param[in] protect_clock, the memtable's min protection clock.
// @param[in] retire_clock, the memtable's retire clock.
int set_tenant_slow_freeze(const common::ObTabletID &tablet_id,
const int64_t protect_clock);
const int64_t retire_clock);
// unset the slow freeze flag.
// if the tenant freeze process is slowed by this tablet, then unset it.
// @param[in] tablet_id, the tablet who want to unset the slow freeze flag.
// unset success if the tablet is the one who slow the tenant.
// else do nothing.
int unset_tenant_slow_freeze(const common::ObTabletID &tablet_id);
// unset the slow freeze flag.
// if the tenant is slowed. unset it and reset the slow tablet.
int unset_tenant_slow_freeze();
// set tenant mem limit, both for min and max memory limit.
// @param[in] lower_limit, the min memory limit will be set.
// @param[in] upper_limit, the max memory limit will be set.
@ -181,15 +179,13 @@ private:
int get_tenant_mem_usage_(ObTenantFreezeCtx &ctx);
int get_tenant_mem_stat_(ObTenantStatistic &stat);
static int get_freeze_trigger_(ObTenantFreezeCtx &ctx);
static bool need_freeze_(const ObTenantFreezeCtx &ctx);
bool is_minor_need_slow_(const ObTenantFreezeCtx &ctx);
bool need_freeze_(const ObTenantFreezeCtx &ctx);
bool is_major_freeze_turn_();
int do_major_if_need_(const bool need_freeze);
int do_minor_freeze_(const ObTenantFreezeCtx &ctx);
int do_major_freeze_(const int64_t try_frozen_scn);
void log_frozen_memstore_info_if_need_(const ObTenantFreezeCtx &ctx);
void halt_prewarm_if_need_(const ObTenantFreezeCtx &ctx);
int unset_tenant_slow_freeze_();
int check_and_freeze_normal_data_(ObTenantFreezeCtx &ctx);
int check_and_freeze_tx_data_();
int check_and_freeze_mds_table_();

View File

@ -104,14 +104,14 @@ void ObTenantStatistic::reset()
ObTenantInfo::ObTenantInfo()
: tenant_id_(INT64_MAX),
is_loaded_(false),
is_freezing_(false),
last_freeze_clock_(0),
frozen_scn_(0),
freeze_cnt_(0),
last_halt_ts_(0),
slow_freeze_(false),
slow_freeze_timestamp_(0),
slow_freeze_min_protect_clock_(INT64_MAX),
slow_freeze_mt_retire_clock_(0),
freeze_interval_(0),
last_freeze_timestamp_(0),
mem_lower_limit_(0),
mem_upper_limit_(0),
mem_memstore_limit_(0)
@ -122,13 +122,14 @@ void ObTenantInfo::reset()
{
tenant_id_ = OB_INVALID_TENANT_ID; // i64 max as invalid.
is_loaded_ = false;
is_freezing_ = false;
frozen_scn_ = 0;
freeze_cnt_ = 0;
last_halt_ts_ = 0;
slow_freeze_ = false;
slow_freeze_timestamp_ = 0;
slow_freeze_min_protect_clock_ = INT64_MAX;
slow_freeze_mt_retire_clock_ = 0;
freeze_interval_ = 0;
last_freeze_timestamp_ = 0;
slow_tablet_.reset();
mem_memstore_limit_ = 0;
mem_lower_limit_ = 0;
@ -189,10 +190,70 @@ void ObTenantInfo::get_freeze_ctx(ObTenantFreezeCtx &ctx) const
ctx.mem_memstore_limit_ = mem_memstore_limit_;
}
bool ObTenantInfo::is_freeze_need_slow() const
{
bool need_slow = false;
SpinRLockGuard guard(lock_);
if (slow_freeze_) {
int64_t now = ObTimeUtility::fast_current_time();
if (now - last_freeze_timestamp_ >= freeze_interval_) {
need_slow = false;
} else {
// no need minor freeze
need_slow = true;
}
}
return need_slow;
}
void ObTenantInfo::update_slow_freeze_interval()
{
if (!slow_freeze_) {
} else {
SpinWLockGuard guard(lock_);
// if slow freeze, make freeze interval 2 times of now.
if (slow_freeze_) {
last_freeze_timestamp_ = ObTimeUtility::fast_current_time();
freeze_interval_ = MIN(freeze_interval_ * 2, MAX_FREEZE_INTERVAL);
}
}
}
// Mark the tenant's freeze as slowed, done under the write lock.
// @param[in] tablet_id, recorded as the tablet that slows the freeze.
// @param[in] retire_clock, recorded as slow_freeze_mt_retire_clock_.
// @param[in] default_interval, the initial value of freeze_interval_.
// If the tenant is already slowed, the call changes nothing: the tablet,
// clock and interval recorded earlier are kept.
void ObTenantInfo::set_slow_freeze(
    const common::ObTabletID &tablet_id,
    const int64_t retire_clock,
    const int64_t default_interval)
{
  SpinWLockGuard guard(lock_);
  if (slow_freeze_) {
    // already slowed, keep the existing record
    return;
  }
  slow_freeze_ = true;
  // both timestamps start from the moment the slow freeze is set
  slow_freeze_timestamp_ = ObTimeUtility::fast_current_time();
  last_freeze_timestamp_ = ObTimeUtility::fast_current_time();
  slow_tablet_ = tablet_id;
  slow_freeze_mt_retire_clock_ = retire_clock;
  freeze_interval_ = default_interval;
}
// Clear the slow-freeze state, done under the write lock.
// @param[in] tablet_id, the tablet asking to clear the state.
// The state is cleared only if the tenant is slowed and slow_tablet_ equals
// tablet_id; otherwise the call changes nothing.
void ObTenantInfo::unset_slow_freeze(const common::ObTabletID &tablet_id)
{
  SpinWLockGuard guard(lock_);
  const bool slowed_by_this_tablet = slow_freeze_ && (slow_tablet_ == tablet_id);
  if (slowed_by_this_tablet) {
    slow_freeze_ = false;
    // drop everything recorded when the slow freeze was set
    slow_tablet_.reset();
    slow_freeze_mt_retire_clock_ = 0;
    slow_freeze_timestamp_ = 0;
    last_freeze_timestamp_ = 0;
    freeze_interval_ = 0;
  }
}
ObTenantFreezeGuard::ObTenantFreezeGuard(common::ObMemstoreAllocatorMgr *allocator_mgr,
int &err_code,
const ObTenantInfo &tenant_info,
const int64_t warn_threshold)
: allocator_mgr_(nullptr),
tenant_info_(tenant_info),
pre_retire_pos_(0),
error_code_(err_code),
time_guard_("FREEZE_CHECKER", warn_threshold)
@ -237,8 +298,13 @@ ObTenantFreezeGuard::~ObTenantFreezeGuard()
const bool has_no_active_memtable = (curr_frozen_pos == 0);
if (!(retired_mem_frozen || has_no_active_memtable)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("[FREEZE_CHECKER]there may be frequent tenant freeze", KR(ret), K(curr_frozen_pos),
K_(pre_retire_pos), K(retired_mem_frozen), K(has_no_active_memtable));
if (tenant_info_.is_freeze_slowed()) {
LOG_WARN("[FREEZE_CHECKER]there may be frequent tenant freeze, but slowed", KR(ret), K(curr_frozen_pos),
K_(pre_retire_pos), K(retired_mem_frozen), K(has_no_active_memtable), K_(tenant_info));
} else {
LOG_ERROR("[FREEZE_CHECKER]there may be frequent tenant freeze", KR(ret), K(curr_frozen_pos),
K_(pre_retire_pos), K(retired_mem_frozen), K(has_no_active_memtable));
}
char active_mt_info[DEFAULT_BUF_LENGTH];
tenant_allocator->log_active_memstore_info(active_mt_info,
sizeof(active_mt_info));

View File

@ -10,12 +10,14 @@
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEABASE_STORAGE_TENANT_FREEZER_COMMON_
#define OCEABASE_STORAGE_TENANT_FREEZER_COMMON_
#include "share/ob_define.h"
#include "lib/literals/ob_literals.h"
#include "common/ob_tablet_id.h"
#include "common/storage/ob_freeze_define.h"
#ifndef OCEABASE_STORAGE_TENANT_FREEZER_COMMON_
#define OCEABASE_STORAGE_TENANT_FREEZER_COMMON_
namespace oceanbase
{
namespace common
@ -112,6 +114,7 @@ private:
// slow freeze flag, freezing flag and so on.
class ObTenantInfo : public ObDLinkBase<ObTenantInfo>
{
const static int64_t MAX_FREEZE_INTERVAL = 60_s;
public:
ObTenantInfo();
virtual ~ObTenantInfo() { reset(); }
@ -124,22 +127,37 @@ public:
void update_memstore_limit(const int64_t memstore_limit_percentage);
int64_t get_memstore_limit() const;
void get_freeze_ctx(ObTenantFreezeCtx &ctx) const;
// used slow freeze.
bool is_freeze_need_slow() const;
// Return the current value of slow_freeze_.
// The flag is read directly here; lock_ is not taken.
bool is_freeze_slowed() const { return slow_freeze_; }
void update_slow_freeze_interval();
void set_slow_freeze(const common::ObTabletID &tablet_id,
const int64_t retire_clock,
const int64_t default_interval);
void unset_slow_freeze(const common::ObTabletID &tablet_id);
TO_STRING_KV(K_(slow_freeze), K_(slow_freeze_timestamp), K_(freeze_interval),
K_(last_freeze_timestamp), K_(slow_tablet));
public:
uint64_t tenant_id_;
bool is_loaded_; // whether the memory limit set or not.
bool is_freezing_; // is the tenant freezing now.
int64_t last_freeze_clock_;
int64_t frozen_scn_; // used by major, the timestamp of frozen.
int64_t freeze_cnt_; // minor freeze times.
int64_t last_halt_ts_; // Avoid frequent execution of abort preheating
bool slow_freeze_; // Avoid frequent freezing when abnormal
int64_t slow_freeze_timestamp_; // the last slow freeze time timestamp
int64_t slow_freeze_min_protect_clock_;
common::ObTabletID slow_tablet_;
private:
// protect mem_lower_limit_/mem_upper_limit_/mem_memstore_limit_
// to make sure they are consistent
SpinRWLock lock_;
bool slow_freeze_; // Avoid frequent freezing when abnormal
int64_t slow_freeze_timestamp_; // the last slow freeze time timestamp
int64_t slow_freeze_mt_retire_clock_;
int64_t freeze_interval_;
int64_t last_freeze_timestamp_;
common::ObTabletID slow_tablet_;
int64_t mem_lower_limit_; // the min memory limit
int64_t mem_upper_limit_; // the max memory limit
// mem_memstore_limit will be checked when **leader** partitions
@ -154,10 +172,12 @@ class ObTenantFreezeGuard
public:
ObTenantFreezeGuard(common::ObMemstoreAllocatorMgr *allocator_mgr,
int &ret,
const ObTenantInfo &tenant_info,
const int64_t warn_threshold = 60 * 1000 * 1000 /* 1 min */);
~ObTenantFreezeGuard();
private:
common::ObMemstoreAllocatorMgr *allocator_mgr_;
const ObTenantInfo &tenant_info_;
int64_t pre_retire_pos_;
int &error_code_;
ObTimeGuard time_guard_;