init push

This commit is contained in:
oceanbase-admin
2021-05-31 22:56:52 +08:00
commit cea7de1475
7020 changed files with 5689869 additions and 0 deletions

View File

@ -0,0 +1,399 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_fifo_arena.h"
#include "math.h"
#include "ob_memstore_allocator_mgr.h"
#include "share/ob_tenant_mgr.h"
#include "observer/omt/ob_tenant_config_mgr.h"
#include "lib/alloc/alloc_struct.h"
using namespace oceanbase::lib;
using namespace oceanbase::omt;
namespace oceanbase {
namespace common {
#ifndef myassert
// Abort the process when the condition fails.
// FIX: the macro argument is now parenthesized; with the previous `!x`,
// an invocation like myassert(a == b) expanded to `if (!a == b)` and
// silently tested the wrong condition.
#define myassert(x) \
  if (!(x)) {       \
    ob_abort();     \
  }
#endif
// Process-wide sum of bytes held by every ObFifoArena (maintained in
// alloc_page/free_page/destroy_page).
int64_t ObFifoArena::total_hold_ = 0;
int64_t ObFifoArena::Page::get_actual_hold_size()
{
  // Every alloc_page() request is actually served by a whole chunk, so the
  // memory truly held can exceed the requested size.  Recover the AObject
  // header the underlying allocator placed immediately before this page and
  // ask it for the real footprint.
  AObject* obj = reinterpret_cast<AObject*>((char*)(this) - AOBJECT_HEADER_SIZE);
  abort_unless(NULL != obj);
  int64_t actual_size = obj->hold(AllocHelper::cells_per_block(obj->block()->ablock_size_));
  return actual_size;
}
void ObFifoArena::ObWriteThrottleInfo::reset()
{
decay_factor_ = 0.0;
alloc_duration_ = 0;
trigger_percentage_ = 0;
memstore_threshold_ = 0;
ATOMIC_SET(&period_throttled_count_, 0);
ATOMIC_SET(&period_throttled_time_, 0);
ATOMIC_SET(&total_throttled_count_, 0);
ATOMIC_SET(&total_throttled_count_, 0);
}
// Clear only the per-period statistics; the total_* counters keep
// accumulating across periods.
void ObFifoArena::ObWriteThrottleInfo::reset_period_stat_info()
{
  ATOMIC_SET(&period_throttled_count_, 0);
  ATOMIC_SET(&period_throttled_time_, 0);
}
// Record one throttling event of `interval` microseconds in both the
// per-period and the cumulative counters.
void ObFifoArena::ObWriteThrottleInfo::record_limit_event(int64_t interval)
{
  ATOMIC_INC(&period_throttled_count_);
  ATOMIC_FAA(&period_throttled_time_, interval);
  ATOMIC_INC(&total_throttled_count_);
  ATOMIC_FAA(&total_throttled_time_, interval);
}
// Recompute decay_factor_ whenever any of the three inputs changed since the
// last call; otherwise leave it untouched.  Always returns OB_SUCCESS.
int ObFifoArena::ObWriteThrottleInfo::check_and_calc_decay_factor(
    int64_t memstore_threshold, int64_t trigger_percentage, int64_t alloc_duration)
{
  int ret = OB_SUCCESS;
  if (memstore_threshold != memstore_threshold_ || trigger_percentage != trigger_percentage_ ||
      alloc_duration != alloc_duration_) {
    memstore_threshold_ = memstore_threshold;
    trigger_percentage_ = trigger_percentage;
    alloc_duration_ = alloc_duration;
    // Memory still available above the trigger line, expressed as N slices of
    // MEM_SLICE_SIZE each.
    int64_t available_mem = (100 - trigger_percentage_) * memstore_threshold_ / 100;
    double N = static_cast<double>(available_mem) / static_cast<double>(MEM_SLICE_SIZE);
    // N^2*(N+1)^2/4 == (sum_{i=1..N} i)^2 == sum_{i=1..N} i^3: the factor is
    // sized so that, with cubic per-slice decay (see get_throttling_interval),
    // throttling all N remaining slices spends roughly alloc_duration minus
    // the guaranteed N*MIN_INTERVAL — presumably by design; confirm against
    // the throttling design doc.
    decay_factor_ = (static_cast<double>(alloc_duration) - N * static_cast<double>(MIN_INTERVAL)) /
                    static_cast<double>((((N * (N + 1) * N * (N + 1))) / 4));
    // Guard against a negative factor when alloc_duration is too small.
    decay_factor_ = decay_factor_ < 0 ? 0 : decay_factor_;
    COMMON_LOG(INFO,
        "recalculate decay factor",
        K(memstore_threshold_),
        K(trigger_percentage_),
        K(decay_factor_),
        K(alloc_duration),
        K(available_mem),
        K(N));
  }
  return ret;
}
// Bind this arena to the tenant's MEMSTORE ctx allocator and fill in the
// memory attributes used for every page allocation.
// @param tenant_id  owner tenant of all memory allocated through this arena
// @return OB_SUCCESS, OB_INIT_FAIL if the global mallocator is missing, or an
//         error from creating/fetching the tenant ctx allocator.
int ObFifoArena::init(uint64_t tenant_id)
{
  int ret = OB_SUCCESS;
  lib::ObMallocAllocator* allocator = lib::ObMallocAllocator::get_instance();
  uint64_t ctx_id = ObCtxIds::MEMSTORE_CTX_ID;
  if (OB_ISNULL(allocator)) {
    ret = OB_INIT_FAIL;
    OB_LOG(ERROR, "mallocator instance is NULL", K(ret));  // FIX: log said "NULLL"
  } else if (OB_ISNULL(allocator_ = allocator->get_tenant_ctx_allocator(tenant_id, ctx_id))) {
    // The tenant ctx allocator may not exist yet: create it lazily, then re-fetch.
    if (OB_FAIL(allocator->create_tenant_ctx_allocator(tenant_id, ctx_id))) {
      OB_LOG(ERROR, "fail to create tenant allocator", K(tenant_id), K(ctx_id), K(ret));
    } else if (OB_ISNULL(allocator_ = allocator->get_tenant_ctx_allocator(tenant_id, ctx_id))) {
      ret = OB_ERR_UNEXPECTED;
      OB_LOG(ERROR, "tenant allocator is null", K(tenant_id), K(ctx_id), K(ret));
    }
  }
  if (OB_SUCC(ret)) {
    attr_.tenant_id_ = tenant_id;
    attr_.label_ = ObNewModIds::OB_MEMSTORE;
    attr_.ctx_id_ = ctx_id;
  }
  return ret;
}
// Drop every cached page (shrink to zero ways).  Called from the destructor
// and when the owning allocator has no more handles.
void ObFifoArena::reset()
{
  COMMON_LOG(INFO, "MTALLOC.reset", "tenant_id", get_tenant_id());
  shrink_cached_page(0);
}
// Adjust the number of parallel allocation ways, clamped to [1, MAX_NWAY].
// Growing just publishes the new value; shrinking must first wait for all
// in-flight readers (qsync quiescence) before retiring the now-unused pages.
void ObFifoArena::update_nway_per_group(int64_t nway)
{
  if (nway <= 0) {
    nway = 1;
  } else if (nway > Handle::MAX_NWAY) {
    nway = Handle::MAX_NWAY;
  }
  if (nway > nway_) {
    ATOMIC_STORE(&nway_, nway);
  } else if (nway < nway_) {
    // Publish the smaller value first so no new allocation picks a retired
    // way, then quiesce before freezing those ways' pages.
    ATOMIC_STORE(&nway_, nway);
    WaitQuiescent(get_qs());
    shrink_cached_page(nway);
  }
}
// Freeze and release every cached page belonging to a way index >= nway.
// Runs under a qsync critical section so concurrent alloc() cannot free a
// page out from under us.
void ObFifoArena::shrink_cached_page(int64_t nway)
{
  for (int64_t i = 0; i < MAX_CACHED_PAGE_COUNT; i++) {
    if ((i % Handle::MAX_NWAY) >= nway) {
      Page** paddr = cur_pages_ + i;
      Page* page = NULL;
      CriticalGuard(get_qs());
      if (NULL != (page = ATOMIC_LOAD(paddr))) {
        // frozen() returns the page's self ref only for the caller that wins
        // the freeze race; only that caller detaches and releases the page.
        Ref* ref = page->frozen();
        if (NULL != ref) {
          // There may be concurrent removal, no need to pay attention to the return value
          UNUSED(ATOMIC_BCAS(paddr, page, NULL));
          ATOMIC_FAA(&retired_, page->hold());
          release_ref(ref);
        }
      }
    }
  }
}
// Allocate `size` bytes on behalf of `handle`.
// Oversized requests (> PAGE_SIZE including headers) get a dedicated page
// that is immediately frozen and retired; normal requests bump-allocate from
// the cached page of the (group, way) slot, installing a fresh page via CAS
// when the current one is exhausted.  May sleep inside speed_limit() when
// write throttling is active.  Returns NULL on invalid args or OOM.
void* ObFifoArena::alloc(int64_t adv_idx, Handle& handle, int64_t size)
{
  int ret = OB_SUCCESS;
  void* ptr = NULL;
  // Real footprint: payload + page header + one Ref node.
  int64_t rsize = size + sizeof(Page) + sizeof(Ref);
  CriticalGuard(get_qs());
  int64_t way_id = get_way_id();
  int64_t idx = get_idx(adv_idx, way_id);
  Page** paddr = cur_pages_ + idx;
  // Throttle before validating: the wait is charged even for this attempt.
  speed_limit(ATOMIC_LOAD(&hold_), size);
  if (adv_idx < 0 || size < 0) {
    COMMON_LOG(INFO, "invalid argument", K(adv_idx), K(size));
    ret = OB_INVALID_ARGUMENT;
  } else if (rsize > PAGE_SIZE) {
    // Big allocation: private page, used once, retired immediately.
    Page* page = NULL;
    if (NULL == (page = alloc_page(rsize))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
    } else {
      bool need_switch = false;
      handle.add_allocated(page->hold());
      ptr = handle.ref_and_alloc(way_id, need_switch, page, size);
      page->frozen();
      retire_page(way_id, handle, page);
    }
  } else {
    // Fast path: bump-allocate from the cached page, retrying after a page
    // switch until we either get memory or fail to allocate a new page.
    while (OB_SUCC(ret) && NULL == ptr) {
      Page* page = NULL;
      bool need_switch = false;
      if (NULL != (page = ATOMIC_LOAD(paddr))) {
        Ref* ref = handle.get_match_ref(way_id, page);
        if (NULL != ref) {
          ptr = handle.alloc(need_switch, ref, page, size);
        } else {
          // First touch of this page by this handle: take the handle lock so
          // only one thread creates the Ref node.
          LockGuard guard(handle.lock_);
          if (NULL == (ref = handle.get_match_ref(way_id, page))) {
            ptr = handle.ref_and_alloc(way_id, need_switch, page, size);
          }
        }
      }
      if (NULL == page || need_switch) {
        Page* new_page = NULL;
        int64_t alloc_size = PAGE_SIZE;
        if (NULL != page) {
          retire_page(way_id, handle, page);
        }
        if (NULL == (new_page = alloc_page(alloc_size))) {
          // There may be concurrent removal, no need to pay attention to the return value
          UNUSED(ATOMIC_BCAS(paddr, page, NULL));
          ret = OB_ALLOCATE_MEMORY_FAILED;
        } else if (ATOMIC_BCAS(paddr, page, new_page)) {
          handle.add_allocated(new_page->hold());
        } else {
          // Lost the install race: another thread already switched the page.
          destroy_page(new_page);
        }
      }
    }
  }
  return ptr;
}
// Drop `ref`'s contribution from its page's refcount; the last releaser
// (xref reaching 0) frees the page.
void ObFifoArena::release_ref(Ref* ref)
{
  if (0 == ref->page_->xref(ref->allocated_)) {
    free_page(ref->page_);
  }
}
// Release every Ref the handle accumulated across all ways, then reset it.
// Waits once for qsync quiescence (before the first release) so no
// concurrent alloc() still dereferences these pages.
void ObFifoArena::free(Handle& handle)
{
  bool wait_qs_done = false;
  for (int i = 0; i < Handle::MAX_NWAY; i++) {
    Ref* ref = NULL;
    Ref* next_ref = handle.ref_[i];
    if (NULL != next_ref && !wait_qs_done) {
      // Only wait when there is actually something to free, and only once.
      WaitQuiescent(get_qs());
      wait_qs_done = true;
    }
    while (NULL != (ref = next_ref)) {
      next_ref = ref->next_;
      release_ref(ref);
    }
  }
  handle.reset();
}
// Allocate and initialize one page of `size` bytes from the tenant ctx
// allocator, updating the accounting counters (allocated_/total_hold_ by the
// requested size, hold_ by the actual chunk footprint).  Returns NULL on OOM.
ObFifoArena::Page* ObFifoArena::alloc_page(int64_t size)
{
  Page* page = (Page*)allocator_->alloc(size, attr_);
  if (NULL != page) {
    ATOMIC_FAA(&allocated_, size);
    ATOMIC_FAA(&total_hold_, size);
    ATOMIC_AAF(&hold_, page->get_actual_hold_size());
    page->set(size);
  }
  return page;
}
// Return a fully-released page to the allocator and undo alloc_page's
// accounting (reclaimed_ grows by the page's nominal size, hold_ shrinks by
// its actual footprint).
void ObFifoArena::free_page(Page* page)
{
  if (NULL != page && NULL != allocator_) {
    ATOMIC_FAA(&reclaimed_, page->hold());
    ATOMIC_FAA(&total_hold_, -page->hold());
    ATOMIC_FAA(&hold_, -page->get_actual_hold_size());
    allocator_->free(page);
  }
}
// Mark a page as no longer current: count it as retired and park its self
// ref on the handle's per-way list so ObFifoArena::free() releases it later.
void ObFifoArena::retire_page(int64_t idx, Handle& handle, Page* page)
{
  if (NULL != page) {
    ATOMIC_FAA(&retired_, page->hold());
    handle.add_ref(idx, &page->self_ref_);
  }
}
// Destroy a freshly-allocated page that was never published (e.g. it lost the
// CAS install race in alloc()), undoing everything alloc_page() accounted.
void ObFifoArena::destroy_page(Page* page)
{
  if (NULL != page && NULL != allocator_) {
    ATOMIC_FAA(&allocated_, -page->hold());
    ATOMIC_FAA(&total_hold_, -page->hold());
    // FIX: alloc_page() adds the actual chunk footprint to hold_ and
    // free_page() subtracts it, but this path did not — every lost CAS race
    // permanently inflated hold_, and hold_ drives write throttling.
    ATOMIC_FAA(&hold_, -page->get_actual_hold_size());
    allocator_->free(page);
  }
}
bool ObFifoArena::need_do_writing_throttle() const
{
int64_t trigger_percentage = get_writing_throttling_trigger_percentage_();
int64_t trigger_mem_limit = lastest_memstore_threshold_ * trigger_percentage / 100;
int64_t cur_mem_hold = ATOMIC_LOAD(&hold_);
bool need_do_writing_throttle = cur_mem_hold > trigger_mem_limit;
return need_do_writing_throttle;
}
// Apply write throttling for an allocation of `alloc_size` bytes while the
// arena holds `cur_mem_hold`.  When the hold exceeds the trigger line, the
// caller's thread-local sleep budget is increased according to the cubic
// decay model; trigger_percentage == 100 disables throttling entirely.
void ObFifoArena::speed_limit(int64_t cur_mem_hold, int64_t alloc_size)
{
  int ret = OB_SUCCESS;
  int64_t trigger_percentage = get_writing_throttling_trigger_percentage_();
  int64_t trigger_mem_limit = 0;
  if (trigger_percentage < 100) {
    if (OB_UNLIKELY(
            cur_mem_hold < 0 || alloc_size <= 0 || lastest_memstore_threshold_ <= 0 || trigger_percentage <= 0)) {
      COMMON_LOG(ERROR,
          "invalid arguments",
          K(cur_mem_hold),
          K(alloc_size),
          K(lastest_memstore_threshold_),
          K(trigger_percentage));
    } else if (cur_mem_hold > (trigger_mem_limit = lastest_memstore_threshold_ * trigger_percentage / 100)) {
      int64_t alloc_duration = get_writing_throttling_maximum_duration_();
      if (OB_FAIL(throttle_info_.check_and_calc_decay_factor(
              lastest_memstore_threshold_, trigger_percentage, alloc_duration))) {
        COMMON_LOG(WARN, "failed to check_and_calc_decay_factor", K(cur_mem_hold), K(alloc_size), K(throttle_info_));
      } else {
        // Advance the shared base timestamp by this allocation's penalty; the
        // gap between the new base and "now" is what this thread should wait.
        int64_t throttling_interval = get_throttling_interval(cur_mem_hold, alloc_size, trigger_mem_limit);
        int64_t cur_ts = ObTimeUtility::current_time();
        int64_t new_base_ts = ATOMIC_AAF(&last_base_ts_, throttling_interval);
        int64_t sleep_interval = new_base_ts - cur_ts;
        if (sleep_interval > 0) {
          ObWaitEventGuard wait_guard(
              ObWaitEventIds::MEMSTORE_MEM_PAGE_ALLOC_INFO, throttling_interval, cur_mem_hold, sleep_interval, cur_ts);
          usleep(1);  // Here sleep 1us is to let the wait guard recognize this statistic
          // The playback of a single log may allocate 2M blocks multiple times,
          // so the wait is accumulated into a thread-local budget (capped at
          // MAX_WAIT_INTERVAL) instead of being slept here in full.
          uint32_t final_sleep_interval = static_cast<uint32_t>(
              MIN((get_writing_throttling_sleep_interval() + sleep_interval - 1), MAX_WAIT_INTERVAL));
          get_writing_throttling_sleep_interval() = final_sleep_interval;
          throttle_info_.record_limit_event(sleep_interval - 1);
        } else {
          // We are ahead of schedule: pull the base timestamp up to now and
          // start a new statistics period.
          inc_update(&last_base_ts_, ObTimeUtility::current_time());
          throttle_info_.reset_period_stat_info();
          last_reclaimed_ = ATOMIC_LOAD(&reclaimed_);
        }
        if (REACH_TIME_INTERVAL(1 * 1000 * 1000L)) {
          COMMON_LOG(INFO,
              "report write throttle info",
              K(alloc_size),
              K(throttling_interval),
              K(attr_),
              "freed memory(MB):",
              (ATOMIC_LOAD(&reclaimed_) - last_reclaimed_) / 1024 / 1024,
              "last_base_ts",
              ATOMIC_LOAD(&last_base_ts_),
              K(cur_mem_hold),
              K(throttle_info_));
        }
      }
    } else { /*do nothing*/
    }
  }
}
// Compute the throttling penalty (us) for allocating `alloc_size` bytes when
// the hold already exceeds the trigger limit by (cur_mem_hold -
// trigger_mem_limit).  Each MEM_SLICE_SIZE slice above the limit costs
// decay_factor * seq^3, so later slices are penalized cubically harder; the
// result is pro-rated by the requested size plus a small constant floor.
int64_t ObFifoArena::get_throttling_interval(int64_t cur_mem_hold, int64_t alloc_size, int64_t trigger_mem_limit)
{
  constexpr int64_t MIN_INTERVAL_PER_ALLOC = 20;
  // Number of slices this allocation spans, and the (1-based) slice index the
  // current hold sits at, both rounded up.
  int64_t chunk_cnt = ((alloc_size + MEM_SLICE_SIZE - 1) / (MEM_SLICE_SIZE));
  int64_t chunk_seq = ((cur_mem_hold - trigger_mem_limit) + MEM_SLICE_SIZE - 1) / (MEM_SLICE_SIZE);
  int64_t ret_interval = 0;
  double cur_chunk_seq = 1.0;
  // Walk down from chunk_seq, one slice per spanned chunk; stop early if the
  // sequence underflows below the trigger line.
  for (int64_t i = 0; i < chunk_cnt && cur_chunk_seq > 0.0; ++i) {
    cur_chunk_seq = static_cast<double>(chunk_seq - i);
    ret_interval += static_cast<int64_t>(throttle_info_.decay_factor_ * cur_chunk_seq * cur_chunk_seq * cur_chunk_seq);
  }
  return alloc_size * ret_interval / MEM_SLICE_SIZE + MIN_INTERVAL_PER_ALLOC;
}
// Publish the latest memstore threshold used by the throttling computations.
void ObFifoArena::set_memstore_threshold(int64_t memstore_threshold)
{
  ATOMIC_STORE(&lastest_memstore_threshold_, memstore_threshold);
}
// Return the tenant's writing_throttling_trigger_percentage config, cached in
// a thread-local and refreshed at most once per second to keep config access
// off the hot path.  Falls back to the stale/default value when the tenant
// config is unavailable.
int64_t ObFifoArena::get_writing_throttling_trigger_percentage_() const
{
  static thread_local int64_t trigger_percentage = DEFAULT_TRIGGER_PERCENTAGE;
  if (TC_REACH_TIME_INTERVAL(1 * 1000 * 1000)) {  // 1s
    omt::ObTenantConfigGuard tenant_config(TENANT_CONF(attr_.tenant_id_));
    if (!tenant_config.is_valid()) {
      COMMON_LOG(INFO, "failed to get tenant config", K(attr_));
    } else {
      trigger_percentage = tenant_config->writing_throttling_trigger_percentage;
    }
  }
  return trigger_percentage;
}
// Return the tenant's writing_throttling_maximum_duration config, cached in a
// thread-local and refreshed at most once per second (same pattern as
// get_writing_throttling_trigger_percentage_).
int64_t ObFifoArena::get_writing_throttling_maximum_duration_() const
{
  static thread_local int64_t duration = DEFAULT_DURATION;
  if (TC_REACH_TIME_INTERVAL(1 * 1000 * 1000)) {  // 1s
    omt::ObTenantConfigGuard tenant_config(TENANT_CONF(attr_.tenant_id_));
    if (!tenant_config.is_valid()) {
      // keep default
      COMMON_LOG(INFO, "failed to get tenant config", K(attr_));
    } else {
      duration = tenant_config->writing_throttling_maximum_duration;
    }
  }
  return duration;
}
}; // namespace common
}; // end namespace oceanbase

View File

@ -0,0 +1,295 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_SHARE_FIFO_ARENA_H_
#define OCEANBASE_SHARE_FIFO_ARENA_H_
#include "share/ob_define.h"
#include "lib/utility/ob_print_utils.h"
#include "lib/allocator/ob_qsync.h"
#include "lib/allocator/ob_malloc.h"
#include "lib/allocator/ob_allocator.h"
namespace oceanbase {
namespace common {
class ObMemstoreAllocatorMgr;
class ObActiveList;
// A FIFO-style arena for the memstore: memory is bump-allocated from large
// pages cached per (group, way) slot, page lifetime is tracked through Ref
// nodes, and allocations can be slowed down (write throttling) once the
// tenant's memstore usage crosses a configured threshold.
class ObFifoArena {
public:
  // Process-wide sum of bytes held by all arenas.
  static int64_t total_hold_;
  struct Page;
  // One reference node linking a Handle to a Page; Refs are chained per way.
  struct Ref {
    void set_page(Page* page)
    {
      next_ = NULL;
      page_ = page;
      allocated_ = 0;
    }
    // Account `size` bytes against this ref (atomically).
    void add_alloc_size(int64_t size)
    {
      ATOMIC_FAA(&allocated_, size);
    }
    Ref* next_;
    Page* page_;
    int64_t allocated_;
  };
  // A bump-allocated memory page.  pos_ only grows; the page is "frozen" by
  // pushing pos_ past limit_ so no further allocation can succeed.
  struct Page {
    void set(int64_t size)
    {
      self_ref_.set_page(this);
      limit_ = size - sizeof(*this);
      pos_ = 0;
      ref_ = 0;
    }
    // Nominal size of the page (payload limit + header).
    int64_t hold()
    {
      return limit_ + sizeof(*this);
    }
    // Adjust the refcount by x; returns the new value (0 => page is dead).
    int64_t xref(int64_t x)
    {
      return ATOMIC_AAF(&ref_, x);
    }
    // Try to carve `size` bytes.  Sets need_switch when this caller is the one
    // that pushed the page over its limit and must install a replacement.
    char* alloc(bool& need_switch, int64_t size)
    {
      char* ret = NULL;
      int64_t pos = 0;
      int64_t limit = ATOMIC_LOAD(&limit_);
      if ((pos = ATOMIC_LOAD(&pos_)) <= limit) {
        pos = ATOMIC_FAA(&pos_, size);
        ret = (pos + size <= limit) ? buf_ + pos : NULL;
      }
      need_switch = pos <= limit && (NULL == ret);
      if (need_switch) {
        // The switcher compensates the self ref for the bytes never handed out.
        self_ref_.add_alloc_size(-pos);
      }
      return ret;
    }
    // Freeze the page by overflowing pos_.  Only the winning caller gets the
    // self ref back (everyone else sees NULL) and owns the release.
    Ref* frozen()
    {
      Ref* ref = NULL;
      bool need_switch = false;
      (void)alloc(need_switch, ATOMIC_LOAD(&limit_) + 1);
      if (need_switch) {
        ref = &self_ref_;
      }
      return ref;
    }
    // Real footprint of the underlying chunk (defined in the .cpp).
    int64_t get_actual_hold_size();
    Ref self_ref_;
    int64_t limit_;
    int64_t pos_;
    int64_t ref_;
    char buf_[0];  // flexible payload area, sized by set()
  };
  // Minimal test-and-set spin lock guard over an int64_t flag.
  struct LockGuard {
    LockGuard(int64_t& lock) : lock_(lock)
    {
      while (ATOMIC_TAS(&lock_, 1)) {
        PAUSE();
      }
    }
    ~LockGuard()
    {
      ATOMIC_STORE(&lock_, 0);
    }
    int64_t& lock_;
  };
  // Per-client allocation state: one Ref chain per way plus a byte counter.
  struct Handle {
    enum { MAX_NWAY = 32 };
    void reset()
    {
      lock_ = 0;
      memset(ref_, 0, sizeof(ref_));
      allocated_ = 0;
    }
    // Return this way's ref only if it still points at `page`, else NULL.
    Ref* get_match_ref(int64_t idx, Page* page)
    {
      Ref* ref = ATOMIC_LOAD(ref_ + idx);
      if (NULL != ref && page != ref->page_) {
        ref = NULL;
      }
      return ref;
    }
    // Allocate from a page we already reference.
    void* alloc(bool& need_switch, Ref* ref, Page* page, int64_t size)
    {
      void* ptr = NULL;
      if (NULL != (ptr = page->alloc(need_switch, size))) {
        ref->add_alloc_size(size);
      }
      return ptr;
    }
    // First allocation from a page: the Ref node is carved out of the page
    // itself, just before the returned payload.
    void* ref_and_alloc(int64_t idx, bool& need_switch, Page* page, int64_t size)
    {
      void* ptr = NULL;
      Ref* ref = NULL;
      if (NULL != (ref = (Ref*)page->alloc(need_switch, size + sizeof(*ref)))) {
        ref->set_page(page);
        ref->add_alloc_size(size + sizeof(*ref));
        add_ref(idx, ref);
        ptr = (void*)(ref + 1);
      }
      return ptr;
    }
    // Push a ref onto the head of way idx's chain.
    void add_ref(int64_t idx, Ref* ref)
    {
      Ref* old_ref = ATOMIC_TAS(ref_ + idx, ref);
      ATOMIC_STORE(&ref->next_, old_ref);
    }
    int64_t get_allocated() const
    {
      return ATOMIC_LOAD(&allocated_);
    }
    void add_allocated(int64_t size)
    {
      ATOMIC_FAA(&allocated_, size);
    }
    TO_STRING_KV(K_(allocated));
    int64_t lock_;
    Ref* ref_[MAX_NWAY];
    int64_t allocated_;
  };

public:
  enum {
    MAX_CACHED_GROUP_COUNT = 16,
    MAX_CACHED_PAGE_COUNT = MAX_CACHED_GROUP_COUNT * Handle::MAX_NWAY,
    PAGE_SIZE = OB_MALLOC_BIG_BLOCK_SIZE + sizeof(Page) + sizeof(Ref)
  };
  ObFifoArena()
      : allocator_(NULL),
        nway_(0),
        allocated_(0),
        reclaimed_(0),
        hold_(0),
        retired_(0),
        last_base_ts_(0),
        last_reclaimed_(0),
        lastest_memstore_threshold_(0)
  {
    memset(cur_pages_, 0, sizeof(cur_pages_));
  }
  ~ObFifoArena()
  {
    reset();
  }

public:
  int init(uint64_t tenant_id);
  void reset();
  void update_nway_per_group(int64_t nway);
  void* alloc(int64_t idx, Handle& handle, int64_t size);
  void free(Handle& ref);
  // Total bytes ever allocated (monotonic).
  int64_t allocated() const
  {
    return ATOMIC_LOAD(&allocated_);
  }
  // Total bytes ever retired (monotonic clock used by handles).
  int64_t retired() const
  {
    return ATOMIC_LOAD(&retired_);
  }
  // Bytes currently live: allocated minus reclaimed.
  int64_t hold() const
  {
    int64_t rsize = ATOMIC_LOAD(&reclaimed_);
    int64_t asize = ATOMIC_LOAD(&allocated_);
    return asize - rsize;
  }
  uint64_t get_tenant_id() const
  {
    return attr_.tenant_id_;
  }
  void set_memstore_threshold(int64_t memstore_threshold);
  bool need_do_writing_throttle() const;

private:
  // Shared quiescent-state synchronizer protecting cached-page reclamation.
  ObQSync& get_qs()
  {
    static ObQSync s_qs;
    return s_qs;
  }
  // Way selection is by CPU id to reduce contention between threads.
  int64_t get_way_id()
  {
    return icpu_id() % ATOMIC_LOAD(&nway_);
  }
  int64_t get_idx(int64_t grp_id, int64_t way_id)
  {
    return (grp_id % MAX_CACHED_GROUP_COUNT) * Handle::MAX_NWAY + way_id;
  }
  // Control parameters and statistics for write throttling.
  struct ObWriteThrottleInfo {
  public:
    ObWriteThrottleInfo()
    {
      reset();
    }
    ~ObWriteThrottleInfo()
    {}
    void reset();
    void reset_period_stat_info();
    void record_limit_event(int64_t interval);
    int check_and_calc_decay_factor(int64_t memstore_threshold, int64_t trigger_percentage, int64_t alloc_duration);
    TO_STRING_KV(K(decay_factor_), K(alloc_duration_), K(trigger_percentage_), K(memstore_threshold_),
        K(period_throttled_count_), K(period_throttled_time_), K(total_throttled_count_), K(total_throttled_time_));

  public:
    // control info
    double decay_factor_;
    int64_t alloc_duration_;
    int64_t trigger_percentage_;
    int64_t memstore_threshold_;
    // stat info
    int64_t period_throttled_count_;
    int64_t period_throttled_time_;
    int64_t total_throttled_count_;
    int64_t total_throttled_time_;
  };

private:
  void release_ref(Ref* ref);
  Page* alloc_page(int64_t size);
  void free_page(Page* ptr);
  void retire_page(int64_t way_id, Handle& handle, Page* ptr);
  void destroy_page(Page* page);
  void shrink_cached_page(int64_t nway);
  void speed_limit(int64_t cur_mem_hold, int64_t alloc_size);
  int64_t get_throttling_interval(int64_t cur_mem_hold, int64_t alloc_size, int64_t trigger_mem_limit);
  int64_t get_actual_hold_size(Page* page);
  int64_t get_writing_throttling_trigger_percentage_() const;
  int64_t get_writing_throttling_maximum_duration_() const;

private:
  static const int64_t MAX_WAIT_INTERVAL = 20 * 1000 * 1000;  // 20s
  static const int64_t MEM_SLICE_SIZE = 2 * 1024 * 1024;      // Bytes per usecond
  static const int64_t MIN_INTERVAL = 20000;
  static const int64_t DEFAULT_TRIGGER_PERCENTAGE = 100;
  static const int64_t DEFAULT_DURATION = 60 * 60 * 1000 * 1000L;  // us
  lib::ObMemAttr attr_;
  ObIAllocator* allocator_;
  int64_t nway_;
  int64_t allocated_;
  int64_t reclaimed_;
  int64_t hold_;  // for single tenant
  int64_t retired_;
  int64_t last_base_ts_;       // shared base timestamp for throttling sleeps
  int64_t last_reclaimed_;     // reclaimed_ snapshot at last period reset
  Page* cur_pages_[MAX_CACHED_PAGE_COUNT];
  ObWriteThrottleInfo throttle_info_;
  int64_t lastest_memstore_threshold_;  // Save the latest memstore_threshold
  DISALLOW_COPY_AND_ASSIGN(ObFifoArena);
};
} // namespace common
} // end of namespace oceanbase
#endif

View File

@ -0,0 +1,154 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_gmemstore_allocator.h"
#include "ob_memstore_allocator_mgr.h"
#include "storage/memtable/ob_memtable.h"
#include "lib/utility/ob_print_utils.h"
#include "observer/omt/ob_multi_tenant.h"
#include "observer/ob_server_struct.h"
#include "share/ob_tenant_mgr.h"
namespace oceanbase {
namespace common {
// Visitor for ObHandleList::for_each: appends the description of each frozen
// memtable (followed by a comma) into the logger's buffer.
int FrozenMemstoreInfoLogger::operator()(ObDLink* link)
{
  int ret = OB_SUCCESS;
  ObGMemstoreAllocator::AllocHandle* handle = CONTAINER_OF(link, typeof(*handle), total_list_);
  memtable::ObMemtable& mt = handle->mt_;
  if (mt.is_frozen_memtable()) {
    if (OB_FAIL(databuff_print_obj(buf_, limit_, pos_, mt))) {
      // print failure: keep ret and stop appending for this entry
    } else {
      ret = databuff_printf(buf_, limit_, pos_, ",");
    }
  }
  return ret;
}
// Attach this handle to the tenant's global memstore allocator and snapshot
// its last freeze timestamp.
int ObGMemstoreAllocator::AllocHandle::init(uint64_t tenant_id)
{
  int ret = OB_SUCCESS;
  ObGMemstoreAllocator* host = NULL;
  if (OB_FAIL(ObMemstoreAllocatorMgr::get_instance().get_tenant_memstore_allocator(tenant_id, host))) {
    // NOTE(review): the original error code is replaced with
    // OB_ERR_UNEXPECTED here, losing the underlying cause — confirm intended.
    ret = OB_ERR_UNEXPECTED;
  } else if (NULL == host) {
    ret = OB_ERR_UNEXPECTED;
  } else {
    host->init_handle(*this, tenant_id);
    // A zero timestamp means the host was never freeze-stamped; log loudly
    // but keep ret == OB_SUCCESS.
    if (0 == (last_freeze_timestamp_ = host->get_last_freeze_timestamp())) {
      COMMON_LOG(ERROR, "unexpected value", K(last_freeze_timestamp_));
    }
  }
  return ret;
}
// Register a fresh handle: reset it, link it into the handle list, and
// refresh the arena's way count and memstore threshold under the lock.
void ObGMemstoreAllocator::init_handle(AllocHandle& handle, uint64_t tenant_id)
{
  handle.do_reset();
  handle.set_host(this);
  {
    // nway_per_group() does config/omt lookups, so compute it outside the lock.
    int64_t nway = nway_per_group();
    LockGuard guard(lock_);
    hlist_.init_handle(handle);
    arena_.update_nway_per_group(nway);
    set_memstore_threshold_without_lock(tenant_id);
  }
  COMMON_LOG(TRACE, "MTALLOC.init", KP(&handle.mt_));
}
// Unregister a handle: release its arena memory (outside the lock), unlink it
// from the list, and reset the whole arena when it was the last handle.
void ObGMemstoreAllocator::destroy_handle(AllocHandle& handle)
{
  COMMON_LOG(TRACE, "MTALLOC.destroy", KP(&handle.mt_));
  arena_.free(handle.arena_handle_);
  {
    LockGuard guard(lock_);
    hlist_.destroy_handle(handle);
    if (hlist_.is_empty()) {
      arena_.reset();
    }
  }
  handle.do_reset();
}
// Allocate `size` bytes (rounded up to 8-byte alignment) for a memtable
// handle.  Lazily activates the handle on its first allocation, and refuses
// to allocate when the tenant is over its memstore limit (or when that check
// itself fails).  Returns nullptr on refusal or arena OOM.
void* ObGMemstoreAllocator::alloc(AllocHandle& handle, int64_t size)
{
  int ret = OB_SUCCESS;
  int64_t align_size = upper_align(size, sizeof(int64_t));
  uint64_t tenant_id = arena_.get_tenant_id();
  bool is_out_of_mem = false;
  if (!handle.is_id_valid()) {
    COMMON_LOG(TRACE, "MTALLOC.first_alloc", KP(&handle.mt_));
    // Double-checked under the lock: only the first allocator activates the
    // handle and stamps its protection clock.
    LockGuard guard(lock_);
    if (!handle.is_id_valid()) {
      handle.set_clock(arena_.retired());
      hlist_.set_active(handle);
    }
  }
  if (OB_FAIL(ObTenantManager::get_instance().check_tenant_out_of_memstore_limit(tenant_id, is_out_of_mem))) {
    COMMON_LOG(ERROR, "fail to check tenant out of mem limit", K(ret), K(tenant_id));
    // Fail closed: if the limit cannot be checked, treat it as out of memory.
    is_out_of_mem = true;
  } else if (is_out_of_mem && REACH_TIME_INTERVAL(1 * 1000 * 1000)) {
    STORAGE_LOG(WARN, "this tenant is already out of memstore limit", K(tenant_id));
  }
  return is_out_of_mem ? nullptr : arena_.alloc(handle.id_, handle.arena_handle_, align_size);
}
// Move a handle to the frozen state under the list lock (its memory is kept
// until destroy_handle).
void ObGMemstoreAllocator::set_frozen(AllocHandle& handle)
{
  COMMON_LOG(TRACE, "MTALLOC.set_frozen", KP(&handle.mt_));
  LockGuard guard(lock_);
  hlist_.set_frozen(handle);
}
// One allocation way per CPU, capped by how many pages fit into roughly a
// twentieth of the tenant's memory.
static int64_t calc_nway(int64_t cpu, int64_t mem)
{
  const int64_t mem_ways = mem / 20 / ObFifoArena::PAGE_SIZE;
  return cpu < mem_ways ? cpu : mem_ways;
}
// Derive the arena way count from the tenant's max CPU quota and min memory
// limit.  Returns 0 when any lookup fails (the arena clamps 0 up to 1 way).
int64_t ObGMemstoreAllocator::nway_per_group()
{
  int ret = OB_SUCCESS;
  uint64_t tenant_id = arena_.get_tenant_id();
  double min_cpu = 0;
  double max_cpu = 0;
  int64_t min_memory = 0;
  int64_t max_memory = 0;
  omt::ObMultiTenant* omt = GCTX.omt_;
  if (NULL == omt) {
    ret = OB_ERR_UNEXPECTED;
  } else if (OB_FAIL(omt->get_tenant_cpu(tenant_id, min_cpu, max_cpu))) {
    // fall through with the error recorded in ret
  } else if (OB_FAIL(ObTenantManager::get_instance().get_tenant_mem_limit(tenant_id, min_memory, max_memory))) {
    // fall through with the error recorded in ret
  }
  return OB_SUCCESS == ret ? calc_nway((int64_t)max_cpu, min_memory) : 0;
}
// Locked wrapper around set_memstore_threshold_without_lock().
int ObGMemstoreAllocator::set_memstore_threshold(uint64_t tenant_id)
{
  LockGuard guard(lock_);
  int ret = set_memstore_threshold_without_lock(tenant_id);
  return ret;
}
// Fetch the tenant's memstore limit and push it into the arena for write
// throttling.  Caller must hold lock_.
int ObGMemstoreAllocator::set_memstore_threshold_without_lock(uint64_t tenant_id)
{
  int ret = OB_SUCCESS;
  int64_t memstore_threshold = INT64_MAX;
  if (OB_FAIL(ObTenantManager::get_instance().get_tenant_memstore_limit(tenant_id, memstore_threshold))) {
    COMMON_LOG(WARN, "failed to get_tenant_memstore_limit", K(tenant_id), K(ret));
  } else {
    arena_.set_memstore_threshold(memstore_threshold);
  }
  return ret;
}
}; // end namespace common
}; // end namespace oceanbase

View File

@ -0,0 +1,208 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_ALLOCATOR_OB_GMEMSTORE_ALLOCATOR_H_
#define OCEANBASE_ALLOCATOR_OB_GMEMSTORE_ALLOCATOR_H_
#include "ob_handle_list.h"
#include "ob_fifo_arena.h"
#include "lib/lock/ob_spin_lock.h"
namespace oceanbase {
namespace memtable {
class ObMemtable;
};
namespace common {
// Functor passed to ObGMemstoreAllocator::for_each that prints every frozen
// memtable's info into a caller-supplied buffer (see operator() in the .cpp).
struct FrozenMemstoreInfoLogger {
  FrozenMemstoreInfoLogger(char* buf, int64_t limit) : buf_(buf), limit_(limit), pos_(0)
  {}
  ~FrozenMemstoreInfoLogger()
  {}
  int operator()(ObDLink* link);
  char* buf_;      // output buffer (not owned)
  int64_t limit_;  // buffer capacity
  int64_t pos_;    // current write position
};
// Per-tenant global memstore allocator: pairs an ObHandleList (which tracks
// memtable handles and the hazard clock) with an ObFifoArena (which owns the
// memory).  Each memtable allocates through an AllocHandle.
class ObGMemstoreAllocator {
public:
  typedef ObSpinLock Lock;
  typedef ObSpinLockGuard LockGuard;
  typedef ObGMemstoreAllocator GAlloc;
  typedef ObFifoArena Arena;
  typedef ObHandleList HandleList;
  typedef HandleList::Handle ListHandle;
  typedef Arena::Handle ArenaHandle;
  // Allocator facade handed to one memtable; combines list membership
  // (ListHandle) with arena accounting (arena_handle_).
  class AllocHandle : public ListHandle, public ObIAllocator {
  public:
    memtable::ObMemtable& mt_;  // owning memtable (back reference)
    GAlloc* host_;              // set by init(); NULL after destroy()
    ArenaHandle arena_handle_;
    AllocHandle(memtable::ObMemtable& mt) : mt_(mt), host_(NULL), last_freeze_timestamp_(0)
    {
      do_reset();
    }
    void do_reset()
    {
      ListHandle::reset();
      arena_handle_.reset();
      host_ = NULL;
      last_freeze_timestamp_ = 0;
    }
    // Arena group this handle allocates from; INT64_MAX until activated.
    int64_t get_group_id() const
    {
      return id_ < 0 ? INT64_MAX : (id_ % Arena::MAX_CACHED_GROUP_COUNT);
    }
    int64_t get_last_freeze_timestamp() const
    {
      return last_freeze_timestamp_;
    }
    int init(uint64_t tenant_id);
    void set_host(GAlloc* host)
    {
      host_ = host;
    }
    void destroy()
    {
      if (NULL != host_) {
        host_->destroy_handle(*this);
        host_ = NULL;
      }
    }
    int64_t get_protection_clock() const
    {
      return get_clock();
    }
    int64_t get_retire_clock() const
    {
      int64_t retire_clock = INT64_MAX;
      if (NULL != host_) {
        retire_clock = host_->get_retire_clock();
      }
      return retire_clock;
    }
    int64_t get_size() const
    {
      return arena_handle_.get_allocated();
    }
    int64_t get_occupied_size() const
    {
      return get_size();
    }
    // ObIAllocator interface: delegate to the host; NULL before init().
    void* alloc(int64_t size)
    {
      return NULL == host_ ? NULL : host_->alloc(*this, size);
    }
    void* alloc(const int64_t size, const ObMemAttr& attr)
    {
      UNUSEDx(attr);
      return alloc(size);
    }
    // Memory is arena-managed; individual frees are no-ops.
    void free(void* ptr)
    {
      UNUSED(ptr);
    }
    void set_frozen()
    {
      if (NULL != host_) {
        host_->set_frozen(*this);
      }
    }
    INHERIT_TO_STRING_KV("ListHandle", ListHandle, KP_(host), K_(arena_handle), K_(last_freeze_timestamp));

  private:
    int64_t last_freeze_timestamp_;
  };

public:
  ObGMemstoreAllocator() : hlist_(), arena_(), last_freeze_timestamp_(0)
  {}
  ~ObGMemstoreAllocator()
  {}

public:
  int init(uint64_t tenant_id)
  {
    update_last_freeze_timestamp();
    return arena_.init(tenant_id);
  }
  void init_handle(AllocHandle& handle, uint64_t tenant_id);
  void destroy_handle(AllocHandle& handle);
  void* alloc(AllocHandle& handle, int64_t size);
  void set_frozen(AllocHandle& handle);
  // Apply f to every registered handle's total_list_ link, stopping on the
  // first non-success return.
  template <typename Func>
  int for_each(Func& f)
  {
    int ret = common::OB_SUCCESS;
    ObDLink* iter = NULL;
    LockGuard guard(lock_);
    while (OB_SUCC(ret) && NULL != (iter = hlist_.next(iter))) {
      ret = f(iter);
    }
    return ret;
  }

public:
  // Bytes allocated beyond the oldest active handle's clock (0 when no
  // active handle exists).
  int64_t get_mem_active_memstore_used()
  {
    int64_t hazard = hlist_.hazard();
    return hazard == INT64_MAX ? 0 : (arena_.allocated() - hazard);
  }
  int64_t get_mem_total_memstore_used() const
  {
    return arena_.hold();
  }
  // Render all frozen memtables' info into buf (best effort, NUL-led).
  void log_frozen_memstore_info(char* buf, int64_t limit)
  {
    if (NULL != buf && limit > 0) {
      FrozenMemstoreInfoLogger logger(buf, limit);
      buf[0] = 0;
      (void)for_each(logger);
    }
  }

public:
  int set_memstore_threshold(uint64_t tenant_id);
  bool need_do_writing_throttle() const
  {
    return arena_.need_do_writing_throttle();
  }
  int64_t get_retire_clock() const
  {
    return arena_.retired();
  }
  bool exist_active_memtable_below_clock(const int64_t clock) const
  {
    return hlist_.hazard() < clock;
  }
  int64_t get_last_freeze_timestamp()
  {
    return ATOMIC_LOAD(&last_freeze_timestamp_);
  }
  void update_last_freeze_timestamp()
  {
    ATOMIC_STORE(&last_freeze_timestamp_, ObTimeUtility::current_time());
  }

private:
  int64_t nway_per_group();
  int set_memstore_threshold_without_lock(uint64_t tenant_id);

private:
  Lock lock_;
  HandleList hlist_;
  Arena arena_;
  int64_t last_freeze_timestamp_;
};
}; // end namespace common
}; // end namespace oceanbase
#endif /* OCEANBASE_ALLOCATOR_OB_GMEMSTORE_ALLOCATOR_H_ */

View File

@ -0,0 +1,67 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_handle_list.h"
namespace oceanbase {
namespace common {
// Register a handle: reset it and link it into the total list.  Caller is
// expected to serialize calls (the owning allocator holds its lock).
void ObHandleList::init_handle(Handle& handle)
{
  handle.reset();
  total_list_.add(&handle.total_list_);
  ATOMIC_AAF(&total_count_, 1);
}
// Unregister a handle: make sure it is off the active list (set_frozen is a
// no-op for non-active handles), then remove it from the total list.
void ObHandleList::destroy_handle(Handle& handle)
{
  set_frozen(handle);
  total_list_.del(&handle.total_list_);
  ATOMIC_AAF(&total_count_, -1);
}
// Activate a handle: insert it into the clock-ordered active list (only on
// the INIT -> ACTIVE transition), refresh the hazard, and assign a fresh id.
void ObHandleList::set_active(Handle& handle)
{
  if (handle.set_active()) {
    // active_list_.add uses Handle::ge to keep the list sorted by clock.
    active_list_.add(&handle.active_list_, handle);
    update_hazard();
  }
  handle.set_id(alloc_id());
}
// Freeze a handle: remove it from the active list (if it was active) and
// refresh the hazard before marking the state.
void ObHandleList::set_frozen(Handle& handle)
{
  if (handle.is_active()) {
    active_list_.del(&handle.active_list_);
    update_hazard();
  }
  handle.set_frozen();
}
// Publish the recomputed hazard (min active clock) for lock-free readers.
void ObHandleList::update_hazard()
{
  ATOMIC_STORE(&hazard_, calc_hazard());
}
// The hazard is the clock of the tail (smallest-clock) active handle, or
// INT64_MAX when no handle is active.
// NOTE(review): this logs at INFO on every call — confirm that is intended
// given update_hazard() runs on each activate/freeze.
int64_t ObHandleList::calc_hazard()
{
  int64_t x = INT64_MAX;
  DLink* last = active_list_.tail_.prev_;
  if (&active_list_.head_ != last) {
    Handle* handle = CONTAINER_OF(last, Handle, active_list_);
    x = handle->get_clock();
  }
  COMMON_LOG(INFO, "HandleList.calc_hazard", K(x));
  return x;
}
}; // end namespace common
}; // end namespace oceanbase

View File

@ -0,0 +1,187 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_ALLOCATOR_OB_ACTIVE_LIST_H_
#define OCEANBASE_ALLOCATOR_OB_ACTIVE_LIST_H_
#include "lib/queue/ob_link.h"
#include "lib/utility/ob_print_utils.h"
namespace oceanbase {
namespace common {
class ObFifoArena;
// Tracks allocation handles in two intrusive doubly-linked lists: a total
// list of all handles and a clock-sorted active list whose tail defines the
// "hazard" — the smallest clock any active handle still protects.
class ObHandleList {
public:
  typedef ObDLink DLink;
  // Minimal intrusive doubly-linked list with sentinel head/tail nodes.
  struct DList {
    DLink head_;
    DLink tail_;
    DList()
    {
      head_.prev_ = NULL;
      head_.next_ = &tail_;
      tail_.prev_ = &head_;
      tail_.next_ = NULL;
    }
    // Iterate: pass NULL to start; returns NULL at the end.
    DLink* next(DLink* iter)
    {
      if (NULL == iter) {
        iter = &head_;
      }
      iter = (DLink*)iter->next_;
      if (&tail_ == iter) {
        iter = NULL;
      }
      return iter;
    }
    static void dlink_insert(ObDLink* cur, ObDLink* x)
    {
      ObDLink* next = (ObDLink*)cur->next_;
      x->prev_ = cur;
      x->next_ = next;
      next->prev_ = x;
      cur->next_ = x;
    }
    static void dlink_del(ObDLink* x)
    {
      ObDLink* prev = (ObDLink*)x->prev_;
      ObDLink* next = (ObDLink*)x->next_;
      prev->next_ = next;
      next->prev_ = prev;
    }
    void add(DLink* x)
    {
      dlink_insert(&head_, x);
    }
    void del(DLink* x)
    {
      dlink_del(x);
    }
    // Ordered insert: x goes before the first node for which pred.ge(node)
    // holds (keeps the list sorted by the predicate's key).
    template <typename Pred>
    void add(DLink* x, Pred& pred)
    {
      DLink* cur = &head_;
      DLink* next = NULL;
      while (&tail_ != (next = (DLink*)cur->next_)) {
        if (pred.ge(next)) {
          break;
        }
        cur = next;
      }
      dlink_insert(cur, x);
    }
  };
  // One tracked handle: state machine INIT -> ACTIVE -> FROZEN, with an id
  // and a clock used for hazard computation.
  struct Handle {
    enum { INIT = 0, ACTIVE = 1, FROZEN = 2 };
    int freeze_stat_;
    int64_t id_;
    int64_t clock_;
    DLink total_list_;   // membership in ObHandleList::total_list_
    DLink active_list_;  // membership in ObHandleList::active_list_
    void reset()
    {
      COMMON_LOG(DEBUG, "reset list");
      freeze_stat_ = INIT;
      id_ = -1;
      clock_ = INT64_MAX;
      total_list_.next_ = NULL;
      total_list_.prev_ = NULL;
      active_list_.next_ = NULL;
      active_list_.prev_ = NULL;
    }
    // Ordering predicate for DList::add: true when this clock >= next's.
    bool ge(DLink* x)
    {
      Handle* next = CONTAINER_OF(x, Handle, active_list_);
      return clock_ >= next->clock_;
    }
    void set_id(int64_t id)
    {
      ATOMIC_STORE(&id_, id);
    }
    int64_t get_id() const
    {
      return ATOMIC_LOAD(&id_);
    }
    bool is_id_valid() const
    {
      return get_id() >= 0;
    }
    void set_clock(int64_t clock)
    {
      ATOMIC_STORE(&clock_, clock);
    }
    int64_t get_clock() const
    {
      return ATOMIC_LOAD(&clock_);
    }
    // INIT -> ACTIVE transition; returns whether the handle is now active
    // (false if it was already FROZEN).
    bool set_active()
    {
      if (ATOMIC_LOAD(&freeze_stat_) == INIT) {
        ATOMIC_STORE(&freeze_stat_, ACTIVE);
      }
      return is_active();
    }
    void set_frozen()
    {
      ATOMIC_STORE(&freeze_stat_, FROZEN);
    }
    bool is_active() const
    {
      return ATOMIC_LOAD(&freeze_stat_) == ACTIVE;
    }
    TO_STRING_KV(K_(freeze_stat), K_(id), K_(clock));
  };
  ObHandleList() : id_(0), hazard_(INT64_MAX), total_count_(0)
  {}
  ~ObHandleList()
  {}
  void init_handle(Handle& handle);
  void destroy_handle(Handle& handle);
  void set_active(Handle& handle);
  void set_frozen(Handle& handle);
  bool is_empty() const
  {
    return ATOMIC_LOAD(&total_count_) <= 0;
  }
  // Smallest clock of any active handle; INT64_MAX when none is active.
  int64_t hazard() const
  {
    return ATOMIC_LOAD(&hazard_);
  }
  DLink* next(DLink* iter)
  {
    return total_list_.next(iter);
  }

protected:
  void set_frozen_(Handle& handle);
  int64_t alloc_id()
  {
    return ATOMIC_AAF(&id_, 1);
  }

private:
  void update_hazard();
  int64_t calc_hazard();

private:
  int64_t id_;        // monotonically increasing handle id source
  DList total_list_;  // every registered handle
  DList active_list_; // active handles, sorted by clock (tail = smallest)
  int64_t hazard_;
  int64_t total_count_;
};
}; // end namespace common
}; // end namespace oceanbase
#endif /* OCEANBASE_ALLOCATOR_OB_ACTIVE_LIST_H_ */

View File

@ -0,0 +1,125 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SHARE
#include "share/allocator/ob_memstore_allocator_mgr.h"
#include "share/allocator/ob_gmemstore_allocator.h"
#include "lib/alloc/alloc_struct.h"
using namespace oceanbase::lib;
using namespace oceanbase::common;
// Total memstore memory currently held across all tenants, as tracked by the
// process-wide counter maintained by ObFifoArena.
int64_t ObMemstoreAllocatorMgr::get_all_tenants_memstore_used()
{
  const int64_t used = ATOMIC_LOAD(&ObFifoArena::total_hold_);
  return used;
}
// Wires in the process malloc allocator; allocator_map_ is only created later
// in init(), so the object is not usable for lookups until init() succeeds.
ObMemstoreAllocatorMgr::ObMemstoreAllocatorMgr()
    : is_inited_(false), allocators_(), allocator_map_(), malloc_allocator_(NULL), all_tenants_memstore_used_(0)
{
  set_malloc_allocator(ObMallocAllocator::get_instance());
}
// NOTE(review): tenant allocators handed out by this manager are never freed
// here — presumably they live for the whole process; confirm before changing.
ObMemstoreAllocatorMgr::~ObMemstoreAllocatorMgr()
{}
int ObMemstoreAllocatorMgr::init()
{
int ret = OB_SUCCESS;
if (OB_FAIL(allocator_map_.create(ALLOCATOR_MAP_BUCKET_NUM, ObModIds::OB_MEMSTORE_ALLOCATOR))) {
LOG_WARN("failed to create allocator_map", K(ret));
} else {
is_inited_ = true;
}
return ret;
}
// Return (creating lazily on first use) the memstore allocator of tenant_id.
// Tenants with small ids get a dedicated, lock-free slot in allocators_ that
// is claimed with a CAS; larger ids go through allocator_map_, where a losing
// racer is resolved by re-reading the map after OB_HASH_EXIST.
// NOTE(review): is_inited_ is not checked here — presumably init() is
// guaranteed to have run first; confirm against callers.
int ObMemstoreAllocatorMgr::get_tenant_memstore_allocator(const uint64_t tenant_id, TAllocator*& out_allocator)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(tenant_id <= 0)) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid tenant id", K(tenant_id), K(ret));
  } else if (tenant_id < PRESERVED_TENANT_COUNT) {
    // Fast path: one array slot per small tenant id, claimed via CAS.
    if (NULL == (out_allocator = ATOMIC_LOAD(&allocators_[tenant_id]))) {
      ObMemAttr attr;
      attr.tenant_id_ = OB_SERVER_TENANT_ID;
      attr.label_ = ObModIds::OB_MEMSTORE_ALLOCATOR;
      void* buf = ob_malloc(sizeof(TAllocator), attr);
      if (NULL != buf) {
        TAllocator* allocator = new (buf) TAllocator();
        bool cas_succeed = false;
        if (OB_SUCC(ret)) {
          if (OB_FAIL(allocator->init(tenant_id))) {
            LOG_WARN("failed to init tenant memstore allocator", K(tenant_id), K(ret));
          } else {
            LOG_INFO("succ to init tenant memstore allocator", K(tenant_id), K(ret));
            cas_succeed = ATOMIC_BCAS(&allocators_[tenant_id], NULL, allocator);
          }
        }
        // Lost the race (or init failed): drop our instance and adopt
        // whatever (if anything) another thread installed.
        if (OB_FAIL(ret) || !cas_succeed) {
          allocator->~TAllocator();
          ob_free(buf);
          out_allocator = ATOMIC_LOAD(&allocators_[tenant_id]);
        } else {
          out_allocator = allocator;
        }
      } else {
        ret = OB_ALLOCATE_MEMORY_FAILED;
        LOG_WARN("failed to allocate memory", K(tenant_id), K(ret));
      }
    }
  } else if (OB_FAIL(allocator_map_.get_refactored(tenant_id, out_allocator))) {
    // Slow path: hash map keyed by tenant id for large ids.
    if (OB_HASH_NOT_EXIST != ret) {
      LOG_WARN("failed to get tenant memstore allocator", K(tenant_id), K(ret));
    } else {
      ret = OB_SUCCESS;
      ObMemAttr attr;
      attr.tenant_id_ = OB_SERVER_TENANT_ID;
      attr.label_ = ObModIds::OB_MEMSTORE_ALLOCATOR;
      void* buf = ob_malloc(sizeof(TAllocator), attr);
      if (NULL != buf) {
        TAllocator* new_allocator = new (buf) TAllocator();
        if (OB_FAIL(new_allocator->init(tenant_id))) {
          LOG_WARN("failed to init tenant memstore allocator", K(tenant_id), K(ret));
        } else if (OB_FAIL(allocator_map_.set_refactored(tenant_id, new_allocator))) {
          // OB_HASH_EXIST means another thread inserted first; fetch theirs.
          if (OB_HASH_EXIST == ret) {
            if (OB_FAIL(allocator_map_.get_refactored(tenant_id, out_allocator))) {
              LOG_WARN("failed to get refactor", K(tenant_id), K(ret));
            }
          } else {
            LOG_WARN("failed to set refactor", K(tenant_id), K(ret));
          }
          // Our copy is redundant (or unusable) either way.
          new_allocator->~TAllocator();
          ob_free(buf);
        } else {
          out_allocator = new_allocator;
        }
      } else {
        ret = OB_ALLOCATE_MEMORY_FAILED;
        LOG_WARN("failed to allocate memory", K(tenant_id), K(ret));
      }
    }
  } else if (OB_ISNULL(out_allocator)) {
    // Only reachable on the map path; the array path trusts its slot.
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("got allocator is NULL", K(tenant_id), K(ret));
  }
  return ret;
}
// Meyers singleton: constructed on first use, thread-safe since C++11.
ObMemstoreAllocatorMgr& ObMemstoreAllocatorMgr::get_instance()
{
  static ObMemstoreAllocatorMgr the_instance;
  return the_instance;
}

View File

@ -0,0 +1,59 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef _OB_SHARE_MEMSTORE_ALLOCATOR_MGR_H_
#define _OB_SHARE_MEMSTORE_ALLOCATOR_MGR_H_
#include "lib/allocator/ob_allocator.h"
#include "lib/alloc/alloc_func.h"
#include "lib/hash/ob_hashmap.h"
namespace oceanbase {
namespace lib {
class ObMallocAllocator;
}
namespace common {
class ObGMemstoreAllocator;
// Process-level registry mapping tenant id -> memstore allocator.
// Ids below PRESERVED_TENANT_COUNT use a lock-free array slot; larger ids go
// through a hash map created in init().
class ObMemstoreAllocatorMgr {
public:
  typedef ObGMemstoreAllocator TAllocator;
  // NOTE(review): "Memostore" is a typo, kept because the typedef is public API.
  typedef common::hash::ObHashMap<uint64_t, TAllocator*> TenantMemostoreAllocatorMap;
  ObMemstoreAllocatorMgr();
  virtual ~ObMemstoreAllocatorMgr();
  // Creates allocator_map_; call once before get_tenant_memstore_allocator().
  int init();
  // Returns (creating on first use) tenant_id's allocator; never freed.
  int get_tenant_memstore_allocator(uint64_t tenant_id, TAllocator*& out_allocator);
  // Total memstore bytes held across all tenants.
  int64_t get_all_tenants_memstore_used();
  static ObMemstoreAllocatorMgr& get_instance();

public:
  void set_malloc_allocator(lib::ObMallocAllocator* malloc_allocator)
  {
    malloc_allocator_ = malloc_allocator;
  }

private:
  static const uint64_t PRESERVED_TENANT_COUNT = 10000;
  static const uint64_t ALLOCATOR_MAP_BUCKET_NUM = 64;
  bool is_inited_;
  // One slot per tenant id below PRESERVED_TENANT_COUNT (CAS-claimed).
  TAllocator* allocators_[PRESERVED_TENANT_COUNT];
  // Allocators for tenant ids >= PRESERVED_TENANT_COUNT.
  TenantMemostoreAllocatorMap allocator_map_;
  lib::ObMallocAllocator* malloc_allocator_;
  int64_t all_tenants_memstore_used_;

private:
  DISALLOW_COPY_AND_ASSIGN(ObMemstoreAllocatorMgr);
};  // end of class ObMemstoreAllocatorMgr
} // namespace common
} // end of namespace oceanbase
#endif /* _OB_SHARE_MEMSTORE_ALLOCATOR_MGR_H_ */

View File

@ -0,0 +1,360 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_tenant_mutil_allocator.h"
#include "lib/objectpool/ob_concurrency_objpool.h"
namespace oceanbase {
using namespace clog;
using namespace election;
using namespace share;
namespace common {
// Pick the smallest block size that still yields at least MIN_SLICE_CNT slices
// per block for objects of obj_size bytes; non-positive sizes keep the default.
int ObTenantMutilAllocator::choose_blk_size(int obj_size)
{
  static const int MIN_SLICE_CNT = 64;
  int blk_size = OB_MALLOC_NORMAL_BLOCK_SIZE;  // default blk size is 8KB
  if (obj_size > 0) {
    if ((OB_MALLOC_NORMAL_BLOCK_SIZE / obj_size) >= MIN_SLICE_CNT) {
      // the normal block already holds enough slices; keep the default
    } else if ((OB_MALLOC_MIDDLE_BLOCK_SIZE / obj_size) >= MIN_SLICE_CNT) {
      blk_size = OB_MALLOC_MIDDLE_BLOCK_SIZE;
    } else {
      blk_size = OB_MALLOC_BIG_BLOCK_SIZE;
    }
  }
  return blk_size;
}
// Fixed-size buffer for an ObLogTask; NULL when the clog quota is exhausted.
// Keeps a live-object counter and rate-limits the failure warning to once per
// second.
void* ObTenantMutilAllocator::alloc_log_task_buf()
{
  void* ptr = log_task_alloc_.alloc();
  if (NULL == ptr) {
    if (REACH_TIME_INTERVAL(1000 * 1000)) {
      OB_LOG(WARN,
          "alloc_log_task_buf failed",
          K(tenant_id_),
          K(log_task_alloc_count_),
          "hold",
          log_task_alloc_.hold(),
          "limit",
          log_task_alloc_.limit());
    }
  } else {
    ATOMIC_INC(&log_task_alloc_count_);
  }
  return ptr;
}
// Counterpart of alloc_log_task_buf(); NULL is a no-op.
void ObTenantMutilAllocator::free_log_task_buf(void* ptr)
{
  if (NULL == ptr) {
    return;
  }
  ATOMIC_DEC(&log_task_alloc_count_);
  log_task_alloc_.free(ptr);
}
// General-purpose variable-size clog allocation from the vslice allocator.
void* ObTenantMutilAllocator::ge_alloc(const int64_t size)
{
  return clog_ge_alloc_.alloc(size);
}
// Release memory obtained from ge_alloc().
// NOTE(review): no NULL check here, unlike the other free_* methods — the
// underlying allocator presumably tolerates NULL; confirm before relying on it.
void ObTenantMutilAllocator::ge_free(void* ptr)
{
  clog_ge_alloc_.free(ptr);
}
// Construct an ObLogFlushTask in slice memory; NULL when the pool is exhausted.
ObLogFlushTask* ObTenantMutilAllocator::alloc_log_flush_task()
{
  void* buf = log_flush_task_alloc_.alloc();
  return (NULL == buf) ? NULL : new (buf) ObLogFlushTask();
}
// Destroy the task in place and hand its slice back; NULL is a no-op.
void ObTenantMutilAllocator::free_log_flush_task(ObLogFlushTask* ptr)
{
  if (NULL == ptr) {
    return;
  }
  ptr->~ObLogFlushTask();
  log_flush_task_alloc_.free(ptr);
}
// Construct an ObFetchLogTask in slice memory; NULL when the pool is exhausted.
ObFetchLogTask* ObTenantMutilAllocator::alloc_fetch_log_task()
{
  void* buf = fetch_log_task_alloc_.alloc();
  return (NULL == buf) ? NULL : new (buf) ObFetchLogTask();
}
// Destroy the task in place and hand its slice back; NULL is a no-op.
void ObTenantMutilAllocator::free_fetch_log_task(ObFetchLogTask* ptr)
{
  if (NULL == ptr) {
    return;
  }
  ptr->~ObFetchLogTask();
  fetch_log_task_alloc_.free(ptr);
}
// Construct an ObLogStateEventTaskV2 in slice memory; NULL when exhausted.
ObLogStateEventTaskV2* ObTenantMutilAllocator::alloc_log_event_task()
{
  void* buf = log_event_task_alloc_.alloc();
  return (NULL == buf) ? NULL : new (buf) ObLogStateEventTaskV2();
}
// Destroy the task in place and hand its slice back; NULL is a no-op.
void ObTenantMutilAllocator::free_log_event_task(ObLogStateEventTaskV2* ptr)
{
  if (NULL == ptr) {
    return;
  }
  ptr->~ObLogStateEventTaskV2();
  log_event_task_alloc_.free(ptr);
}
// Construct an ObTraceProfile in slice memory; NULL when the pool is exhausted.
ObTraceProfile* ObTenantMutilAllocator::alloc_trace_profile()
{
  void* buf = trace_profile_alloc_.alloc();
  return (NULL == buf) ? NULL : new (buf) ObTraceProfile();
}
// Destroy the profile in place and hand its slice back; NULL is a no-op.
void ObTenantMutilAllocator::free_trace_profile(ObTraceProfile* ptr)
{
  if (NULL == ptr) {
    return;
  }
  ptr->~ObTraceProfile();
  trace_profile_alloc_.free(ptr);
}
// Construct an ObBatchSubmitCtx in slice memory; NULL when the pool is exhausted.
ObBatchSubmitCtx* ObTenantMutilAllocator::alloc_batch_submit_ctx()
{
  void* buf = batch_submit_ctx_alloc_.alloc();
  return (NULL == buf) ? NULL : new (buf) ObBatchSubmitCtx();
}
// Destroy the context in place and hand its slice back; NULL is a no-op.
void ObTenantMutilAllocator::free_batch_submit_ctx(ObBatchSubmitCtx* ptr)
{
  if (NULL == ptr) {
    return;
  }
  ptr->~ObBatchSubmitCtx();
  batch_submit_ctx_alloc_.free(ptr);
}
// Construct an ObBatchSubmitDiskTask in slice memory; NULL when exhausted.
ObBatchSubmitDiskTask* ObTenantMutilAllocator::alloc_batch_submit_dtask()
{
  void* buf = batch_submit_dtask_alloc_.alloc();
  return (NULL == buf) ? NULL : new (buf) ObBatchSubmitDiskTask();
}
// Expose the clog block manager (read-only) so callers can inspect hold/limit.
const ObBlockAllocMgr& ObTenantMutilAllocator::get_clog_blk_alloc_mgr() const
{
  return clog_blk_alloc_;
}
// Destroy the disk task in place and hand its slice back; NULL is a no-op.
void ObTenantMutilAllocator::free_batch_submit_dtask(ObBatchSubmitDiskTask* ptr)
{
  if (NULL == ptr) {
    return;
  }
  ptr->~ObBatchSubmitDiskTask();
  batch_submit_dtask_alloc_.free(ptr);
}
// Construct an ObElection in slice memory; NULL when the pool is exhausted.
ObElection* ObTenantMutilAllocator::alloc_election()
{
  void* buf = election_alloc_.alloc();
  return (NULL == buf) ? NULL : new (buf) ObElection();
}
// Destroy the election in place and hand its slice back; NULL is a no-op.
void ObTenantMutilAllocator::free_election(ObElection* ptr)
{
  if (NULL == ptr) {
    return;
  }
  ptr->~ObElection();
  election_alloc_.free(ptr);
}
// Construct an ObElectionGroup in slice memory; NULL when the pool is exhausted.
ObElectionGroup* ObTenantMutilAllocator::alloc_election_group()
{
  void* buf = election_group_alloc_.alloc();
  return (NULL == buf) ? NULL : new (buf) ObElectionGroup();
}
// Destroy the group in place and hand its slice back; NULL is a no-op.
void ObTenantMutilAllocator::free_election_group(ObElectionGroup* ptr)
{
  if (NULL == ptr) {
    return;
  }
  ptr->~ObElectionGroup();
  election_group_alloc_.free(ptr);
}
// Construct the concrete ObPartitionLogService in slice memory and return it
// through the interface pointer; NULL when the pool is exhausted.
ObIPartitionLogService* ObTenantMutilAllocator::alloc_partition_log_service()
{
  ObIPartitionLogService* pls = NULL;
  void* buf = partition_log_service_alloc_.alloc();
  if (NULL != buf) {
    pls = new (buf) ObPartitionLogService();
  }
  return pls;
}
// Destroy via the (virtual) interface destructor and return the slice;
// NULL is a no-op.
void ObTenantMutilAllocator::free_partition_log_service(ObIPartitionLogService* ptr)
{
  if (NULL == ptr) {
    return;
  }
  ptr->~ObIPartitionLogService();
  partition_log_service_alloc_.free(ptr);
}
void* ObTenantMutilAllocator::alloc_replay_task_buf(const bool is_inner_table, const int64_t size)
{
void* ptr = NULL;
ObVSliceAlloc& allocator = is_inner_table ? inner_table_replay_task_alloc_ : user_table_replay_task_alloc_;
ptr = allocator.alloc(size);
return ptr;
}
void ObTenantMutilAllocator::free_replay_task(const bool is_inner_table, void* ptr)
{
if (OB_LIKELY(NULL != ptr)) {
ObVSliceAlloc& allocator = is_inner_table ? inner_table_replay_task_alloc_ : user_table_replay_task_alloc_;
allocator.free(ptr);
}
}
bool ObTenantMutilAllocator::can_alloc_replay_task(const bool is_inner_table, int64_t size) const
{
const ObVSliceAlloc& allocator = is_inner_table ? inner_table_replay_task_alloc_ : user_table_replay_task_alloc_;
return allocator.can_alloc_block(size);
}
// Atomically add `size` to the pending replay mutator byte counter.
void ObTenantMutilAllocator::inc_pending_replay_mutator_size(int64_t size)
{
  ATOMIC_AAF(&pending_replay_mutator_size_, size);
}
// Atomically subtract `size` from the pending replay mutator byte counter.
void ObTenantMutilAllocator::dec_pending_replay_mutator_size(int64_t size)
{
  ATOMIC_SAF(&pending_replay_mutator_size_, size);
}
// Current pending replay mutator bytes (atomic read).
int64_t ObTenantMutilAllocator::get_pending_replay_mutator_size() const
{
  return ATOMIC_LOAD(&pending_replay_mutator_size_);
}
// Propagate the tenant's concurrency level (derived from max_cpu) to every
// sub-allocator; non-positive values are ignored.
// Fix: clog_ge_alloc_.set_nway(nway) was called twice in the original
// (duplicate line) — it is now called once.
void ObTenantMutilAllocator::set_nway(const int32_t nway)
{
  if (nway > 0) {
    log_task_alloc_.set_nway(nway);
    log_flush_task_alloc_.set_nway(nway);
    fetch_log_task_alloc_.set_nway(nway);
    log_event_task_alloc_.set_nway(nway);
    // trace_profile_alloc_ is deliberately left at its default nway.
    batch_submit_ctx_alloc_.set_nway(nway);
    batch_submit_dtask_alloc_.set_nway(nway);
    clog_ge_alloc_.set_nway(nway);
    election_alloc_.set_nway(nway);
    election_group_alloc_.set_nway(nway);
    inner_table_replay_task_alloc_.set_nway(nway);
    user_table_replay_task_alloc_.set_nway(nway);
    OB_LOG(INFO, "finish set nway", K(tenant_id_), K(nway));
  }
}
// Re-partition the tenant's total memory limit among clog, the replay engine
// (split inner-table vs user-table) and common allocations.
// No-op when total_limit is non-positive or unchanged from the current value.
void ObTenantMutilAllocator::set_limit(const int64_t total_limit)
{
  if (total_limit > 0 && total_limit != ATOMIC_LOAD(&total_limit_)) {
    ATOMIC_STORE(&total_limit_, total_limit);
    // clog gets a fixed percentage; replay is percentage-based but capped at
    // REPLAY_MEM_LIMIT_THRESHOLD; everything left over is "common".
    const int64_t clog_limit = total_limit / 100 * CLOG_MEM_LIMIT_PERCENT;
    const int64_t replay_limit = std::min(total_limit / 100 * REPLAY_MEM_LIMIT_PERCENT, REPLAY_MEM_LIMIT_THRESHOLD);
    const int64_t inner_table_replay_limit = replay_limit * INNER_TABLE_REPLAY_MEM_PERCENT / 100;
    const int64_t user_table_replay_limit = replay_limit * (100 - INNER_TABLE_REPLAY_MEM_PERCENT) / 100;
    const int64_t common_limit = total_limit - (clog_limit + replay_limit);
    clog_blk_alloc_.set_limit(clog_limit);
    inner_table_replay_blk_alloc_.set_limit(inner_table_replay_limit);
    user_table_replay_blk_alloc_.set_limit(user_table_replay_limit);
    common_blk_alloc_.set_limit(common_limit);
    OB_LOG(INFO,
        "ObTenantMutilAllocator set tenant mem limit finished",
        K(tenant_id_),
        K(total_limit),
        K(clog_limit),
        K(replay_limit),
        K(common_limit),
        K(inner_table_replay_limit),
        K(user_table_replay_limit));
  }
}
// Current total memory limit (bytes) as last set via set_limit().
int64_t ObTenantMutilAllocator::get_limit() const
{
  return ATOMIC_LOAD(&total_limit_);
}
// Bytes currently held across the four limited block managers.
int64_t ObTenantMutilAllocator::get_hold() const
{
  int64_t hold = clog_blk_alloc_.hold();
  hold += inner_table_replay_blk_alloc_.hold();
  hold += user_table_replay_blk_alloc_.hold();
  hold += common_blk_alloc_.hold();
  return hold;
}
// Generates a global free function `ob_slice_free_<name>` that recovers the
// owning ObTenantMutilAllocator from the slice-block header stored just before
// the object, then delegates to the allocator's typed free_<name>() method.
// NOTE(review): silently does nothing when the host/back-pointer is NULL —
// presumably only possible during teardown; confirm before relying on it.
#define SLICE_FREE_OBJ(name, cls) \
  void ob_slice_free_##name(typeof(cls)* ptr) \
  { \
    if (NULL != ptr) { \
      ObBlockSlicer::Item* item = (ObBlockSlicer::Item*)ptr - 1; \
      if (NULL != item->host_) { \
        ObTenantMutilAllocator* tma = reinterpret_cast<ObTenantMutilAllocator*>(item->host_->get_tmallocator()); \
        if (NULL != tma) { \
          tma->free_##name(ptr); \
        } \
      } \
    } \
  }

// One free helper per slice-allocated type handed out by ObTenantMutilAllocator.
SLICE_FREE_OBJ(election, ObElection);
SLICE_FREE_OBJ(election_group, ObElectionGroup);
SLICE_FREE_OBJ(log_flush_task, ObLogFlushTask);
SLICE_FREE_OBJ(fetch_log_task, ObFetchLogTask);
SLICE_FREE_OBJ(log_event_task, ObLogStateEventTaskV2);
SLICE_FREE_OBJ(trace_profile, ObTraceProfile);
SLICE_FREE_OBJ(batch_submit_ctx, ObBatchSubmitCtx);
SLICE_FREE_OBJ(batch_submit_dtask, ObBatchSubmitDiskTask);
SLICE_FREE_OBJ(partition_log_service, ObIPartitionLogService);
} // namespace common
} // namespace oceanbase

View File

@ -0,0 +1,242 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef _OB_SHARE_TENANT_MUTIL_ALLOCATOR_H_
#define _OB_SHARE_TENANT_MUTIL_ALLOCATOR_H_
#include "common/ob_trace_profile.h"
#include "lib/alloc/alloc_struct.h"
#include "lib/allocator/ob_block_alloc_mgr.h"
#include "lib/allocator/ob_slice_alloc.h"
#include "lib/allocator/ob_vslice_alloc.h"
#include "lib/queue/ob_link.h"
#include "clog/ob_log_task.h"
#include "clog/ob_log_flush_task.h"
#include "clog/ob_log_event_task_V2.h"
#include "clog/ob_fetch_log_engine.h"
#include "clog/ob_batch_submit_ctx.h"
#include "clog/ob_batch_submit_task.h"
#include "clog/ob_partition_log_service.h"
#include "election/ob_election.h"
#include "election/ob_election_group.h"
#include "observer/ob_server_struct.h"
#include "observer/omt/ob_multi_tenant.h"
namespace oceanbase {
namespace common {
// Interface for Clog module
// Allocation interface consumed by the clog (commit log) module; implemented
// by ObTenantMutilAllocator. Each alloc_* returns NULL on pool/limit
// exhaustion and has a matching free_* that tolerates NULL.
class ObILogAllocator {
public:
  ObILogAllocator()
  {}
  virtual ~ObILogAllocator()
  {}

public:
  // Raw buffer sized for an ObLogTask.
  virtual void* alloc_log_task_buf() = 0;
  virtual void free_log_task_buf(void* ptr) = 0;
  // General-purpose ("ge") variable-size clog allocation.
  virtual void* ge_alloc(const int64_t size) = 0;
  virtual void ge_free(void* ptr) = 0;
  virtual clog::ObLogFlushTask* alloc_log_flush_task() = 0;
  virtual void free_log_flush_task(clog::ObLogFlushTask* ptr) = 0;
  virtual clog::ObFetchLogTask* alloc_fetch_log_task() = 0;
  virtual void free_fetch_log_task(clog::ObFetchLogTask* ptr) = 0;
  virtual clog::ObLogStateEventTaskV2* alloc_log_event_task() = 0;
  virtual void free_log_event_task(clog::ObLogStateEventTaskV2* ptr) = 0;
  virtual common::ObTraceProfile* alloc_trace_profile() = 0;
  virtual void free_trace_profile(common::ObTraceProfile* ptr) = 0;
  virtual clog::ObBatchSubmitCtx* alloc_batch_submit_ctx() = 0;
  virtual void free_batch_submit_ctx(clog::ObBatchSubmitCtx* ptr) = 0;
  virtual clog::ObBatchSubmitDiskTask* alloc_batch_submit_dtask() = 0;
  virtual void free_batch_submit_dtask(clog::ObBatchSubmitDiskTask* ptr) = 0;
  virtual clog::ObIPartitionLogService* alloc_partition_log_service() = 0;
  virtual void free_partition_log_service(clog::ObIPartitionLogService* ptr) = 0;
  // Read-only view of the clog block manager (hold/limit inspection).
  virtual const ObBlockAllocMgr& get_clog_blk_alloc_mgr() const = 0;
};
// Interface for ReplayEngine module
// Allocation interface consumed by the replay engine; implemented by
// ObTenantMutilAllocator. Inner-table and user-table replay buffers are drawn
// from separately limited arenas, selected by `is_inner_table`.
class ObIReplayTaskAllocator {
public:
  ObIReplayTaskAllocator()
  {}
  virtual ~ObIReplayTaskAllocator()
  {}

public:
  virtual void* alloc_replay_task_buf(const bool is_inner_table, const int64_t size) = 0;
  virtual void free_replay_task(const bool is_inner_table, void* ptr) = 0;
  // Whether the selected arena can still serve a block of `size`.
  virtual bool can_alloc_replay_task(const bool is_inner_table, int64_t size) const = 0;
  // Bookkeeping for mutator bytes queued for replay but not yet applied.
  virtual void inc_pending_replay_mutator_size(int64_t size) = 0;
  virtual void dec_pending_replay_mutator_size(int64_t size) = 0;
  virtual int64_t get_pending_replay_mutator_size() const = 0;
};
// Per-tenant multiplexing allocator: one object per tenant owning a family of
// slice/vslice allocators for clog, election and replay objects, all charged
// against block managers whose limits are (re)partitioned by set_limit().
// Also an ObLink so instances can be chained into the per-slot lists kept by
// ObTenantMutilAllocatorMgr.
class ObTenantMutilAllocator : public ObILogAllocator, public ObIReplayTaskAllocator, public common::ObLink {
public:
  // Slice sizes of the fixed-size object pools below.
  const int LOG_TASK_SIZE = sizeof(clog::ObLogTask);
  const int LOG_FLUSH_TASK_SIZE = sizeof(clog::ObLogFlushTask);
  const int LOG_FETCH_TASK_SIZE = sizeof(clog::ObFetchLogTask);
  const int LOG_EVENT_TASK_SIZE = sizeof(clog::ObLogStateEventTaskV2);
  const int TRACE_PROFILE_SIZE = sizeof(common::ObTraceProfile);
  const int BATCH_SUBMIT_CTX_SIZE = sizeof(clog::ObBatchSubmitCtx);
  const int BATCH_SUBMIT_DTASK_SIZE = sizeof(clog::ObBatchSubmitDiskTask);
  const int ELECTION_SIZE = sizeof(election::ObElection);
  const int ELECTION_GROUP_SIZE = sizeof(election::ObElectionGroup);
  const int PARTITION_LOG_SERVICE_SIZE = sizeof(clog::ObPartitionLogService);
  // The memory percent of clog
  const int64_t CLOG_MEM_LIMIT_PERCENT = 30;
  // The memory percent of replay engine
  const int64_t REPLAY_MEM_LIMIT_PERCENT = 25;
  // The memory limit of replay engine
  const int64_t REPLAY_MEM_LIMIT_THRESHOLD = 512 * 1024 * 1024ll;
  // The memory percent of replay engine for inner_table
  const int64_t INNER_TABLE_REPLAY_MEM_PERCENT = 20;
  // Smallest block size yielding at least 64 slices per block for obj_size.
  static int choose_blk_size(int obj_size);

public:
  explicit ObTenantMutilAllocator(uint64_t tenant_id)
      : tenant_id_(tenant_id),
        total_limit_(INT64_MAX),
        log_task_alloc_count_(0),
        pending_replay_mutator_size_(0),
        clog_blk_alloc_(),
        inner_table_replay_blk_alloc_(REPLAY_MEM_LIMIT_THRESHOLD * INNER_TABLE_REPLAY_MEM_PERCENT / 100),
        user_table_replay_blk_alloc_(REPLAY_MEM_LIMIT_THRESHOLD * (100 - INNER_TABLE_REPLAY_MEM_PERCENT) / 100),
        common_blk_alloc_(),
        unlimited_blk_alloc_(),
        log_task_alloc_(LOG_TASK_SIZE, ObMemAttr(tenant_id, ObModIds::OB_LOG_TASK), choose_blk_size(LOG_TASK_SIZE),
            clog_blk_alloc_, this),
        log_flush_task_alloc_(LOG_FLUSH_TASK_SIZE, ObMemAttr(tenant_id, ObModIds::OB_LOG_FLUSH_TASK),
            choose_blk_size(LOG_FLUSH_TASK_SIZE), clog_blk_alloc_, this),
        fetch_log_task_alloc_(LOG_FETCH_TASK_SIZE, ObMemAttr(tenant_id, ObModIds::OB_LOG_FETCH_TASK),
            choose_blk_size(LOG_FETCH_TASK_SIZE), clog_blk_alloc_, this),
        log_event_task_alloc_(LOG_EVENT_TASK_SIZE, ObMemAttr(tenant_id, ObModIds::OB_LOG_EVENT_TASK),
            choose_blk_size(LOG_EVENT_TASK_SIZE), common_blk_alloc_, this),
        trace_profile_alloc_(TRACE_PROFILE_SIZE, ObMemAttr(tenant_id, ObModIds::OB_LOG_TRACE_PROFILE),
            choose_blk_size(TRACE_PROFILE_SIZE), clog_blk_alloc_, this),
        batch_submit_ctx_alloc_(BATCH_SUBMIT_CTX_SIZE, ObMemAttr(tenant_id, ObModIds::OB_CLOG_BATCH_SUBMIT_CTX),
            choose_blk_size(BATCH_SUBMIT_CTX_SIZE), clog_blk_alloc_, this),
        batch_submit_dtask_alloc_(BATCH_SUBMIT_DTASK_SIZE,
            ObMemAttr(tenant_id, ObModIds::OB_CLOG_BATCH_SUBMIT_DISK_TASK), choose_blk_size(BATCH_SUBMIT_DTASK_SIZE),
            clog_blk_alloc_, this),
        clog_ge_alloc_(ObMemAttr(tenant_id, ObModIds::OB_CLOG_GE), ObVSliceAlloc::DEFAULT_BLOCK_SIZE, clog_blk_alloc_),
        election_alloc_(ELECTION_SIZE, ObMemAttr(tenant_id, ObModIds::OB_ELECTION), choose_blk_size(ELECTION_SIZE),
            common_blk_alloc_, this),
        election_group_alloc_(ELECTION_GROUP_SIZE, ObMemAttr(tenant_id, ObModIds::OB_ELECTION_GROUP),
            choose_blk_size(ELECTION_GROUP_SIZE), common_blk_alloc_, this),
        inner_table_replay_task_alloc_(ObMemAttr(tenant_id, ObModIds::OB_LOG_REPLAY_ENGINE),
            ObVSliceAlloc::DEFAULT_BLOCK_SIZE, inner_table_replay_blk_alloc_),
        user_table_replay_task_alloc_(ObMemAttr(tenant_id, ObModIds::OB_LOG_REPLAY_ENGINE),
            ObVSliceAlloc::DEFAULT_BLOCK_SIZE, user_table_replay_blk_alloc_),
        partition_log_service_alloc_(PARTITION_LOG_SERVICE_SIZE,
            ObMemAttr(tenant_id, ObModIds::OB_PARTITION_LOG_SERVICE), choose_blk_size(PARTITION_LOG_SERVICE_SIZE),
            unlimited_blk_alloc_, this)
  {
    // set_nway according to tenant's max_cpu
    double min_cpu = 0;
    double max_cpu = 0;
    omt::ObMultiTenant* omt = GCTX.omt_;
    if (NULL == omt) {
    } else if (OB_SUCCESS != omt->get_tenant_cpu(tenant_id, min_cpu, max_cpu)) {
    } else {
      const int32_t nway = (int32_t)max_cpu;
      set_nway(nway);
    }
  }
  ~ObTenantMutilAllocator()
  {}
  // update nway when tenant's max_cpu changed
  void set_nway(const int32_t nway);
  // update limit when tenant's memory_limit changed
  void set_limit(const int64_t total_limit);
  int64_t get_limit() const;
  int64_t get_hold() const;
  uint64_t get_tenant_id() const
  {
    return tenant_id_;
  }
  // Next node in the ObTenantMutilAllocatorMgr slot list (reuses ObLink::next_).
  inline ObTenantMutilAllocator*& get_next()
  {
    return reinterpret_cast<ObTenantMutilAllocator*&>(next_);
  }
  // interface for clog
  void* alloc_log_task_buf();
  void free_log_task_buf(void* ptr);
  void* ge_alloc(const int64_t size);
  void ge_free(void* ptr);
  clog::ObLogFlushTask* alloc_log_flush_task();
  void free_log_flush_task(clog::ObLogFlushTask* ptr);
  clog::ObFetchLogTask* alloc_fetch_log_task();
  void free_fetch_log_task(clog::ObFetchLogTask* ptr);
  clog::ObLogStateEventTaskV2* alloc_log_event_task();
  void free_log_event_task(clog::ObLogStateEventTaskV2* ptr);
  common::ObTraceProfile* alloc_trace_profile();
  void free_trace_profile(common::ObTraceProfile* ptr);
  clog::ObBatchSubmitCtx* alloc_batch_submit_ctx();
  void free_batch_submit_ctx(clog::ObBatchSubmitCtx* ptr);
  clog::ObBatchSubmitDiskTask* alloc_batch_submit_dtask();
  void free_batch_submit_dtask(clog::ObBatchSubmitDiskTask* ptr);
  clog::ObIPartitionLogService* alloc_partition_log_service();
  void free_partition_log_service(clog::ObIPartitionLogService* ptr);
  const ObBlockAllocMgr& get_clog_blk_alloc_mgr() const;
  // interface for election
  election::ObElection* alloc_election();
  void free_election(election::ObElection* ptr);
  election::ObElectionGroup* alloc_election_group();
  void free_election_group(election::ObElectionGroup* ptr);
  void* alloc_replay_task_buf(const bool is_inner_table, const int64_t size);
  void free_replay_task(const bool is_inner_table, void* ptr);
  bool can_alloc_replay_task(const bool is_inner_table, int64_t size) const;
  void inc_pending_replay_mutator_size(int64_t size);
  void dec_pending_replay_mutator_size(int64_t size);
  int64_t get_pending_replay_mutator_size() const;

private:
  uint64_t tenant_id_ CACHE_ALIGNED;
  int64_t total_limit_;
  int64_t log_task_alloc_count_;          // live ObLogTask buffers outstanding
  int64_t pending_replay_mutator_size_;
  // Block managers carrying the per-category limits (see set_limit()).
  ObBlockAllocMgr clog_blk_alloc_;
  ObBlockAllocMgr inner_table_replay_blk_alloc_;
  ObBlockAllocMgr user_table_replay_blk_alloc_;
  ObBlockAllocMgr common_blk_alloc_;
  ObBlockAllocMgr unlimited_blk_alloc_;
  // Fixed-size pools (one per object type) and variable-size arenas.
  ObSliceAlloc log_task_alloc_;
  ObSliceAlloc log_flush_task_alloc_;
  ObSliceAlloc fetch_log_task_alloc_;
  ObSliceAlloc log_event_task_alloc_;
  ObSliceAlloc trace_profile_alloc_;
  ObSliceAlloc batch_submit_ctx_alloc_;    // for ObBatchSubmitCtx
  ObSliceAlloc batch_submit_dtask_alloc_;  // for ObBatchSubmitDiskTask
  ObVSliceAlloc clog_ge_alloc_;
  ObSliceAlloc election_alloc_;
  ObSliceAlloc election_group_alloc_;
  ObVSliceAlloc inner_table_replay_task_alloc_;
  ObVSliceAlloc user_table_replay_task_alloc_;
  ObSliceAlloc partition_log_service_alloc_;
};
// Free interface for class-object allocated by slice_alloc
// These functions recover the owning ObTenantMutilAllocator from the slice
// block header preceding the object, so callers can release objects without
// holding the allocator pointer (bodies are generated by SLICE_FREE_OBJ in
// the .cpp).
void ob_slice_free_election(election::ObElection* ptr);
void ob_slice_free_election_group(election::ObElectionGroup* ptr);
void ob_slice_free_log_flush_task(clog::ObLogFlushTask* ptr);
void ob_slice_free_fetch_log_task(clog::ObFetchLogTask* ptr);
void ob_slice_free_log_event_task(clog::ObLogStateEventTaskV2* ptr);
void ob_slice_free_trace_profile(common::ObTraceProfile* ptr);
void ob_slice_free_batch_submit_ctx(clog::ObBatchSubmitCtx* ptr);
void ob_slice_free_batch_submit_dtask(clog::ObBatchSubmitDiskTask* ptr);
void ob_slice_free_partition_log_service(clog::ObIPartitionLogService* ptr);
} // end of namespace common
} // end of namespace oceanbase
#endif /* _OB_SHARE_TENANT_MUTIL_ALLOCATOR_H_ */

View File

@ -0,0 +1,339 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "share/allocator/ob_tenant_mutil_allocator_mgr.h"
#include "lib/allocator/ob_malloc.h"
#include "share/config/ob_server_config.h"
#include "share/allocator/ob_tenant_mutil_allocator.h"
#include "ob_gmemstore_allocator.h"
#include "ob_memstore_allocator_mgr.h"
namespace oceanbase {
namespace common {
int ObTenantMutilAllocatorMgr::init()
{
int ret = OB_SUCCESS;
if (is_inited_) {
ret = OB_INIT_TWICE;
} else {
for (int64_t i = 0; i < PRESERVED_TENANT_COUNT; ++i) {
tma_array_[i] = NULL;
}
is_inited_ = true;
}
return ret;
}
// Get the log allocator for specified tenant, create it when tenant not exist
int ObTenantMutilAllocatorMgr::get_tenant_log_allocator(const uint64_t tenant_id, ObILogAllocator*& out_allocator)
{
int ret = OB_SUCCESS;
ObTenantMutilAllocator* allocator = NULL;
if (OB_FAIL(get_tenant_mutil_allocator(tenant_id, allocator))) {
} else {
out_allocator = allocator;
}
return ret;
}
// Get the replay allocator for specified tenant, create it when tenant not exist
int ObTenantMutilAllocatorMgr::get_tenant_replay_allocator(
const uint64_t tenant_id, ObIReplayTaskAllocator*& out_allocator)
{
int ret = OB_SUCCESS;
ObTenantMutilAllocator* allocator = NULL;
if (OB_FAIL(get_tenant_mutil_allocator(tenant_id, allocator))) {
} else {
out_allocator = allocator;
}
return ret;
}
// Look up (creating lazily) the multiplexing allocator of `tenant_id`.
// Ids below PRESERVED_TENANT_COUNT own their tma_array_ slot and need no lock;
// larger ids share slot (tenant_id % PRESERVED_TENANT_COUNT) as a linked list
// kept sorted by tenant id and guarded by the per-slot rwlock.
int ObTenantMutilAllocatorMgr::get_tenant_mutil_allocator(
    const uint64_t tenant_id, ObTenantMutilAllocator*& out_allocator)
{
  int ret = OB_SUCCESS;
  if (!is_inited_) {
    ret = OB_NOT_INIT;
  } else if (OB_UNLIKELY(tenant_id <= 0)) {
    ret = OB_INVALID_ARGUMENT;
    OB_LOG(WARN, "invalid arguments", K(ret), K(tenant_id));
  } else if (tenant_id < PRESERVED_TENANT_COUNT) {
    // Don't need lock
    if (NULL == (out_allocator = ATOMIC_LOAD(&tma_array_[tenant_id]))) {
      if (OB_FAIL(create_tenant_mutil_allocator_(tenant_id, out_allocator))) {
        OB_LOG(WARN, "fail to create_tenant_mutil_allocator_", K(ret), K(tenant_id));
      }
    }
  } else {
    // Need lock
    const int64_t slot = tenant_id % PRESERVED_TENANT_COUNT;
    bool is_need_create = false;
    do {
      // rdlock: walk the slot's sorted list looking for an exact match.
      obsys::CRLockGuard guard(locks_[slot]);
      TMA** cur = &tma_array_[slot];
      while ((NULL != cur) && (NULL != *cur) && (*cur)->get_tenant_id() < tenant_id) {
        cur = &((*cur)->get_next());
      }
      if (NULL != cur) {
        if (NULL != (*cur) && (*cur)->get_tenant_id() == tenant_id) {
          out_allocator = *cur;
        } else {
          // (*cur) is NULL || (*cur)->tenant_id_ > tenant_id
          is_need_create = true;
        }
      }
    } while (0);
    if (is_need_create) {
      // Create outside the read lock; create_ re-checks under the write lock.
      if (OB_FAIL(create_tenant_mutil_allocator_(tenant_id, out_allocator))) {
        OB_LOG(WARN, "fail to create_tenant_mutil_allocator_", K(ret), K(tenant_id));
      }
    }
  }
  if (OB_SUCC(ret) && OB_ISNULL(out_allocator)) {
    ret = OB_ERR_UNEXPECTED;
    OB_LOG(WARN, "got allocator is NULL", K(ret), K(tenant_id));
  }
  return ret;
}
// Allocate and placement-construct one TMA for `tenant_id`. The TMA object
// itself is charged to the server tenant; on success out_allocator owns it.
int ObTenantMutilAllocatorMgr::construct_allocator_(const uint64_t tenant_id, TMA*& out_allocator)
{
  int ret = OB_SUCCESS;
  if (!is_inited_) {
    ret = OB_NOT_INIT;
  } else if (OB_UNLIKELY(tenant_id <= 0)) {
    ret = OB_INVALID_ARGUMENT;
    OB_LOG(WARN, "invalid arguments", K(ret), K(tenant_id));
  } else {
    ObMemAttr attr(OB_SERVER_TENANT_ID, ObModIds::OB_TENANT_MUTIL_ALLOCATOR);
    void* buf = ob_malloc(sizeof(TMA), attr);
    if (NULL != buf) {
      out_allocator = new (buf) TMA(tenant_id);
      OB_LOG(INFO, "ObTenantMutilAllocator init success", K(tenant_id));
    } else {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      OB_LOG(WARN, "failed to alloc memory", K(ret), K(tenant_id));
    }
  }
  return ret;
}
// Create the allocator for `tenant_id` if it does not exist yet.
// Small ids: CAS the new allocator into its dedicated slot (losers free their
// copy and adopt the winner's). Large ids: ensure the slot has a head node
// (a TMA constructed with tenant_id == slot acting as a sentinel), then insert
// into the sorted per-slot list under the slot's write lock.
int ObTenantMutilAllocatorMgr::create_tenant_mutil_allocator_(const uint64_t tenant_id, TMA*& out_allocator)
{
  int ret = OB_SUCCESS;
  if (!is_inited_) {
    ret = OB_NOT_INIT;
  } else if (OB_UNLIKELY(tenant_id <= 0)) {
    ret = OB_INVALID_ARGUMENT;
    OB_LOG(WARN, "invalid arguments", K(ret), K(tenant_id));
  } else if (tenant_id < PRESERVED_TENANT_COUNT) {
    if (NULL != (out_allocator = ATOMIC_LOAD(&tma_array_[tenant_id]))) {
      // already created by another thread; nothing to do
    } else {
      TMA* tmp_tma = NULL;
      if (OB_FAIL(construct_allocator_(tenant_id, tmp_tma))) {
        OB_LOG(WARN, "fail to construct_allocator_", K(ret), K(tenant_id));
      } else if (!ATOMIC_BCAS(&tma_array_[tenant_id], NULL, tmp_tma)) {
        // lost the race: free ours, take the installed one
        out_allocator = ATOMIC_LOAD(&tma_array_[tenant_id]);
        if (NULL != tmp_tma) {
          tmp_tma->~TMA();
          ob_free(tmp_tma);
        }
      } else {
        out_allocator = ATOMIC_LOAD(&tma_array_[tenant_id]);
      }
    }
  } else {
    const int64_t slot = tenant_id % PRESERVED_TENANT_COUNT;
    if (NULL == ATOMIC_LOAD(&tma_array_[slot])) {
      // slot's head node is NULL, need construct
      // NOTE(review): the head is a TMA built with tenant_id == slot and is
      // never handed out — it serves only as the list anchor.
      TMA* tmp_tma = NULL;
      if (OB_FAIL(construct_allocator_(slot, tmp_tma))) {
        OB_LOG(WARN, "fail to construct_allocator_", K(ret), K(slot));
      } else if (!ATOMIC_BCAS(&tma_array_[slot], NULL, tmp_tma)) {
        if (NULL != tmp_tma) {
          tmp_tma->~TMA();
          ob_free(tmp_tma);
        }
      } else {
      }
    }
    do {
      // Need lock when modify slog list
      obsys::CWLockGuard guard(locks_[slot]);
      if (OB_SUCC(ret)) {
        bool is_need_create = false;
        // Re-check under the write lock: find the insert position (list is
        // kept sorted by tenant id).
        TMA** cur = &tma_array_[slot];
        while ((NULL != cur) && (NULL != *cur) && (*cur)->get_tenant_id() < tenant_id) {
          cur = &((*cur)->get_next());
        }
        if (NULL != cur) {
          if (NULL != (*cur) && (*cur)->get_tenant_id() == tenant_id) {
            out_allocator = *cur;
          } else {
            is_need_create = true;
          }
        }
        if (is_need_create) {
          TMA* tmp_tma = NULL;
          if (OB_FAIL(construct_allocator_(tenant_id, tmp_tma))) {
            OB_LOG(WARN, "fail to construct_allocator_", K(ret), K(tenant_id));
          } else {
            // splice the new node in front of *cur
            TMA* next_allocator = *cur;
            *cur = tmp_tma;
            ((*cur)->get_next()) = next_allocator;
            out_allocator = tmp_tma;
          }
        }
      }
    } while (0);
  }
  return ret;
}
ObTenantMutilAllocatorMgr& ObTenantMutilAllocatorMgr::get_instance()
{
  // Meyers singleton: the local static is initialized exactly once,
  // thread-safely (C++11 magic statics), on first call.
  static ObTenantMutilAllocatorMgr singleton;
  return singleton;
}
int ObTenantMutilAllocatorMgr::get_tenant_limit(const uint64_t tenant_id, int64_t& limit)
{
int ret = OB_SUCCESS;
ObTenantMutilAllocator* allocator = NULL;
if (!is_inited_) {
ret = OB_NOT_INIT;
} else if (OB_UNLIKELY(tenant_id <= 0)) {
ret = OB_INVALID_ARGUMENT;
} else if (OB_FAIL(get_tenant_mutil_allocator(tenant_id, allocator))) {
ret = OB_TENANT_NOT_EXIST;
} else {
limit = allocator->get_limit();
}
return ret;
}
int ObTenantMutilAllocatorMgr::set_tenant_limit(const uint64_t tenant_id, const int64_t new_limit)
{
int ret = OB_SUCCESS;
ObTenantMutilAllocator* allocator = NULL;
if (!is_inited_) {
ret = OB_NOT_INIT;
} else if (OB_UNLIKELY(tenant_id <= 0) || OB_UNLIKELY(new_limit <= 0)) {
ret = OB_INVALID_ARGUMENT;
} else if (OB_FAIL(get_tenant_mutil_allocator(tenant_id, allocator))) {
} else if (OB_ISNULL(allocator)) {
ret = OB_TENANT_NOT_EXIST;
} else {
allocator->set_limit(new_limit);
}
return ret;
}
void ObTenantMutilAllocatorMgr::free_log_task_buf(void* p)
{
typedef ObBlockSlicer Block;
if (NULL != p) {
Block::Item* item = (Block::Item*)p - 1;
Block* blk = item->host_;
ObTenantMutilAllocator* tmallocator = static_cast<ObTenantMutilAllocator*>(blk->get_tmallocator());
if (NULL != tmallocator) {
tmallocator->free_log_task_buf(p);
} else {
OB_LOG(ERROR, "tmallocator is NULL, unexpected");
}
}
}
int ObTenantMutilAllocatorMgr::update_tenant_mem_limit(const share::TenantUnits& all_tenant_units)
{
  // Update mem_limit for each tenant; called when unit specifications or
  // memstore_limit_percentage change. Per-tenant failures are logged with
  // tmp_ret and do not stop the loop.
  int ret = OB_SUCCESS;
  const int64_t cur_memstore_limit_percent = ObServerConfig::get_instance().memstore_limit_percentage;
  if (!is_inited_) {
    ret = OB_NOT_INIT;
  } else {
    int64_t unit_cnt = all_tenant_units.count();
    for (int64_t i = 0; i < unit_cnt && OB_SUCC(ret); ++i) {
      const share::ObUnitInfoGetter::ObTenantConfig& tenant_config = all_tenant_units.at(i);
      const uint64_t tenant_id = tenant_config.tenant_id_;
      const bool has_memstore = tenant_config.has_memstore_;
      // nway drives allocator concurrency; clamp to at least 1 so tenants
      // with a fractional cpu quota still get one way.
      int32_t nway = (int32_t)(tenant_config.config_.max_cpu_);
      if (nway == 0) {
        nway = 1;
      }
      const int64_t max_memory = tenant_config.config_.max_memory_;
      int64_t new_tma_limit = max_memory;
      if (has_memstore) {
        // If the unit type of tenant is not Log, need to subtract
        // the reserved memory of memstore
        if (cur_memstore_limit_percent > 100 || cur_memstore_limit_percent <= 0) {
          OB_LOG(WARN, "memstore_limit_percentage val is unexpected", K(cur_memstore_limit_percent));
        } else {
          // divide before multiplying to avoid overflow on large max_memory
          new_tma_limit = max_memory / 100 * (100 - cur_memstore_limit_percent);
        }
      }
      int tmp_ret = OB_SUCCESS;
      ObTenantMutilAllocator* tma = NULL;
      if (OB_SUCCESS != (tmp_ret = get_tenant_mutil_allocator(tenant_id, tma))) {
        OB_LOG(WARN, "get_tenant_mutil_allocator failed", K(tmp_ret), K(tenant_id));
      } else if (NULL == tma) {
        OB_LOG(WARN, "get_tenant_mutil_allocator failed", K(tenant_id));
      } else {
        tma->set_nway(nway);
        int64_t pre_tma_limit = tma->get_limit();
        if (pre_tma_limit != new_tma_limit) {
          tma->set_limit(new_tma_limit);
        }
        OB_LOG(INFO,
            "ObTenantMutilAllocator update tenant mem_limit finished",
            K(ret),
            K(tenant_id),
            K(nway),
            K(new_tma_limit),
            K(pre_tma_limit),
            K(cur_memstore_limit_percent),
            K(tenant_config));
      }
      // update memstore threshold of GmemstoreAllocator
      ObGMemstoreAllocator* memstore_allocator = NULL;
      if (OB_SUCCESS != (tmp_ret = ObMemstoreAllocatorMgr::get_instance().get_tenant_memstore_allocator(
                             tenant_id, memstore_allocator))) {
        // was silently swallowed before; log so the failure is diagnosable
        OB_LOG(WARN, "get_tenant_memstore_allocator failed", K(tmp_ret), K(tenant_id));
      } else if (OB_ISNULL(memstore_allocator)) {
        // fixed copy-paste: this failure is from get_tenant_memstore_allocator,
        // not get_tenant_mutil_allocator
        OB_LOG(WARN, "get_tenant_memstore_allocator failed", K(tenant_id));
      } else if (OB_FAIL(memstore_allocator->set_memstore_threshold(tenant_id))) {
        OB_LOG(WARN, "failed to set_memstore_threshold of memstore allocator", K(tenant_id), K(ret));
      } else {
        OB_LOG(INFO, "succ to set_memstore_threshold of memstore allocator", K(tenant_id), K(ret));
      }
    }
  }
  return ret;
}
} // namespace common
} // namespace oceanbase

View File

@ -0,0 +1,84 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef _OB_SHARE_TENANT_MUTIL_ALLOCATOR_MGR_H_
#define _OB_SHARE_TENANT_MUTIL_ALLOCATOR_MGR_H_
#include "share/ob_define.h"
#include "share/ob_unit_getter.h"
#include "lib/allocator/ob_block_alloc_mgr.h"
#include "lib/allocator/ob_vslice_alloc.h"
namespace oceanbase {
namespace common {
class ObILogAllocator;
class ObIReplayTaskAllocator;
class ObTenantMutilAllocator;
// Process-wide manager of per-tenant multi-allocators (TMA).
// tma_array_ is a hash structure: tenant ids below PRESERVED_TENANT_COUNT own
// their slot directly; larger ids chain off slot (id % PRESERVED_TENANT_COUNT)
// in an ordered linked list guarded by the matching entry of locks_.
class ObTenantMutilAllocatorMgr {
public:
  typedef ObTenantMutilAllocator TMA;
  ObTenantMutilAllocatorMgr()
      : is_inited_(false),
        locks_(),
        tma_array_(),
        clog_entry_alloc_(ObMemAttr(OB_SERVER_TENANT_ID, ObModIds::OB_LOG_TASK_BODY), OB_MALLOC_MIDDLE_BLOCK_SIZE,
            clog_body_blk_alloc_)
  {}
  ~ObTenantMutilAllocatorMgr()
  {}
  int init();
  // Lookup (and lazily create) the allocator for tenant_id.
  int get_tenant_mutil_allocator(const uint64_t tenant_id, ObTenantMutilAllocator*& out_allocator);
  // Views of the same per-tenant allocator through narrower interfaces.
  int get_tenant_log_allocator(const uint64_t tenant_id, ObILogAllocator*& out_allocator);
  int get_tenant_replay_allocator(const uint64_t tenant_id, ObIReplayTaskAllocator*& out_allocator);
  // Get/set the tenant's total memory limit.
  int get_tenant_limit(const uint64_t tenant_id, int64_t& limit);
  int set_tenant_limit(const uint64_t tenant_id, const int64_t new_limit);
  // Allocate/free clog entry buffers from the shared slice allocator.
  void* alloc_log_entry_buf(const int64_t size)
  {
    return clog_entry_alloc_.alloc(size);
  }
  void free_log_entry_buf(void* ptr)
  {
    if (NULL != ptr) {
      clog_entry_alloc_.free(ptr);
    }
  }
  // a tricky interface, ugly but save memory: recovers the owning tenant
  // allocator from the buffer's block header and frees through it
  void free_log_task_buf(void* ptr);
  // Refresh nway/limit/memstore threshold for every tenant in the unit list.
  int update_tenant_mem_limit(const share::TenantUnits& all_tenant_units);
public:
  static ObTenantMutilAllocatorMgr& get_instance();
private:
  int construct_allocator_(const uint64_t tenant_id, TMA*& out_allocator);
  int create_tenant_mutil_allocator_(const uint64_t tenant_id, TMA*& out_allocator);
private:
  static const uint64_t PRESERVED_TENANT_COUNT = 10000;
private:
  bool is_inited_;
  // Per-slot rwlocks protecting the linked lists of non-preserved tenants.
  obsys::CRWLock locks_[PRESERVED_TENANT_COUNT];
  // Slot heads; see class comment for the slot/list layout.
  ObTenantMutilAllocator* tma_array_[PRESERVED_TENANT_COUNT];
  ObBlockAllocMgr clog_body_blk_alloc_;
  ObVSliceAlloc clog_entry_alloc_;
private:
  DISALLOW_COPY_AND_ASSIGN(ObTenantMutilAllocatorMgr);
}; // end of class ObTenantMutilAllocatorMgr
#define TMA_MGR_INSTANCE (::oceanbase::common::ObTenantMutilAllocatorMgr::get_instance())
} // end of namespace common
} // end of namespace oceanbase
#endif /* _OB_SHARE_TENANT_MUTIL_ALLOCATOR_MGR_H_ */