[MDS] add monitor to watch MDS memory

This commit is contained in:
fengdeyiji 2023-09-05 07:17:03 +00:00 committed by ob-robot
parent 13a7f463fb
commit 013386a767
5 changed files with 149 additions and 16 deletions

View File

@ -45,6 +45,7 @@ public:
int fill_virtual_info(ObIArray<MdsNodeInfoForVirtualTable> &mds_node_info_array) const;
int mark_removed_from_t3m(ObTabletPointer *pointer) const;
int mark_switched_to_empty_shell() const;
int is_switched_to_empty_shell(bool &is_switched_to_empty_shell) const;
template <int N>
int forcely_reset_mds_table(const char (&reason)[N]);
/******************************Single Key Unit Access Interface**********************************/

View File

@ -711,6 +711,19 @@ inline int MdsTableHandle::mark_switched_to_empty_shell() const
return ret;
}
// Queries whether the MDS table behind this handle has been switched to an empty shell.
//
// @param [out] is_switched_to_empty_shell  written only on the success path with the value
//                                          reported by the underlying table.
// @return OB_SUCCESS on success, OB_BAD_NULL_ERROR if p_mds_table_base_ is not valid.
//         CHECK_MDS_TABLE_INIT() is defined elsewhere; presumably it reports a
//         not-initialized handle through `ret` as well — confirm against the macro.
inline int MdsTableHandle::is_switched_to_empty_shell(bool &is_switched_to_empty_shell) const
{
  // `ret` has to be an int: with a bool every non-zero error code would be
  // collapsed to 1 when converted back to the int return type.
  int ret = OB_SUCCESS;
  CHECK_MDS_TABLE_INIT();
  if (!p_mds_table_base_.is_valid()) {
    ret = OB_BAD_NULL_ERROR;
    MDS_LOG(WARN, "p_mds_table_base_ is invalid", K(*this));
  } else {
    is_switched_to_empty_shell = p_mds_table_base_->is_switched_to_empty_shell();
  }
  return ret;
}
template <int N>
inline int MdsTableHandle::forcely_reset_mds_table(const char (&reason)[N])
{

View File

@ -89,7 +89,6 @@ int ObTenantMdsAllocator::init()
{
int ret = OB_SUCCESS;
ObMemAttr mem_attr;
// TODO : @gengli new ctx id?
mem_attr.tenant_id_ = MTL_ID();
mem_attr.ctx_id_ = ObCtxIds::MDS_DATA_ID;
mem_attr.label_ = "MdsTable";
@ -103,17 +102,34 @@ int ObTenantMdsAllocator::init()
return ret;
}
// Number of bytes ObTenantMdsAllocator::alloc(const int64_t) reserves in front of every
// block it hands out: a `const char *` (the allocating MDS type) followed by a `uint16_t`
// (the requested size). ObTenantMdsAllocator::free() steps back by the same amount.
constexpr int64_t HEADER_SIZE = sizeof(const char *) + sizeof(uint16_t);
void *ObTenantMdsAllocator::alloc(const int64_t size)
{
void *obj = allocator_.alloc(size);
MDS_LOG(DEBUG, "mds alloc ", K(size), KP(obj));
if (OB_NOT_NULL(obj)) {
MTL(ObTenantMdsService*)->record_alloc_backtrace(obj,
__thread_mds_tag__,
__thread_mds_alloc_type__,
__thread_mds_alloc_file__,
__thread_mds_alloc_func__,
__thread_mds_alloc_line__);// for debug mem leak
const char **related_mds_type = nullptr;
uint16_t *alloc_size = 0;
char *obj = nullptr;// C++ standard: sizeof(char) == 1
if (size >= UINT16_MAX) {
MDS_LOG_RET(ERROR, OB_ERR_UNEXPECTED,
"too big mds data size, if you make sure you need so big data, do not use mds allocator", K(size));
} else {
obj = (char *)allocator_.alloc(HEADER_SIZE + size);
related_mds_type = (const char **)obj;
obj += sizeof(const char *);
alloc_size = (uint16_t *)obj;
obj += sizeof(uint16_t);
*related_mds_type = __thread_mds_alloc_type__;
*alloc_size = size;
alloc_info_map_.record_alloc_info(*related_mds_type, *alloc_size);
MDS_LOG(DEBUG, "mds alloc ", K(size), KP(obj));
if (OB_NOT_NULL(obj)) {
MTL(ObTenantMdsService*)->record_alloc_backtrace(obj,
__thread_mds_tag__,
__thread_mds_alloc_type__,
__thread_mds_alloc_file__,
__thread_mds_alloc_func__,
__thread_mds_alloc_line__);// for debug mem leak
}
}
return obj;
}
@ -128,8 +144,14 @@ void *ObTenantMdsAllocator::alloc(const int64_t size, const ObMemAttr &attr)
// Releases a block previously returned by alloc(const int64_t).
//
// `ptr` points at the payload; the header written by alloc() (type pointer, then uint16_t
// size) lives in the HEADER_SIZE bytes directly in front of it. The header is read to
// update the per-type statistics, and only then is the real block start handed back to
// the underlying allocator. A null `ptr` is ignored.
void ObTenantMdsAllocator::free(void *ptr)
{
  char *free_ptr = (char *)ptr;
  if (OB_NOT_NULL(free_ptr)) {
    // the header must be consumed before the memory is released
    alloc_info_map_.record_free_info(*(const char **)(free_ptr - sizeof(uint16_t) - sizeof(const char *)),
                                     *(uint16_t*)(free_ptr - sizeof(uint16_t)));
    // drop the debug record while this address is still owned by us, so that a concurrent
    // alloc() reusing the address cannot have its fresh record erased
    MTL(ObTenantMdsService*)->erase_alloc_backtrace(ptr);
    free_ptr -= HEADER_SIZE;
    allocator_.free(free_ptr);// exactly one release, at the real start of the block
  }
}
void ObTenantMdsAllocator::set_attr(const ObMemAttr &attr)
@ -201,10 +223,6 @@ int ObTenantMdsService::mtl_start(ObTenantMdsService *&mds_service)
3_s,
[mds_service]() -> bool {
ObCurTraceId::init(GCONF.self_addr_);
if (REACH_TIME_INTERVAL(30_s)) {
observer::ObMdsEventBuffer::dump_statistics();
mds_service->dump_map_holding_item(5_min);
}
mds_service->mds_timer_.try_recycle_mds_table_task();
return false;// won't stop until tenant exit
}
@ -220,6 +238,19 @@ int ObTenantMdsService::mtl_start(ObTenantMdsService *&mds_service)
}
))) {
MDS_LOG(ERROR, "fail to register dump mds table status task to timer", KR(ret), KPC(mds_service));
} else if (MDS_FAIL(mds_service->mds_timer_.timer_.schedule_task_repeat(
mds_service->mds_timer_.dump_memory_statistics_task_handle_,
30_s,
[mds_service]() -> bool {
mds_service->mds_allocator_.dump();
mds_service->dump_map_holding_item(5_min);
if (REACH_TIME_INTERVAL(30_s)) {
observer::ObMdsEventBuffer::dump_statistics();
}
return false;// won't stop until tenant exit
}
))) {
MDS_LOG(ERROR, "fail to register memory dump timer task", KR(ret), KPC(mds_service));
}
return ret;
}
@ -229,6 +260,7 @@ void ObTenantMdsService::mtl_stop(ObTenantMdsService *&mds_service)
if (nullptr != mds_service) {
mds_service->mds_timer_.recycle_task_handle_.stop();
mds_service->mds_timer_.dump_special_mds_table_status_task_handle_.stop();
mds_service->mds_timer_.dump_memory_statistics_task_handle_.stop();
}
}
@ -237,6 +269,7 @@ void ObTenantMdsService::mtl_wait(ObTenantMdsService *&mds_service)
if (nullptr != mds_service) {
mds_service->mds_timer_.recycle_task_handle_.wait();
mds_service->mds_timer_.dump_special_mds_table_status_task_handle_.wait();
mds_service->mds_timer_.dump_memory_statistics_task_handle_.wait();
}
}

View File

@ -98,6 +98,83 @@ struct ObMdsMemoryLeakDebugInfo
class ObTenantMdsAllocator : public ObIAllocator
{
friend class ObTenantMdsService;
// Number of slots in AllockInfoMap::bucket (128). It is a power of two, which
// AllockInfoMap::find_idx_ relies on when it wraps probe indices with (SIZE - 1) as a mask.
static constexpr int64_t ALLOC_INFO_BUCKET_SIZE = (1ULL << 7);
// Running allocation/free totals for one MDS type, identified by the `alloc_type_` pointer.
// The four counters are bumped with ATOMIC_AAF.
struct AllocInfoBucket {
  AllocInfoBucket()
  {
    alloc_type_ = nullptr;
    total_alloc_size_ = 0;
    total_free_size_ = 0;
    total_alloc_times_ = 0;
    total_free_times_ = 0;
  }

  // Accounts one allocation of `alloc_size` bytes.
  void record_alloc_info(int64_t alloc_size)
  {
    ATOMIC_AAF(&total_alloc_times_, 1);
    ATOMIC_AAF(&total_alloc_size_, alloc_size);
  }

  // Accounts one release of `free_size` bytes.
  void record_free_info(int64_t free_size)
  {
    ATOMIC_AAF(&total_free_times_, 1);
    ATOMIC_AAF(&total_free_size_, free_size);
  }

  const char *alloc_type_;     // type this bucket belongs to; nullptr means the bucket is unused
  int64_t total_alloc_size_;   // sum of all sizes passed to record_alloc_info
  int64_t total_free_size_;    // sum of all sizes passed to record_free_info
  int64_t total_alloc_times_;  // number of record_alloc_info calls
  int64_t total_free_times_;   // number of record_free_info calls
};
// Fixed-size, lock-free table of per-type allocation statistics.
//
// A type is identified by the *pointer value* of its name string, not by string contents.
// Slots are claimed with a compare-and-swap on AllocInfoBucket::alloc_type_ and are never
// released in this struct. When all ALLOC_INFO_BUCKET_SIZE slots are taken by other types,
// records for further types are silently dropped.
struct AllockInfoMap {
  AllockInfoMap() = default;

  // Adds one allocation of `alloc_size` bytes to the bucket of `type` (no-op if no bucket).
  void record_alloc_info(const char *type, int64_t alloc_size) {
    int64_t idx = find_idx_(type);
    if (idx != -1) {
      bucket[idx].record_alloc_info(alloc_size);
    }
  }

  // Adds one release of `free_size` bytes to the bucket of `type` (no-op if no bucket).
  void record_free_info(const char *type, int64_t free_size) {
    int64_t idx = find_idx_(type);
    if (idx != -1) {
      bucket[idx].record_free_info(free_size);
    }
  }

  // Finds the slot owned by `type`, claiming a free one if the type is not registered yet.
  // Probing is linear, starting at the pointer value of `type` and wrapping with a mask.
  // @return slot index in [0, ALLOC_INFO_BUCKET_SIZE), or -1 if `type` is null or the
  //         table has no slot left for it.
  int64_t find_idx_(const char *type) {
    int64_t ret_idx = -1;
    if (OB_NOT_NULL(type)) {
      constexpr uint64_t MASK = ALLOC_INFO_BUCKET_SIZE - 1;
      // unsigned arithmetic: wrapping while probing is well defined
      const uint64_t hash_idx = (uint64_t)type;
      for (int64_t probe = 0; probe < ALLOC_INFO_BUCKET_SIZE && ret_idx == -1; ++probe) {
        const int64_t redirected_idx = (int64_t)((hash_idx + (uint64_t)probe) & MASK);
        const char *alloc_type = ATOMIC_LOAD(&bucket[redirected_idx].alloc_type_);
        if (OB_LIKELY(alloc_type == type)) {// find existing bucket
          ret_idx = redirected_idx;
        } else if (OB_LIKELY(alloc_type == nullptr)) {// take a new place
          const char *previous = ATOMIC_CAS(&bucket[redirected_idx].alloc_type_, nullptr, type);
          // Either we claimed the slot, or another thread claimed it for the very same
          // type in the meantime. The second case must reuse the slot, otherwise the
          // type would end up with two buckets and split statistics.
          if (nullptr == previous || type == previous) {
            ret_idx = redirected_idx;
          }
        }
      }
    }
    return ret_idx;
  }

  // Logs one INFO line per registered type with its live and historical totals.
  // Counters are read one by one, so a line is not a consistent snapshot under
  // concurrent updates.
  void dump() const {
    for (int64_t idx = 0; idx < ALLOC_INFO_BUCKET_SIZE; ++idx) {
      const char *alloc_type = ATOMIC_LOAD(&bucket[idx].alloc_type_);
      int64_t total_alloc_size = ATOMIC_LOAD(&bucket[idx].total_alloc_size_);
      int64_t total_free_size = ATOMIC_LOAD(&bucket[idx].total_free_size_);
      int64_t total_alloc_times = ATOMIC_LOAD(&bucket[idx].total_alloc_times_);
      int64_t total_free_times = ATOMIC_LOAD(&bucket[idx].total_free_times_);
      int64_t active_size = total_alloc_size - total_free_size;
      int64_t active_alloc_times = total_alloc_times - total_free_times;
      if (OB_NOT_NULL(alloc_type)) {
        // averages are reported as "INVALID" instead of dividing by zero
        MDS_LOG_RET(INFO, OB_SUCCESS, "[MDS_MEMORY]dump memory useage", K(alloc_type),
                    "active_alloc_size", to_cstring(ObSizeLiteralPrettyPrinter(active_size)),
                    "active_alloc_times", active_alloc_times,
                    "average_active_size", active_alloc_times == 0 ? "INVALID" : to_cstring(ObSizeLiteralPrettyPrinter(active_size / active_alloc_times)),
                    "history_alloc_size", to_cstring(ObSizeLiteralPrettyPrinter(total_alloc_size)),
                    "history_alloc_times", total_alloc_times,
                    "average_history_size", total_alloc_times == 0 ? "INVALID" : to_cstring(ObSizeLiteralPrettyPrinter(total_alloc_size/total_alloc_times)));
      }
    }
  }

  AllocInfoBucket bucket[ALLOC_INFO_BUCKET_SIZE];
};
private:
static const int64_t MDS_ALLOC_CONCURRENCY = 32;
public:
@ -109,10 +186,12 @@ public:
virtual void free(void *ptr) override;
virtual void set_attr(const ObMemAttr &attr) override;
// Forwards to allocator_.hold(); presumably the number of bytes the slice allocator
// currently holds — TODO confirm against ObVSliceAlloc.
int64_t hold() { return allocator_.hold(); }
// Logs the per-type allocation statistics collected in alloc_info_map_.
void dump() { alloc_info_map_.dump(); }
TO_STRING_KV(KP(this));
private:
common::ObBlockAllocMgr block_alloc_;
common::ObVSliceAlloc allocator_;
AllockInfoMap alloc_info_map_;
};
struct ObTenantBufferCtxAllocator : public ObIAllocator// for now, it is just a wrapper of mtl_malloc
@ -134,6 +213,7 @@ struct ObTenantMdsTimer
TO_STRING_KV(KP(this), K_(recycle_task_handle))
common::ObOccamTimerTaskRAIIHandle recycle_task_handle_;
common::ObOccamTimerTaskRAIIHandle dump_special_mds_table_status_task_handle_;
common::ObOccamTimerTaskRAIIHandle dump_memory_statistics_task_handle_;
common::ObOccamTimer timer_;
private:
int process_with_tablet_(ObTablet &tablet);

View File

@ -4622,6 +4622,7 @@ int ObTablet::get_mds_table_rec_log_scn(SCN &rec_scn)
int ret = OB_SUCCESS;
const share::ObLSID &ls_id = tablet_meta_.ls_id_;
const common::ObTabletID &tablet_id = tablet_meta_.tablet_id_;
bool is_switched_to_empty_shell = false;
mds::MdsTableHandle mds_table;
rec_scn = SCN::max_scn();
if (IS_NOT_INIT) {
@ -4642,6 +4643,11 @@ int ObTablet::get_mds_table_rec_log_scn(SCN &rec_scn)
} else if (!rec_scn.is_valid()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get invalid scn from mds table", K(ret));
} else if (OB_FAIL(mds_table.is_switched_to_empty_shell(is_switched_to_empty_shell))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to get mds table empty shell status", K(ret));
} else if (is_switched_to_empty_shell) {
rec_scn = share::SCN::max_scn();
}
return ret;
}