diagnose memory bloat

This commit is contained in:
tushicheng
2024-04-15 03:20:56 +00:00
committed by ob-robot
parent 8b461d5c8b
commit 4bb2e95295
12 changed files with 101 additions and 58 deletions

View File

@ -64,6 +64,12 @@ public:
return reason_ == lib::PHYSICAL_MEMORY_EXHAUST;
}
// Reports whether reason_ is one of TENANT_HOLD_REACH_LIMIT,
// SERVER_HOLD_REACH_LIMIT or PHYSICAL_MEMORY_EXHAUST.
bool reach_limit_except_ctx() const
{
  const bool tenant_limit_hit = (lib::TENANT_HOLD_REACH_LIMIT == reason_);
  const bool server_limit_hit = (lib::SERVER_HOLD_REACH_LIMIT == reason_);
  const bool physical_exhausted = (lib::PHYSICAL_MEMORY_EXHAUST == reason_);
  return tenant_limit_hit || server_limit_hit || physical_exhausted;
}
};
char *alloc_failed_msg();

View File

@ -21,8 +21,10 @@
#include "lib/thread/ob_thread_name.h"
#include "lib/thread/thread_mgr.h"
#include "lib/utility/ob_print_utils.h"
#include "lib/container/ob_vector.h"
#include "rpc/obrpc/ob_rpc_packet.h"
#include "common/ob_clock_generator.h"
#include "common/ob_smart_var.h"
namespace oceanbase
{
@ -257,6 +259,57 @@ int ObMemoryDump::load_malloc_sample_map(ObMallocSampleMap &malloc_sample_map)
return ret;
}
void ObMemoryDump::print_malloc_sample_info()
{
int ret = OB_SUCCESS;
typedef ObSortedVector<ObMallocSamplePair*> MallocSamplePairVector;
ObLatchRGuard guard(iter_lock_, ObLatchIds::MEM_DUMP_ITER_LOCK);
ObMallocSampleMap &map = r_stat_->malloc_sample_map_;
ObMemAttr attr(OB_SERVER_TENANT_ID, "MallocSampleInf", ObCtxIds::DEFAULT_CTX_ID, lib::OB_HIGH_ALLOC);
MallocSamplePairVector vector(map.size(), nullptr, attr);
for (ObMallocSampleIter it = map.begin(); OB_SUCC(ret) && it != map.end(); ++it) {
MallocSamplePairVector::iterator pos;
ret = vector.insert(&(*it), pos, ObMallocSamplePairCmp());
}
int64_t print_pos = 0;
int64_t tenant_id = OB_SERVER_TENANT_ID;
int64_t ctx_id = ObCtxIds::DEFAULT_CTX_ID;
const char *label = "";
int64_t bt_cnt = 0;
const int64_t MAX_LABEL_BT_CNT = 5;
for (MallocSamplePairVector::iterator it = vector.begin(); OB_SUCC(ret) && it != vector.end(); ++it) {
if ((*it)->first.tenant_id_ != tenant_id || (*it)->first.ctx_id_ != ctx_id) {
if (print_pos > 0) {
_LOG_INFO("\n[MEMORY][BT] tenant_id=%5ld ctx_id=%25s\n%.*s",
tenant_id, get_global_ctx_info().get_ctx_name(ctx_id), static_cast<int>(print_pos), print_buf_);
print_pos = 0;
}
tenant_id = (*it)->first.tenant_id_;
ctx_id = (*it)->first.ctx_id_;
label = (*it)->first.label_;
bt_cnt = 0;
} else if (0 != STRCMP(label, (*it)->first.label_)) {
label = (*it)->first.label_;
bt_cnt = 0;
}
if (bt_cnt++ < MAX_LABEL_BT_CNT) {
char bt[MAX_BACKTRACE_LENGTH];
parray(bt, sizeof(bt), (int64_t*)(*it)->first.bt_, AOBJECT_BACKTRACE_COUNT);
ret = databuff_printf(print_buf_, PRINT_BUF_LEN, print_pos, "[MEMORY][BT] mod=%15s, alloc_bytes=% '15ld, alloc_count=% '15ld, bt=%s\n",
label, (*it)->second.alloc_bytes_, (*it)->second.alloc_count_, bt);
if (OB_SUCC(ret) && print_pos > PRINT_BUF_LEN / 2) {
_LOG_INFO("\n[MEMORY][BT] tenant_id=%5ld ctx_id=%25s\n%.*s",
tenant_id, get_global_ctx_info().get_ctx_name(ctx_id), static_cast<int>(print_pos), print_buf_);
print_pos = 0;
}
}
}
if (OB_SUCC(ret) && print_pos > 0) {
_LOG_INFO("\n[MEMORY][BT] tenant_id=%5ld ctx_id=%25s\n%.*s",
tenant_id, get_global_ctx_info().get_ctx_name(ctx_id), static_cast<int>(print_pos), print_buf_);
}
}
void ObMemoryDump::run1()
{
SANITY_DISABLE_CHECK_RANGE(); // prevent sanity_check_range
@ -522,14 +575,13 @@ void ObMemoryDump::handle(void *task)
ObMemoryCheckContext *memory_check_ctx = m_task->memory_check_ctx_;
ObSqlMemoryLeakChecker::get_instance().update_check_range(NULL == memory_check_ctx || !memory_check_ctx->is_sql_memory_leak(),
min_check_version, max_check_version);
ObMallocAllocator *ma = ObMallocAllocator::get_instance();
for (int tenant_idx = 0; tenant_idx < tenant_cnt; tenant_idx++) {
uint64_t tenant_id = tenant_ids_[tenant_idx];
for (int ctx_id = 0; ctx_id < ObCtxIds::MAX_CTX_ID; ctx_id++) {
auto ta =
ObMallocAllocator::get_instance()->get_tenant_ctx_allocator(tenant_id, ctx_id);
ObTenantCtxAllocatorGuard ta = ma->get_tenant_ctx_allocator(tenant_id, ctx_id);
if (nullptr == ta) {
ta = ObMallocAllocator::get_instance()->get_tenant_ctx_allocator_unrecycled(tenant_id,
ctx_id);
ta = ma->get_tenant_ctx_allocator_unrecycled(tenant_id, ctx_id);
}
if (nullptr == ta) {
continue;
@ -641,6 +693,16 @@ void ObMemoryDump::handle(void *task)
}
memory_check_ctx = NULL;
}
for (int tenant_idx = 0; tenant_idx < tenant_cnt; tenant_idx++) {
uint64_t tenant_id = tenant_ids_[tenant_idx];
ma->print_tenant_memory_usage(tenant_id);
ma->print_tenant_ctx_memory_usage(tenant_id);
}
#ifdef FATAL_ERROR_HANG
print_malloc_sample_info();
#endif
} else {
int fd = -1;
if (-1 == (fd = ::open(LOG_FILE,

View File

@ -213,6 +213,8 @@ public:
private:
void run1() override;
void handle(void *task);
// Logs the entries of the malloc sample map; called from handle() when
// FATAL_ERROR_HANG is defined.
void print_malloc_sample_info();
private:
AChunk *find_chunk(void *ptr);
private:

View File

@ -67,7 +67,25 @@ struct ObMallocSampleValue
// Hash map from ObMallocSampleKey to ObMallocSampleValue, instantiated with
// hash::NoPthreadDefendMode.
typedef hash::ObHashMap<ObMallocSampleKey, ObMallocSampleValue,
hash::NoPthreadDefendMode> ObMallocSampleMap;
// Key/value pair type with the same key and value types as ObMallocSampleMap.
typedef hash::HashMapPair<ObMallocSampleKey, ObMallocSampleValue> ObMallocSamplePair;
// Iterator type of ObMallocSampleMap.
typedef ObMallocSampleMap::iterator ObMallocSampleIter;
// Ordering predicate over pointers to malloc-sample pairs.
// Keys, in priority order:
//   1. first.tenant_id_      ascending
//   2. first.ctx_id_         ascending
//   3. first.label_          ascending (by STRCMP)
//   4. second.alloc_bytes_   descending
// Returns true only when *left must be ordered before *right. Two pairs that
// are equal on all four keys are equivalent, so the predicate returns false for
// them in either argument order; this keeps it a strict weak ordering, which
// sorted containers and binary-search style algorithms rely on.
struct ObMallocSamplePairCmp
{
  bool operator()(const ObMallocSamplePair *left, const ObMallocSamplePair *right) const
  {
    // Equivalent elements must never compare as "less than" each other.
    bool bret = false;
    if (left->first.tenant_id_ != right->first.tenant_id_) {
      bret = left->first.tenant_id_ < right->first.tenant_id_;
    } else if (left->first.ctx_id_ != right->first.ctx_id_) {
      bret = left->first.ctx_id_ < right->first.ctx_id_;
    } else {
      // Compare the labels once and reuse the result.
      const int label_cmp = STRCMP(left->first.label_, right->first.label_);
      if (0 != label_cmp) {
        bret = label_cmp < 0;
      } else if (left->second.alloc_bytes_ != right->second.alloc_bytes_) {
        // Larger allocations come first within the same label.
        bret = left->second.alloc_bytes_ > right->second.alloc_bytes_;
      }
    }
    return bret;
  }
};
inline uint64_t ob_malloc_sample_hash(const char* data)
{

View File

@ -481,6 +481,13 @@ void* ObTenantCtxAllocator::common_realloc(const void *ptr, const int64_t size,
SANITY_POISON(obj, obj->nobjs_ * AOBJECT_CELL_BYTES);
SANITY_UNPOISON(obj->data_, size);
} else if (TC_REACH_TIME_INTERVAL(1 * 1000 * 1000)) {
#ifdef FATAL_ERROR_HANG
if (g_alloc_failed_ctx().reach_limit_except_ctx() &&
REACH_TIME_INTERVAL(60 * 1000 * 1000)) {
ObMemoryDump::get_instance().generate_mod_stat_task();
sleep(1);
}
#endif
const char *msg = is_errsim ? "[ERRSIM] errsim inject memory error" : alloc_failed_msg();
LOG_DBA_WARN(OB_ALLOCATE_MEMORY_FAILED, "[OOPS]", "alloc failed reason", KCSTRING(msg));
_OB_LOG_RET(WARN, OB_ALLOCATE_MEMORY_FAILED, "oops, alloc failed, tenant_id=%ld, ctx_id=%ld, ctx_name=%s, ctx_hold=%ld, "