fix bad MEMORY_LOG when ServerGTimer hang

This commit is contained in:
obdev
2023-02-10 08:41:32 +00:00
committed by ob-robot
parent 63440bd16e
commit 364fdb8222
12 changed files with 107 additions and 65 deletions

View File

@ -509,7 +509,7 @@ void ObMallocAllocator::print_tenant_memory_usage(uint64_t tenant_id) const
ObPageManagerCenter::get_instance().print_tenant_stat(tenant_id, buf, BUFLEN, ctx_pos);
}
buf[std::min(ctx_pos, BUFLEN - 1)] = '\0';
allow_next_syslog();
_LOG_INFO("[MEMORY] tenant: %lu, limit: %'lu hold: %'lu rpc_hold: %'lu cache_hold: %'lu "
"cache_used: %'lu cache_item_count: %'lu \n%s",
tenant_id,

View File

@ -23,7 +23,6 @@
using namespace oceanbase::lib;
using namespace oceanbase::common;
void *ObTenantCtxAllocator::alloc(const int64_t size, const ObMemAttr &attr)
{
SANITY_DISABLE_CHECK_RANGE(); // prevent sanity_check_range
@ -214,6 +213,7 @@ void ObTenantCtxAllocator::print_usage() const
}
if (ctx_hold_bytes > 0 || sum_item.used_ > 0) {
allow_next_syslog();
_LOG_INFO("\n[MEMORY] tenant_id=%5ld ctx_id=%25s hold=% '15ld used=% '15ld limit=% '15ld"
"\n[MEMORY] idle_size=% '10ld free_size=% '10ld"
"\n[MEMORY] wash_related_chunks=% '10ld washed_blocks=% '10ld washed_size=% '10ld\n%s",

View File

@ -39,6 +39,11 @@ namespace oceanbase
namespace common
{
void __attribute__((weak)) allow_next_syslog(int64_t)
{
// do nothing
}
const char* __attribute__((weak)) ob_strerror(const int oberr)
{
const char* ret = "ob_strerror";

View File

@ -61,6 +61,8 @@ class ObVSliceAlloc;
class ObBlockAllocMgr;
class ObFIFOAllocator;
class ObPLogItem;
extern void allow_next_syslog(int64_t count = 1);
extern int logdata_vprintf(char *buf, const int64_t buf_len, int64_t &pos, const char *fmt, va_list args);
extern ObPLogFDType get_fd_type(const char *mod_name);

View File

@ -309,6 +309,20 @@ LOG_MOD_END(PL)
#define CANCLE_OB_LOG_TRACE_MODE() OB_LOGGER.set_trace_mode(false)
#define PRINT_OB_LOG_TRACE_BUF(mod_name, level) \
(OB_LOG_NEED_TO_PRINT(level) ? OB_LOGGER.print_trace_buffer("["#mod_name"] ", OB_LOG_LEVEL_DIRECT_NO_ERRCODE(level)) : (void) 0)
#define PRINT_WITH_TRACE_MODE(mod_name, level, body) \
{ \
bool need_cancle_trace_mode = false; \
if (!IS_OB_LOG_TRACE_MODE()) { \
need_cancle_trace_mode = true; \
} \
SET_OB_LOG_TRACE_MODE(); \
PRINT_OB_LOG_TRACE_BUF(COMMON, INFO); \
body; \
PRINT_OB_LOG_TRACE_BUF(mod_name, level); \
if (need_cancle_trace_mode) { \
CANCLE_OB_LOG_TRACE_MODE(); \
} \
}
//for tests/ob_log_test or others
#define OB_LOG_MOD_NEED_TO_PRINT(parMod, level) \

View File

@ -54,14 +54,9 @@ void flush_trace()
auto& trace = *OBTRACE;
auto& current_span = trace.current_span_;
if (trace.is_inited() && !current_span.is_empty()) {
auto func = [&] {
auto* span = current_span.get_first();
::oceanbase::trace::ObSpanCtx* next = nullptr;
bool need_cancle_trace_mode = false;
if (!IS_OB_LOG_TRACE_MODE()) {
need_cancle_trace_mode = true;
}
SET_OB_LOG_TRACE_MODE();
PRINT_OB_LOG_TRACE_BUF(COMMON, INFO);
while (current_span.get_header() != span) {
auto* next = span->get_next();
if (nullptr != span->tags_ || 0 != span->end_ts_) {
@ -115,10 +110,8 @@ void flush_trace()
}
span = next;
}
PRINT_OB_LOG_TRACE_BUF(OBTRACE, INFO);
if (need_cancle_trace_mode) {
CANCLE_OB_LOG_TRACE_MODE();
}
};
PRINT_WITH_TRACE_MODE(OBTRACE, INFO, func());
trace.offset_ = trace.buffer_size_ / 2;
}
}

View File

@ -614,6 +614,8 @@ int MockTenantModuleEnv::init_before_start_mtl()
STORAGE_LOG(ERROR, "failed to init bandwidth_throttle_", K(ret));
} else if (OB_FAIL(TG_START(lib::TGDefIDs::ServerGTimer))) {
STORAGE_LOG(ERROR, "init timer fail", KR(ret));
} else if (OB_FAIL(TG_START(lib::TGDefIDs::MemDumpTimer))) {
STORAGE_LOG(ERROR, "init memory dump timer fail", KR(ret));
} else if (OB_FAIL(ObOptStatMonitorManager::get_instance().init(&sql_proxy_))) {
STORAGE_LOG(ERROR, "failed to init opt stat monitor manager", KR(ret));
} else {
@ -761,6 +763,11 @@ void MockTenantModuleEnv::destroy()
TG_WAIT(lib::TGDefIDs::ServerGTimer);
TG_DESTROY(lib::TGDefIDs::ServerGTimer);
TG_STOP(lib::TGDefIDs::MemDumpTimer);
TG_WAIT(lib::TGDefIDs::MemDumpTimer);
TG_DESTROY(lib::TGDefIDs::MemDumpTimer);
destroyed_ = true;
chdir(curr_dir_);

View File

@ -502,6 +502,10 @@ void ObServer::destroy()
TG_DESTROY(lib::TGDefIDs::CTASCleanUpTimer);
FLOG_INFO("ctas clean up timer destroyed");
FLOG_INFO("begin to destroy memory dump timer");
TG_DESTROY(lib::TGDefIDs::MemDumpTimer);
FLOG_INFO("memory dump timer destroyed");
FLOG_INFO("begin to destroy redef heart beat task");
TG_DESTROY(lib::TGDefIDs::RedefHeartBeatTask);
FLOG_INFO("redef heart beat task destroyed");
@ -1039,6 +1043,10 @@ int ObServer::stop()
TG_STOP(lib::TGDefIDs::CTASCleanUpTimer);
FLOG_INFO("ctas clean up timer stopped");
FLOG_INFO("begin to stop memory dump timer");
TG_STOP(lib::TGDefIDs::MemDumpTimer);
FLOG_INFO("memory dump timer stopped");
FLOG_INFO("begin to stop ctas clean up timer");
TG_STOP(lib::TGDefIDs::HeartBeatCheckTask);
FLOG_INFO("ctas clean up timer stopped");
@ -1239,6 +1247,10 @@ int ObServer::wait()
TG_WAIT(lib::TGDefIDs::CTASCleanUpTimer);
FLOG_INFO("wait ctas clean up timer success");
FLOG_INFO("begin to wait memory dump timer");
TG_WAIT(lib::TGDefIDs::MemDumpTimer);
FLOG_INFO("wait memory dump timer success");
FLOG_INFO("begin to wait root service");
root_service_.wait();
FLOG_INFO("wait root service success");
@ -1527,6 +1539,8 @@ int ObServer::init_config()
LOG_ERROR("fail to init server trace timer", KR(ret));
} else if (OB_FAIL(TG_START(lib::TGDefIDs::CTASCleanUpTimer))) {
LOG_ERROR("fail to init ctas clean up timer", KR(ret));
} else if (OB_FAIL(TG_START(lib::TGDefIDs::MemDumpTimer))) {
LOG_ERROR("fail to init memory dump timer", KR(ret));
} else if (OB_FAIL(config_mgr_.base_init())) {
LOG_ERROR("config_mgr_ base_init failed", KR(ret));
} else if (OB_FAIL(config_mgr_.init(sql_proxy_, self_addr_))) {

View File

@ -390,7 +390,7 @@ int ObMultiTenant::start()
} else if (OB_FAIL(ObTenantNodeBalancer::get_instance().start())) {
LOG_ERROR("start tenant node balancer thread failed", K(ret));
// start memstore print timer.
} else if (OB_FAIL(printer.register_timer_task(lib::TGDefIDs::ServerGTimer))) {
} else if (OB_FAIL(printer.register_timer_task(lib::TGDefIDs::MemDumpTimer))) {
LOG_ERROR("Fail to register timer task", K(ret));
} else {
LOG_INFO("succ to start multi tenant");

View File

@ -27,6 +27,13 @@ using namespace oceanbase::lib;
using namespace oceanbase::common;
namespace oceanbase {
namespace common {
void allow_next_syslog(int64_t count)
{
share::ObTaskController::get().allow_next_syslog(count);
}
} // common
namespace share {
class ObSyslogPerErrLimiter : public common::ObISyslogRateLimiter

View File

@ -128,4 +128,5 @@ TG_DEF(CDCService, CDCSrv, "", TG_STATIC, OB_THREAD_POOL, ThreadCountPair(1, 1))
TG_DEF(LogUpdater, LogUpdater, "", TG_STATIC, TIMER)
TG_DEF(HeartBeatCheckTask, HeartBeatCheckTask, "", TG_STATIC, TIMER)
TG_DEF(RedefHeartBeatTask, RedefHeartBeatTask, "", TG_STATIC, TIMER)
TG_DEF(MemDumpTimer, MemDumpTimer, "", TG_STATIC, TIMER)
#endif

View File

@ -25,12 +25,11 @@ namespace oceanbase
using namespace share;
namespace storage
{
void ObPrintTenantMemoryUsage::runTimerTask()
{
LOG_INFO("=== Run print tenant memory usage task ===");
ObTenantMemoryPrinter &printer = ObTenantMemoryPrinter::get_instance();
printer.print_tenant_usage();
PRINT_WITH_TRACE_MODE(LIB, INFO, printer.print_tenant_usage());
ObObjFreeListList::get_freelists().dump();
}