fix bad MEMORY_LOG when ServerGTimer hang

This commit is contained in:
obdev
2023-02-10 08:41:32 +00:00
committed by ob-robot
parent 63440bd16e
commit 364fdb8222
12 changed files with 107 additions and 65 deletions

View File

@ -509,7 +509,7 @@ void ObMallocAllocator::print_tenant_memory_usage(uint64_t tenant_id) const
ObPageManagerCenter::get_instance().print_tenant_stat(tenant_id, buf, BUFLEN, ctx_pos); ObPageManagerCenter::get_instance().print_tenant_stat(tenant_id, buf, BUFLEN, ctx_pos);
} }
buf[std::min(ctx_pos, BUFLEN - 1)] = '\0'; buf[std::min(ctx_pos, BUFLEN - 1)] = '\0';
allow_next_syslog();
_LOG_INFO("[MEMORY] tenant: %lu, limit: %'lu hold: %'lu rpc_hold: %'lu cache_hold: %'lu " _LOG_INFO("[MEMORY] tenant: %lu, limit: %'lu hold: %'lu rpc_hold: %'lu cache_hold: %'lu "
"cache_used: %'lu cache_item_count: %'lu \n%s", "cache_used: %'lu cache_item_count: %'lu \n%s",
tenant_id, tenant_id,

View File

@ -23,7 +23,6 @@
using namespace oceanbase::lib; using namespace oceanbase::lib;
using namespace oceanbase::common; using namespace oceanbase::common;
void *ObTenantCtxAllocator::alloc(const int64_t size, const ObMemAttr &attr) void *ObTenantCtxAllocator::alloc(const int64_t size, const ObMemAttr &attr)
{ {
SANITY_DISABLE_CHECK_RANGE(); // prevent sanity_check_range SANITY_DISABLE_CHECK_RANGE(); // prevent sanity_check_range
@ -214,6 +213,7 @@ void ObTenantCtxAllocator::print_usage() const
} }
if (ctx_hold_bytes > 0 || sum_item.used_ > 0) { if (ctx_hold_bytes > 0 || sum_item.used_ > 0) {
allow_next_syslog();
_LOG_INFO("\n[MEMORY] tenant_id=%5ld ctx_id=%25s hold=% '15ld used=% '15ld limit=% '15ld" _LOG_INFO("\n[MEMORY] tenant_id=%5ld ctx_id=%25s hold=% '15ld used=% '15ld limit=% '15ld"
"\n[MEMORY] idle_size=% '10ld free_size=% '10ld" "\n[MEMORY] idle_size=% '10ld free_size=% '10ld"
"\n[MEMORY] wash_related_chunks=% '10ld washed_blocks=% '10ld washed_size=% '10ld\n%s", "\n[MEMORY] wash_related_chunks=% '10ld washed_blocks=% '10ld washed_size=% '10ld\n%s",

View File

@ -39,6 +39,11 @@ namespace oceanbase
namespace common namespace common
{ {
void __attribute__((weak)) allow_next_syslog(int64_t)
{
// do nothing
}
const char* __attribute__((weak)) ob_strerror(const int oberr) const char* __attribute__((weak)) ob_strerror(const int oberr)
{ {
const char* ret = "ob_strerror"; const char* ret = "ob_strerror";

View File

@ -61,6 +61,8 @@ class ObVSliceAlloc;
class ObBlockAllocMgr; class ObBlockAllocMgr;
class ObFIFOAllocator; class ObFIFOAllocator;
class ObPLogItem; class ObPLogItem;
extern void allow_next_syslog(int64_t count = 1);
extern int logdata_vprintf(char *buf, const int64_t buf_len, int64_t &pos, const char *fmt, va_list args); extern int logdata_vprintf(char *buf, const int64_t buf_len, int64_t &pos, const char *fmt, va_list args);
extern ObPLogFDType get_fd_type(const char *mod_name); extern ObPLogFDType get_fd_type(const char *mod_name);

View File

@ -309,6 +309,20 @@ LOG_MOD_END(PL)
#define CANCLE_OB_LOG_TRACE_MODE() OB_LOGGER.set_trace_mode(false) #define CANCLE_OB_LOG_TRACE_MODE() OB_LOGGER.set_trace_mode(false)
#define PRINT_OB_LOG_TRACE_BUF(mod_name, level) \ #define PRINT_OB_LOG_TRACE_BUF(mod_name, level) \
(OB_LOG_NEED_TO_PRINT(level) ? OB_LOGGER.print_trace_buffer("["#mod_name"] ", OB_LOG_LEVEL_DIRECT_NO_ERRCODE(level)) : (void) 0) (OB_LOG_NEED_TO_PRINT(level) ? OB_LOGGER.print_trace_buffer("["#mod_name"] ", OB_LOG_LEVEL_DIRECT_NO_ERRCODE(level)) : (void) 0)
#define PRINT_WITH_TRACE_MODE(mod_name, level, body) \
{ \
bool need_cancle_trace_mode = false; \
if (!IS_OB_LOG_TRACE_MODE()) { \
need_cancle_trace_mode = true; \
} \
SET_OB_LOG_TRACE_MODE(); \
PRINT_OB_LOG_TRACE_BUF(COMMON, INFO); \
body; \
PRINT_OB_LOG_TRACE_BUF(mod_name, level); \
if (need_cancle_trace_mode) { \
CANCLE_OB_LOG_TRACE_MODE(); \
} \
}
//for tests/ob_log_test or others //for tests/ob_log_test or others
#define OB_LOG_MOD_NEED_TO_PRINT(parMod, level) \ #define OB_LOG_MOD_NEED_TO_PRINT(parMod, level) \

View File

@ -54,71 +54,64 @@ void flush_trace()
auto& trace = *OBTRACE; auto& trace = *OBTRACE;
auto& current_span = trace.current_span_; auto& current_span = trace.current_span_;
if (trace.is_inited() && !current_span.is_empty()) { if (trace.is_inited() && !current_span.is_empty()) {
auto* span = current_span.get_first(); auto func = [&] {
::oceanbase::trace::ObSpanCtx* next = nullptr; auto* span = current_span.get_first();
bool need_cancle_trace_mode = false; ::oceanbase::trace::ObSpanCtx* next = nullptr;
if (!IS_OB_LOG_TRACE_MODE()) { while (current_span.get_header() != span) {
need_cancle_trace_mode = true; auto* next = span->get_next();
} if (nullptr != span->tags_ || 0 != span->end_ts_) {
SET_OB_LOG_TRACE_MODE(); int64_t pos = 0;
PRINT_OB_LOG_TRACE_BUF(COMMON, INFO); thread_local char buf[MAX_TRACE_LOG_SIZE];
while (current_span.get_header() != span) { int ret = OB_SUCCESS;
auto* next = span->get_next(); auto* tag = span->tags_;
if (nullptr != span->tags_ || 0 != span->end_ts_) { bool first = true;
int64_t pos = 0; INIT_SPAN(span);
thread_local char buf[MAX_TRACE_LOG_SIZE]; while (OB_SUCC(ret) && OB_NOT_NULL(tag)) {
int ret = OB_SUCCESS; if (pos + 10 >= MAX_TRACE_LOG_SIZE) {
auto* tag = span->tags_; ret = OB_BUF_NOT_ENOUGH;
bool first = true; } else {
INIT_SPAN(span); buf[pos++] = ',';
while (OB_SUCC(ret) && OB_NOT_NULL(tag)) { if (first) {
if (pos + 10 >= MAX_TRACE_LOG_SIZE) { strcpy(buf + pos, "\"tags\":[");
ret = OB_BUF_NOT_ENOUGH; pos += 8;
} else { first = false;
buf[pos++] = ','; }
if (first) { ret = tag->tostring(buf, MAX_TRACE_LOG_SIZE, pos);
strcpy(buf + pos, "\"tags\":["); tag = tag->next_;
pos += 8;
first = false;
} }
ret = tag->tostring(buf, MAX_TRACE_LOG_SIZE, pos);
tag = tag->next_;
} }
} if (0 != pos) {
if (0 != pos) { if (pos + 1 < MAX_TRACE_LOG_SIZE) {
if (pos + 1 < MAX_TRACE_LOG_SIZE) { buf[pos++] = ']';
buf[pos++] = ']'; buf[pos++] = 0;
buf[pos++] = 0; } else {
} else { buf[MAX_TRACE_LOG_SIZE - 2] = ']';
buf[MAX_TRACE_LOG_SIZE - 2] = ']'; buf[MAX_TRACE_LOG_SIZE - 1] = 0;
buf[MAX_TRACE_LOG_SIZE - 1] = 0; }
} }
INIT_SPAN(span->source_span_);
_OBTRACE_LOG(INFO,
TRACE_PATTERN "%s}",
UUID_TOSTRING(trace.get_trace_id()),
__span_type_mapper[span->span_type_],
UUID_TOSTRING(span->span_id_),
span->start_ts_,
span->end_ts_,
UUID_TOSTRING(OB_ISNULL(span->source_span_) ? OBTRACE->get_root_span_id() : span->source_span_->span_id_),
span->is_follow_ ? "true" : "false",
buf);
buf[0] = '\0';
IGNORE_RETURN sql::handle_span_record(sql::get_flt_span_manager(), buf, pos, span);
if (0 != span->end_ts_) {
current_span.remove(span);
trace.freed_span_.add_first(span);
}
span->tags_ = nullptr;
} }
INIT_SPAN(span->source_span_); span = next;
_OBTRACE_LOG(INFO,
TRACE_PATTERN "%s}",
UUID_TOSTRING(trace.get_trace_id()),
__span_type_mapper[span->span_type_],
UUID_TOSTRING(span->span_id_),
span->start_ts_,
span->end_ts_,
UUID_TOSTRING(OB_ISNULL(span->source_span_) ? OBTRACE->get_root_span_id() : span->source_span_->span_id_),
span->is_follow_ ? "true" : "false",
buf);
buf[0] = '\0';
IGNORE_RETURN sql::handle_span_record(sql::get_flt_span_manager(), buf, pos, span);
if (0 != span->end_ts_) {
current_span.remove(span);
trace.freed_span_.add_first(span);
}
span->tags_ = nullptr;
} }
span = next; };
} PRINT_WITH_TRACE_MODE(OBTRACE, INFO, func());
PRINT_OB_LOG_TRACE_BUF(OBTRACE, INFO);
if (need_cancle_trace_mode) {
CANCLE_OB_LOG_TRACE_MODE();
}
trace.offset_ = trace.buffer_size_ / 2; trace.offset_ = trace.buffer_size_ / 2;
} }
} }

View File

@ -614,6 +614,8 @@ int MockTenantModuleEnv::init_before_start_mtl()
STORAGE_LOG(ERROR, "failed to init bandwidth_throttle_", K(ret)); STORAGE_LOG(ERROR, "failed to init bandwidth_throttle_", K(ret));
} else if (OB_FAIL(TG_START(lib::TGDefIDs::ServerGTimer))) { } else if (OB_FAIL(TG_START(lib::TGDefIDs::ServerGTimer))) {
STORAGE_LOG(ERROR, "init timer fail", KR(ret)); STORAGE_LOG(ERROR, "init timer fail", KR(ret));
} else if (OB_FAIL(TG_START(lib::TGDefIDs::MemDumpTimer))) {
STORAGE_LOG(ERROR, "init memory dump timer fail", KR(ret));
} else if (OB_FAIL(ObOptStatMonitorManager::get_instance().init(&sql_proxy_))) { } else if (OB_FAIL(ObOptStatMonitorManager::get_instance().init(&sql_proxy_))) {
STORAGE_LOG(ERROR, "failed to init opt stat monitor manager", KR(ret)); STORAGE_LOG(ERROR, "failed to init opt stat monitor manager", KR(ret));
} else { } else {
@ -761,6 +763,11 @@ void MockTenantModuleEnv::destroy()
TG_WAIT(lib::TGDefIDs::ServerGTimer); TG_WAIT(lib::TGDefIDs::ServerGTimer);
TG_DESTROY(lib::TGDefIDs::ServerGTimer); TG_DESTROY(lib::TGDefIDs::ServerGTimer);
TG_STOP(lib::TGDefIDs::MemDumpTimer);
TG_WAIT(lib::TGDefIDs::MemDumpTimer);
TG_DESTROY(lib::TGDefIDs::MemDumpTimer);
destroyed_ = true; destroyed_ = true;
chdir(curr_dir_); chdir(curr_dir_);

View File

@ -502,6 +502,10 @@ void ObServer::destroy()
TG_DESTROY(lib::TGDefIDs::CTASCleanUpTimer); TG_DESTROY(lib::TGDefIDs::CTASCleanUpTimer);
FLOG_INFO("ctas clean up timer destroyed"); FLOG_INFO("ctas clean up timer destroyed");
FLOG_INFO("begin to destroy memory dump timer");
TG_DESTROY(lib::TGDefIDs::MemDumpTimer);
FLOG_INFO("memory dump timer destroyed");
FLOG_INFO("begin to destroy redef heart beat task"); FLOG_INFO("begin to destroy redef heart beat task");
TG_DESTROY(lib::TGDefIDs::RedefHeartBeatTask); TG_DESTROY(lib::TGDefIDs::RedefHeartBeatTask);
FLOG_INFO("redef heart beat task destroyed"); FLOG_INFO("redef heart beat task destroyed");
@ -1039,6 +1043,10 @@ int ObServer::stop()
TG_STOP(lib::TGDefIDs::CTASCleanUpTimer); TG_STOP(lib::TGDefIDs::CTASCleanUpTimer);
FLOG_INFO("ctas clean up timer stopped"); FLOG_INFO("ctas clean up timer stopped");
FLOG_INFO("begin to stop memory dump timer");
TG_STOP(lib::TGDefIDs::MemDumpTimer);
FLOG_INFO("memory dump timer stopped");
FLOG_INFO("begin to stop ctas clean up timer"); FLOG_INFO("begin to stop ctas clean up timer");
TG_STOP(lib::TGDefIDs::HeartBeatCheckTask); TG_STOP(lib::TGDefIDs::HeartBeatCheckTask);
FLOG_INFO("ctas clean up timer stopped"); FLOG_INFO("ctas clean up timer stopped");
@ -1239,6 +1247,10 @@ int ObServer::wait()
TG_WAIT(lib::TGDefIDs::CTASCleanUpTimer); TG_WAIT(lib::TGDefIDs::CTASCleanUpTimer);
FLOG_INFO("wait ctas clean up timer success"); FLOG_INFO("wait ctas clean up timer success");
FLOG_INFO("begin to wait memory dump timer");
TG_WAIT(lib::TGDefIDs::MemDumpTimer);
FLOG_INFO("wait memory dump timer success");
FLOG_INFO("begin to wait root service"); FLOG_INFO("begin to wait root service");
root_service_.wait(); root_service_.wait();
FLOG_INFO("wait root service success"); FLOG_INFO("wait root service success");
@ -1527,6 +1539,8 @@ int ObServer::init_config()
LOG_ERROR("fail to init server trace timer", KR(ret)); LOG_ERROR("fail to init server trace timer", KR(ret));
} else if (OB_FAIL(TG_START(lib::TGDefIDs::CTASCleanUpTimer))) { } else if (OB_FAIL(TG_START(lib::TGDefIDs::CTASCleanUpTimer))) {
LOG_ERROR("fail to init ctas clean up timer", KR(ret)); LOG_ERROR("fail to init ctas clean up timer", KR(ret));
} else if (OB_FAIL(TG_START(lib::TGDefIDs::MemDumpTimer))) {
LOG_ERROR("fail to init memory dump timer", KR(ret));
} else if (OB_FAIL(config_mgr_.base_init())) { } else if (OB_FAIL(config_mgr_.base_init())) {
LOG_ERROR("config_mgr_ base_init failed", KR(ret)); LOG_ERROR("config_mgr_ base_init failed", KR(ret));
} else if (OB_FAIL(config_mgr_.init(sql_proxy_, self_addr_))) { } else if (OB_FAIL(config_mgr_.init(sql_proxy_, self_addr_))) {

View File

@ -390,7 +390,7 @@ int ObMultiTenant::start()
} else if (OB_FAIL(ObTenantNodeBalancer::get_instance().start())) { } else if (OB_FAIL(ObTenantNodeBalancer::get_instance().start())) {
LOG_ERROR("start tenant node balancer thread failed", K(ret)); LOG_ERROR("start tenant node balancer thread failed", K(ret));
// start memstore print timer. // start memstore print timer.
} else if (OB_FAIL(printer.register_timer_task(lib::TGDefIDs::ServerGTimer))) { } else if (OB_FAIL(printer.register_timer_task(lib::TGDefIDs::MemDumpTimer))) {
LOG_ERROR("Fail to register timer task", K(ret)); LOG_ERROR("Fail to register timer task", K(ret));
} else { } else {
LOG_INFO("succ to start multi tenant"); LOG_INFO("succ to start multi tenant");

View File

@ -27,6 +27,13 @@ using namespace oceanbase::lib;
using namespace oceanbase::common; using namespace oceanbase::common;
namespace oceanbase { namespace oceanbase {
namespace common {
void allow_next_syslog(int64_t count)
{
share::ObTaskController::get().allow_next_syslog(count);
}
} // common
namespace share { namespace share {
class ObSyslogPerErrLimiter : public common::ObISyslogRateLimiter class ObSyslogPerErrLimiter : public common::ObISyslogRateLimiter

View File

@ -128,4 +128,5 @@ TG_DEF(CDCService, CDCSrv, "", TG_STATIC, OB_THREAD_POOL, ThreadCountPair(1, 1))
TG_DEF(LogUpdater, LogUpdater, "", TG_STATIC, TIMER) TG_DEF(LogUpdater, LogUpdater, "", TG_STATIC, TIMER)
TG_DEF(HeartBeatCheckTask, HeartBeatCheckTask, "", TG_STATIC, TIMER) TG_DEF(HeartBeatCheckTask, HeartBeatCheckTask, "", TG_STATIC, TIMER)
TG_DEF(RedefHeartBeatTask, RedefHeartBeatTask, "", TG_STATIC, TIMER) TG_DEF(RedefHeartBeatTask, RedefHeartBeatTask, "", TG_STATIC, TIMER)
TG_DEF(MemDumpTimer, MemDumpTimer, "", TG_STATIC, TIMER)
#endif #endif

View File

@ -25,12 +25,11 @@ namespace oceanbase
using namespace share; using namespace share;
namespace storage namespace storage
{ {
void ObPrintTenantMemoryUsage::runTimerTask() void ObPrintTenantMemoryUsage::runTimerTask()
{ {
LOG_INFO("=== Run print tenant memory usage task ==="); LOG_INFO("=== Run print tenant memory usage task ===");
ObTenantMemoryPrinter &printer = ObTenantMemoryPrinter::get_instance(); ObTenantMemoryPrinter &printer = ObTenantMemoryPrinter::get_instance();
printer.print_tenant_usage(); PRINT_WITH_TRACE_MODE(LIB, INFO, printer.print_tenant_usage());
ObObjFreeListList::get_freelists().dump(); ObObjFreeListList::get_freelists().dump();
} }