diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 7c64596bac..cfb002a4c4 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -245,7 +245,7 @@ void Daemon::memory_gc_thread() { // No longer full gc and minor gc during sleep. memory_full_gc_sleep_time_ms = config::memory_gc_sleep_time_ms; memory_minor_gc_sleep_time_ms = config::memory_gc_sleep_time_ms; - doris::MemTrackerLimiter::print_log_process_usage("process full gc", false); + doris::MemTrackerLimiter::print_log_process_usage("Start Full GC", false); if (doris::MemInfo::process_full_gc()) { // If there is not enough memory to be gc, the process memory usage will not be printed in the next continuous gc. doris::MemTrackerLimiter::enable_print_log_process_usage(); @@ -255,7 +255,7 @@ void Daemon::memory_gc_thread() { proc_mem_no_allocator_cache >= doris::MemInfo::soft_mem_limit())) { // No minor gc during sleep, but full gc is possible. memory_minor_gc_sleep_time_ms = config::memory_gc_sleep_time_ms; - doris::MemTrackerLimiter::print_log_process_usage("process minor gc", false); + doris::MemTrackerLimiter::print_log_process_usage("Start Minor GC", false); if (doris::MemInfo::process_minor_gc()) { doris::MemTrackerLimiter::enable_print_log_process_usage(); } diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp index d03bd1ac00..683971ecac 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.cpp +++ b/be/src/runtime/memory/mem_tracker_limiter.cpp @@ -137,7 +137,7 @@ void MemTrackerLimiter::make_process_snapshots(std::vector process_mem_sum += it.second->current_value(); } - snapshot.type = "tc/jemalloc_cache"; + snapshot.type = "tc/jemalloc_free_memory"; snapshot.label = ""; snapshot.limit = -1; snapshot.cur_consumption = MemInfo::allocator_cache_mem(); diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp index f50f41e198..200d346ded 100644 --- a/be/src/util/mem_info.cpp +++ b/be/src/util/mem_info.cpp @@ 
-77,14 +77,18 @@ int64_t MemInfo::_s_process_full_gc_size = -1; void MemInfo::refresh_allocator_mem() { #if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER) #elif defined(USE_JEMALLOC) + // 'epoch' is a special mallctl -- it updates the statistics. Without it, all + // the following calls will return stale values. It increments and returns + // the current epoch number, which might be useful to log as a sanity check. uint64_t epoch = 0; size_t sz = sizeof(epoch); jemallctl("epoch", &epoch, &sz, &epoch, sz); // https://jemalloc.net/jemalloc.3.html - _s_allocator_cache_mem = - get_je_metrics(fmt::format("stats.arenas.{}.tcache_bytes", MALLCTL_ARENAS_ALL)) + - get_je_metrics("stats.metadata"); + // https://www.bookstack.cn/read/aliyun-rds-core/4a0cdf677f62feb3.md + _s_allocator_cache_mem = get_je_all_arena_metrics("tcache_bytes") + + get_je_metrics("stats.metadata") + + get_je_all_arena_metrics("pdirty") * get_page_size(); _s_allocator_cache_mem_str = PrettyPrinter::print(static_cast(_s_allocator_cache_mem), TUnit::BYTES); _s_virtual_memory_used = get_je_metrics("stats.mapped"); @@ -125,6 +129,7 @@ void MemInfo::process_cache_gc(int64_t& freed_mem) { segment_v2::PRIMARY_KEY_INDEX_PAGE); StoragePageCache::instance()->prune(segment_v2::PRIMARY_KEY_INDEX_PAGE); } + je_purge_all_arena_dirty_pages(); } // step1: free all cache @@ -139,7 +144,8 @@ bool MemInfo::process_minor_gc() { std::string mem_available_str = MemInfo::sys_mem_available_str(); Defer defer {[&]() { - LOG(INFO) << fmt::format("Process Minor GC Free Memory {} Bytes. cost(us): {}", freed_mem, + je_purge_all_arena_dirty_pages(); + LOG(INFO) << fmt::format("End Minor GC, Free Memory {} Bytes. cost(us): {}", freed_mem, watch.elapsed_time() / 1000); }}; @@ -181,7 +187,8 @@ bool MemInfo::process_full_gc() { std::string mem_available_str = MemInfo::sys_mem_available_str(); Defer defer {[&]() { - LOG(INFO) << fmt::format("Process Full GC Free Memory {} Bytes. 
cost(us): {}", freed_mem,
+        je_purge_all_arena_dirty_pages();
+        LOG(INFO) << fmt::format("End Full GC, Free Memory {} Bytes. cost(us): {}", freed_mem,
                                  watch.elapsed_time() / 1000);
     }};
 
diff --git a/be/src/util/mem_info.h b/be/src/util/mem_info.h
index 12c70d8cc4..89a66b0658 100644
--- a/be/src/util/mem_info.h
+++ b/be/src/util/mem_info.h
@@ -26,6 +26,12 @@
 #include
 #include
 
+#if !defined(__APPLE__) || !defined(_POSIX_C_SOURCE)
+#include <unistd.h>
+#else
+#include <mach/vm_page_size.h>
+#endif
+
 #include "common/logging.h"
 #ifdef USE_JEMALLOC
 #include "jemalloc/jemalloc.h"
@@ -46,6 +52,15 @@ public:
 
     static inline bool initialized() { return _s_initialized; }
 
+    // Size of one memory page in bytes; used to convert jemalloc page counts (e.g. pdirty) to bytes.
+    static int get_page_size() {
+#if !defined(__APPLE__) || !defined(_POSIX_C_SOURCE)
+        return getpagesize();
+#else
+        return vm_page_size;
+#endif
+    }
+
     // Get total physical memory in bytes (if has cgroups memory limits, return the limits).
     static inline int64_t physical_mem() {
         DCHECK(_s_initialized);
@@ -83,6 +98,25 @@ public:
 #endif
         return 0;
     }
+
+    // Returns the jemalloc statistic "stats.arenas.<i>.<name>" with <i> = MALLCTL_ARENAS_ALL,
+    // i.e. the value merged over all arenas. Returns 0 when not built with jemalloc.
+    static inline int64_t get_je_all_arena_metrics(const std::string& name) {
+#ifdef USE_JEMALLOC
+        return get_je_metrics(fmt::format("stats.arenas.{}.{}", MALLCTL_ARENAS_ALL, name));
+#endif
+        return 0;
+    }
+
+    // Best-effort purge: the jemallctl return code is ignored; no-op when not built with jemalloc.
+    static inline void je_purge_all_arena_dirty_pages() {
+#ifdef USE_JEMALLOC
+        // Purge all unused dirty pages for arena <i>, or for all arenas if <i> equals MALLCTL_ARENAS_ALL.
+        jemallctl(fmt::format("arena.{}.purge", MALLCTL_ARENAS_ALL).c_str(), nullptr, nullptr,
+                  nullptr, 0);
+#endif
+    }
+
     static inline size_t allocator_virtual_mem() { return _s_virtual_memory_used; }
     static inline size_t allocator_cache_mem() { return _s_allocator_cache_mem; }
     static inline std::string allocator_cache_mem_str() { return _s_allocator_cache_mem_str; }
@@ -94,6 +128,13 @@ public:
     // obtained by the process malloc, not the physical memory actually used by the process in the OS.
static void refresh_allocator_mem(); + /** jemalloc pdirty is number of pages within unused extents that are potentially + * dirty, and for which madvise() or similar has not been called. + * + * So they will be subtracted from RSS to make accounting more + * accurate, since those pages are not really RSS but a memory + * that can be used at anytime via jemalloc. + */ static inline void refresh_proc_mem_no_allocator_cache() { _s_proc_mem_no_allocator_cache = PerfCounters::get_vm_rss() - static_cast(_s_allocator_cache_mem); diff --git a/be/src/util/system_metrics.cpp b/be/src/util/system_metrics.cpp index fa8f5a181a..ee7db9494c 100644 --- a/be/src/util/system_metrics.cpp +++ b/be/src/util/system_metrics.cpp @@ -117,6 +117,12 @@ DEFINE_MEMORY_GAUGE_METRIC(jemalloc_metadata_bytes, MetricUnit::BYTES); DEFINE_MEMORY_GAUGE_METRIC(jemalloc_resident_bytes, MetricUnit::BYTES); DEFINE_MEMORY_GAUGE_METRIC(jemalloc_mapped_bytes, MetricUnit::BYTES); DEFINE_MEMORY_GAUGE_METRIC(jemalloc_retained_bytes, MetricUnit::BYTES); +DEFINE_MEMORY_GAUGE_METRIC(jemalloc_tcache_bytes, MetricUnit::BYTES); +DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pactive_num, MetricUnit::NOUNIT); +DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pdirty_num, MetricUnit::NOUNIT); +DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pmuzzy_num, MetricUnit::NOUNIT); +DEFINE_MEMORY_GAUGE_METRIC(jemalloc_dirty_purged_num, MetricUnit::NOUNIT); +DEFINE_MEMORY_GAUGE_METRIC(jemalloc_muzzy_purged_num, MetricUnit::NOUNIT); #endif struct MemoryMetrics { @@ -142,6 +148,12 @@ struct MemoryMetrics { INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_resident_bytes); INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_mapped_bytes); INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_retained_bytes); + INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_tcache_bytes); + INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pactive_num); + INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pdirty_num); + INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pmuzzy_num); + 
INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_dirty_purged_num); + INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_muzzy_purged_num); #endif } @@ -167,6 +179,12 @@ struct MemoryMetrics { IntGauge* memory_jemalloc_resident_bytes; IntGauge* memory_jemalloc_mapped_bytes; IntGauge* memory_jemalloc_retained_bytes; + IntGauge* memory_jemalloc_tcache_bytes; + IntGauge* memory_jemalloc_pactive_num; + IntGauge* memory_jemalloc_pdirty_num; + IntGauge* memory_jemalloc_pmuzzy_num; + IntGauge* memory_jemalloc_dirty_purged_num; + IntGauge* memory_jemalloc_muzzy_purged_num; #endif }; @@ -457,6 +475,18 @@ void SystemMetrics::update_allocator_metrics() { MemInfo::get_je_metrics("stats.mapped")); _memory_metrics->memory_jemalloc_retained_bytes->set_value( MemInfo::get_je_metrics("stats.retained")); + _memory_metrics->memory_jemalloc_tcache_bytes->set_value( + MemInfo::get_je_all_arena_metrics("tcache_bytes")); + _memory_metrics->memory_jemalloc_pactive_num->set_value( + MemInfo::get_je_all_arena_metrics("pactive")); + _memory_metrics->memory_jemalloc_pdirty_num->set_value( + MemInfo::get_je_all_arena_metrics("pdirty")); + _memory_metrics->memory_jemalloc_pmuzzy_num->set_value( + MemInfo::get_je_all_arena_metrics("pmuzzy")); + _memory_metrics->memory_jemalloc_dirty_purged_num->set_value( + MemInfo::get_je_all_arena_metrics("dirty_purged")); + _memory_metrics->memory_jemalloc_muzzy_purged_num->set_value( + MemInfo::get_je_all_arena_metrics("muzzy_purged")); #else _memory_metrics->memory_tcmalloc_allocated_bytes->set_value( MemInfo::get_tc_metrics("generic.total_physical_bytes"));