[branch-2.1](memory) Modify the default JEMALLOC_CONF and support flushing Jemalloc tcache (#39829)

pick #38185
This commit is contained in:
Xinyi Zou
2024-08-23 17:21:42 +08:00
committed by GitHub
parent c40246efa9
commit baf5b71b39
10 changed files with 5766 additions and 6 deletions

View File

@ -92,4 +92,5 @@ header:
- "pytest/qe"
- "pytest/sys/data"
- "pytest/deploy/*.conf"
- "tools/jeprof"
comment: on-failure

View File

@ -143,8 +143,11 @@ public:
if (config::enable_je_purge_dirty_pages) {
try {
// Purge all unused dirty pages for arena <i>, or for all arenas if <i> equals MALLCTL_ARENAS_ALL.
jemallctl(fmt::format("arena.{}.purge", MALLCTL_ARENAS_ALL).c_str(), nullptr,
nullptr, nullptr, 0);
int err = jemallctl(fmt::format("arena.{}.purge", MALLCTL_ARENAS_ALL).c_str(),
nullptr, nullptr, nullptr, 0);
if (err) {
LOG(WARNING) << "Jemalloc purge all unused dirty pages failed";
}
} catch (...) {
LOG(WARNING) << "Purge all unused dirty pages for all arenas failed";
}
@ -152,6 +155,22 @@ public:
#endif
}
// The limit of `tcache` is the number of pages, not the total number of page bytes.
// `tcache` has two cleaning opportunities: 1. when the number of memory allocations and releases reaches a certain number,
// pages that have not been used for a long time are recycled; 2. all `tcache` is recycled when the thread exits.
// Here a total size limit is added.
static inline void je_thread_tcache_flush() {
#ifdef USE_JEMALLOC
    // Issue the jemalloc "thread.tcache.flush" mallctl once the allocator cache,
    // excluding dirty pages, grows past 1 GiB. Without USE_JEMALLOC this
    // function compiles to a no-op.
    constexpr size_t kFlushThresholdBytes = (1ULL << 30); // 1G

    // NOTE(review): the operand types of this subtraction are not visible here;
    // confirm it cannot wrap around when the dirty-page figure is the larger one.
    const bool over_threshold =
            allocator_cache_mem() - je_dirty_pages_mem() > kFlushThresholdBytes;
    if (!over_threshold) {
        return;
    }

    // Best effort: a non-zero return code is only logged, never propagated.
    const int rc = jemallctl("thread.tcache.flush", nullptr, nullptr, nullptr, 0);
    if (rc != 0) {
        LOG(WARNING) << "Jemalloc thread.tcache.flush failed";
    }
#endif
}
static std::mutex je_purge_dirty_pages_lock;
static std::condition_variable je_purge_dirty_pages_cv;
static std::atomic<bool> je_purge_dirty_pages_notify;

View File

@ -89,6 +89,8 @@ void Allocator<clear_memory_, mmap_populate, use_mmap, MemoryAllocator>::sys_mem
print_id(doris::thread_context()->task_id()),
doris::thread_context()->get_thread_id(),
doris::config::thread_wait_gc_max_milliseconds, err_msg);
// only query thread exceeded memory limit for the first time and wait_gc is true.
doris::MemInfo::je_thread_tcache_flush();
if (!doris::config::disable_memory_gc) {
while (wait_milliseconds < doris::config::thread_wait_gc_max_milliseconds) {
std::this_thread::sleep_for(std::chrono::milliseconds(100));

View File

@ -280,7 +280,7 @@ export LIBHDFS_OPTS="${final_java_opt}"
#echo "LIBHDFS_OPTS: ${LIBHDFS_OPTS}"
# see https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:30000,dirty_decay_ms:30000,oversize_threshold:0,lg_tcache_max:16,prof_prefix:jeprof.out"
export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1"
export AWS_EC2_METADATA_DISABLED=true
export AWS_MAX_ATTEMPTS=2

View File

@ -358,7 +358,7 @@ export LIBHDFS_OPTS="${final_java_opt}"
# log "LIBHDFS_OPTS: ${LIBHDFS_OPTS}"
if [[ -z ${JEMALLOC_CONF} ]]; then
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,lg_tcache_max:20,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false"
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1"
fi
if [[ -z ${JEMALLOC_PROF_PRFIX} ]]; then

View File

@ -33,8 +33,8 @@ JAVA_OPTS_FOR_JDK_17="-Xmx1024m -DlogPath=$LOG_DIR/jni.log -Xlog:gc:$LOG_DIR/be.
# https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
# https://jemalloc.net/jemalloc.3.html
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false"
JEMALLOC_PROF_PRFIX=""
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1"
JEMALLOC_PROF_PRFIX="jemalloc_heap_profile_"
# ports for admin, web, heartbeat service
be_port = 9060

View File

@ -20,6 +20,11 @@ PPROF_TMPDIR="$DORIS_HOME/log/"
# For jdk 17+, this JAVA_OPTS will be used as default JVM options
JAVA_OPTS_FOR_JDK_17="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xlog:gc:$DORIS_HOME/log/be.gc.log.$CUR_DATE -Djavax.security.auth.useSubjectCredsOnly=false -Dsun.security.krb5.debug=true -Dsun.java.command=DorisBE -XX:-CriticalJNINatives --add-opens=java.base/java.net=ALL-UNNAMED"
# https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
# https://jemalloc.net/jemalloc.3.html
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1"
JEMALLOC_PROF_PRFIX="jemalloc_heap_profile_"
# INFO, WARNING, ERROR, FATAL
sys_log_level = INFO

View File

@ -20,6 +20,11 @@ PPROF_TMPDIR="$DORIS_HOME/log/"
# For jdk 17+, this JAVA_OPTS will be used as default JVM options
JAVA_OPTS_FOR_JDK_17="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xlog:gc:$DORIS_HOME/log/be.gc.log.$CUR_DATE -Djavax.security.auth.useSubjectCredsOnly=false -Dsun.security.krb5.debug=true -Dsun.java.command=DorisBE -XX:-CriticalJNINatives --add-opens=java.base/java.net=ALL-UNNAMED"
# https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
# https://jemalloc.net/jemalloc.3.html
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1"
JEMALLOC_PROF_PRFIX="jemalloc_heap_profile_"
# INFO, WARNING, ERROR, FATAL
sys_log_level = INFO

View File

@ -17,6 +17,11 @@
PPROF_TMPDIR="$DORIS_HOME/log/"
# https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
# https://jemalloc.net/jemalloc.3.html
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1"
JEMALLOC_PROF_PRFIX="jemalloc_heap_profile_"
# INFO, WARNING, ERROR, FATAL
sys_log_level = INFO
sys_log_verbose_modules = vrow_distribution

5723
tools/jeprof Executable file

File diff suppressed because it is too large Load Diff