Files
doris/be/src/common/daemon.cpp
Xinyi Zou 01d012bab7 [fix](memory) Remove page cache regular clear, disabled jemalloc prof by default (#18218)
Remove page cache regular clear
Now the page cache is turned off by default. If the user manually opens the page cache, it can be considered that the user can accept the memory usage of the page cache, and then can consider adding a manual clear command to the cache.

fix memory gc cancel top memory query

jemalloc prof is not enabled by default
2023-03-30 09:39:37 +08:00

484 lines
20 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "common/daemon.h"
#include <gflags/gflags.h>
#include <gperftools/malloc_extension.h>
#include <signal.h>
#include "common/config.h"
#include "common/logging.h"
#include "exprs/math_functions.h"
#include "exprs/string_functions.h"
#include "olap/options.h"
#include "olap/storage_engine.h"
#include "runtime/block_spill_manager.h"
#include "runtime/exec_env.h"
#include "runtime/fragment_mgr.h"
#include "runtime/load_channel_mgr.h"
#include "runtime/memory/chunk_allocator.h"
#include "runtime/user_function_cache.h"
#include "service/backend_options.h"
#include "util/cpu_info.h"
#include "util/debug_util.h"
#include "util/disk_info.h"
#include "util/doris_metrics.h"
#include "util/mem_info.h"
#include "util/network_util.h"
#include "util/system_metrics.h"
#include "util/thrift_util.h"
#include "util/time.h"
namespace doris {
bool k_doris_exit = false;
void Daemon::tcmalloc_gc_thread() {
// TODO All cache GC wish to be supported
#if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) && \
!defined(USE_JEMALLOC)
// Limit size of tcmalloc cache via release_rate and max_cache_percent.
// We adjust release_rate according to memory_pressure, which is usage percent of memory.
int64_t max_cache_percent = 60;
double release_rates[10] = {1.0, 1.0, 1.0, 5.0, 5.0, 20.0, 50.0, 100.0, 500.0, 2000.0};
int64_t pressure_limit = 90;
bool is_performance_mode = false;
int64_t physical_limit_bytes =
std::min(MemInfo::physical_mem() - MemInfo::sys_mem_available_low_water_mark(),
MemInfo::mem_limit());
if (config::memory_mode == std::string("performance")) {
max_cache_percent = 100;
pressure_limit = 90;
is_performance_mode = true;
physical_limit_bytes = std::min(MemInfo::mem_limit(), MemInfo::physical_mem());
} else if (config::memory_mode == std::string("compact")) {
max_cache_percent = 20;
pressure_limit = 80;
}
int last_ms = 0;
const int kMaxLastMs = 30000;
const int kIntervalMs = 10;
size_t init_aggressive_decommit = 0;
size_t current_aggressive_decommit = 0;
size_t expected_aggressive_decommit = 0;
int64_t last_memory_pressure = 0;
MallocExtension::instance()->GetNumericProperty("tcmalloc.aggressive_memory_decommit",
&init_aggressive_decommit);
current_aggressive_decommit = init_aggressive_decommit;
while (!_stop_background_threads_latch.wait_for(std::chrono::milliseconds(kIntervalMs))) {
size_t tc_used_bytes = 0;
size_t tc_alloc_bytes = 0;
size_t rss = PerfCounters::get_vm_rss();
MallocExtension::instance()->GetNumericProperty("generic.total_physical_bytes",
&tc_alloc_bytes);
MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes",
&tc_used_bytes);
int64_t tc_cached_bytes = (int64_t)tc_alloc_bytes - (int64_t)tc_used_bytes;
int64_t to_free_bytes =
(int64_t)tc_cached_bytes - ((int64_t)tc_used_bytes * max_cache_percent / 100);
to_free_bytes = std::max(to_free_bytes, (int64_t)0);
int64_t memory_pressure = 0;
int64_t rss_pressure = 0;
int64_t alloc_bytes = std::max(rss, tc_alloc_bytes);
memory_pressure = alloc_bytes * 100 / physical_limit_bytes;
rss_pressure = rss * 100 / physical_limit_bytes;
expected_aggressive_decommit = init_aggressive_decommit;
if (memory_pressure > pressure_limit) {
// We are reaching oom, so release cache aggressively.
// Ideally, we should reuse cache and not allocate from system any more,
// however, it is hard to set limit on cache of tcmalloc and doris
// use mmap in vectorized mode.
// Limit cache capactiy is enough.
if (rss_pressure > pressure_limit) {
int64_t min_free_bytes = alloc_bytes - physical_limit_bytes * 9 / 10;
to_free_bytes = std::max(to_free_bytes, min_free_bytes);
to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 30 / 100);
// We assure that we have at least 500M bytes in cache.
to_free_bytes = std::min(to_free_bytes, tc_cached_bytes - 500 * 1024 * 1024);
expected_aggressive_decommit = 1;
}
last_ms = kMaxLastMs;
} else if (memory_pressure > (pressure_limit - 10)) {
// In most cases, adjusting release rate is enough, if memory are consumed quickly
// we should release manually.
if (last_memory_pressure <= (pressure_limit - 10)) {
to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 10 / 100);
}
}
int release_rate_index = memory_pressure / 10;
double release_rate = 1.0;
if (release_rate_index >= sizeof(release_rates) / sizeof(release_rates[0])) {
release_rate = 2000.0;
} else {
release_rate = release_rates[release_rate_index];
}
MallocExtension::instance()->SetMemoryReleaseRate(release_rate);
if ((current_aggressive_decommit != expected_aggressive_decommit) && !is_performance_mode) {
MallocExtension::instance()->SetNumericProperty("tcmalloc.aggressive_memory_decommit",
expected_aggressive_decommit);
current_aggressive_decommit = expected_aggressive_decommit;
}
last_memory_pressure = memory_pressure;
// We release at least 2% bytes once, frequent releasing hurts performance.
if (to_free_bytes > (physical_limit_bytes * 2 / 100)) {
last_ms += kIntervalMs;
if (last_ms >= kMaxLastMs) {
LOG(INFO) << "generic.current_allocated_bytes " << tc_used_bytes
<< ", generic.total_physical_bytes " << tc_alloc_bytes << ", rss " << rss
<< ", max_cache_percent " << max_cache_percent << ", release_rate "
<< release_rate << ", memory_pressure " << memory_pressure
<< ", physical_limit_bytes " << physical_limit_bytes << ", to_free_bytes "
<< to_free_bytes << ", current_aggressive_decommit "
<< current_aggressive_decommit;
MallocExtension::instance()->ReleaseToSystem(to_free_bytes);
last_ms = 0;
}
} else {
last_ms = 0;
}
}
#endif
}
void Daemon::memory_maintenance_thread() {
int32_t interval_milliseconds = config::memory_maintenance_sleep_time_ms;
int64_t last_print_proc_mem = PerfCounters::get_vm_rss();
while (!_stop_background_threads_latch.wait_for(
std::chrono::milliseconds(interval_milliseconds))) {
if (!MemInfo::initialized()) {
continue;
}
// Refresh process memory metrics.
doris::PerfCounters::refresh_proc_status();
doris::MemInfo::refresh_proc_meminfo();
doris::MemInfo::refresh_proc_mem_no_allocator_cache();
// Update and print memory stat when the memory changes by 100M.
if (abs(last_print_proc_mem - PerfCounters::get_vm_rss()) > 104857600) {
last_print_proc_mem = PerfCounters::get_vm_rss();
doris::MemTrackerLimiter::enable_print_log_process_usage();
// Refresh mem tracker each type counter.
doris::MemTrackerLimiter::refresh_global_counter();
// Refresh allocator memory metrics.
#if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER)
doris::MemInfo::refresh_allocator_mem();
if (config::enable_system_metrics) {
DorisMetrics::instance()->system_metrics()->update_allocator_metrics();
}
#endif
if (doris::config::memory_debug) {
LOG(INFO) << MemTrackerLimiter::process_mem_log_str();
LOG_EVERY_N(INFO, 10)
<< doris::MemTrackerLimiter::log_process_usage_str("memory debug", false);
}
}
}
}
void Daemon::memory_gc_thread() {
int32_t interval_milliseconds = config::memory_maintenance_sleep_time_ms;
int32_t memory_minor_gc_sleep_time_ms = 0;
int32_t memory_full_gc_sleep_time_ms = 0;
while (!_stop_background_threads_latch.wait_for(
std::chrono::milliseconds(interval_milliseconds))) {
if (!MemInfo::initialized() || !ExecEnv::GetInstance()->initialized()) {
continue;
}
if (memory_full_gc_sleep_time_ms <= 0 &&
(doris::MemInfo::sys_mem_available() <
doris::MemInfo::sys_mem_available_low_water_mark() ||
doris::MemInfo::proc_mem_no_allocator_cache() >= doris::MemInfo::mem_limit())) {
// No longer full gc and minor gc during sleep.
memory_full_gc_sleep_time_ms = config::memory_gc_sleep_time_s * 1000;
memory_minor_gc_sleep_time_ms = config::memory_gc_sleep_time_s * 1000;
doris::MemTrackerLimiter::print_log_process_usage("process full gc", false);
if (doris::MemInfo::process_full_gc()) {
// If there is not enough memory to be gc, the process memory usage will not be printed in the next continuous gc.
doris::MemTrackerLimiter::enable_print_log_process_usage();
}
} else if (memory_minor_gc_sleep_time_ms <= 0 &&
(doris::MemInfo::sys_mem_available() <
doris::MemInfo::sys_mem_available_warning_water_mark() ||
doris::MemInfo::proc_mem_no_allocator_cache() >=
doris::MemInfo::soft_mem_limit())) {
// No minor gc during sleep, but full gc is possible.
memory_minor_gc_sleep_time_ms = config::memory_gc_sleep_time_s * 1000;
doris::MemTrackerLimiter::print_log_process_usage("process minor gc", false);
if (doris::MemInfo::process_minor_gc()) {
doris::MemTrackerLimiter::enable_print_log_process_usage();
}
} else {
if (memory_full_gc_sleep_time_ms > 0) {
memory_full_gc_sleep_time_ms -= interval_milliseconds;
}
if (memory_minor_gc_sleep_time_ms > 0) {
memory_minor_gc_sleep_time_ms -= interval_milliseconds;
}
}
}
}
void Daemon::load_channel_tracker_refresh_thread() {
// Refresh the memory statistics of the load channel tracker more frequently,
// which helps to accurately control the memory of LoadChannelMgr.
while (!_stop_background_threads_latch.wait_for(
std::chrono::milliseconds(config::load_channel_memory_refresh_sleep_time_ms))) {
if (ExecEnv::GetInstance()->initialized()) {
doris::ExecEnv::GetInstance()->load_channel_mgr()->refresh_mem_tracker();
}
}
}
void Daemon::memory_tracker_profile_refresh_thread() {
while (!_stop_background_threads_latch.wait_for(std::chrono::milliseconds(50))) {
MemTracker::refresh_all_tracker_profile();
MemTrackerLimiter::refresh_all_tracker_profile();
}
}
/*
* this thread will calculate some metrics at a fix interval(15 sec)
* 1. push bytes per second
* 2. scan bytes per second
* 3. max io util of all disks
* 4. max network send bytes rate
* 5. max network receive bytes rate
*/
void Daemon::calculate_metrics_thread() {
int64_t last_ts = -1L;
int64_t lst_query_bytes = -1;
std::map<std::string, int64_t> lst_disks_io_time;
std::map<std::string, int64_t> lst_net_send_bytes;
std::map<std::string, int64_t> lst_net_receive_bytes;
do {
DorisMetrics::instance()->metric_registry()->trigger_all_hooks(true);
if (last_ts == -1L) {
last_ts = GetMonoTimeMicros() / 1000;
lst_query_bytes = DorisMetrics::instance()->query_scan_bytes->value();
if (config::enable_system_metrics) {
DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time);
DorisMetrics::instance()->system_metrics()->get_network_traffic(
&lst_net_send_bytes, &lst_net_receive_bytes);
}
} else {
int64_t current_ts = GetMonoTimeMicros() / 1000;
long interval = (current_ts - last_ts) / 1000;
last_ts = current_ts;
// 1. query bytes per second
int64_t current_query_bytes = DorisMetrics::instance()->query_scan_bytes->value();
int64_t qps = (current_query_bytes - lst_query_bytes) / (interval + 1);
DorisMetrics::instance()->query_scan_bytes_per_second->set_value(qps < 0 ? 0 : qps);
lst_query_bytes = current_query_bytes;
if (config::enable_system_metrics) {
// 2. max disk io util
DorisMetrics::instance()->system_metrics()->update_max_disk_io_util_percent(
lst_disks_io_time, 15);
// update lst map
DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time);
// 3. max network traffic
int64_t max_send = 0;
int64_t max_receive = 0;
DorisMetrics::instance()->system_metrics()->get_max_net_traffic(
lst_net_send_bytes, lst_net_receive_bytes, 15, &max_send, &max_receive);
DorisMetrics::instance()->system_metrics()->update_max_network_send_bytes_rate(
max_send);
DorisMetrics::instance()->system_metrics()->update_max_network_receive_bytes_rate(
max_receive);
// update lst map
DorisMetrics::instance()->system_metrics()->get_network_traffic(
&lst_net_send_bytes, &lst_net_receive_bytes);
}
DorisMetrics::instance()->all_rowsets_num->set_value(
StorageEngine::instance()->tablet_manager()->get_rowset_nums());
DorisMetrics::instance()->all_segments_num->set_value(
StorageEngine::instance()->tablet_manager()->get_segment_nums());
}
} while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(15)));
}
// clean up stale spilled files
void Daemon::block_spill_gc_thread() {
while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(60))) {
if (ExecEnv::GetInstance()->initialized()) {
ExecEnv::GetInstance()->block_spill_mgr()->gc(200);
}
}
}
static void init_doris_metrics(const std::vector<StorePath>& store_paths) {
bool init_system_metrics = config::enable_system_metrics;
std::set<std::string> disk_devices;
std::vector<std::string> network_interfaces;
std::vector<std::string> paths;
for (auto& store_path : store_paths) {
paths.emplace_back(store_path.path);
}
if (init_system_metrics) {
auto st = DiskInfo::get_disk_devices(paths, &disk_devices);
if (!st.ok()) {
LOG(WARNING) << "get disk devices failed, status=" << st;
return;
}
st = get_inet_interfaces(&network_interfaces, BackendOptions::is_bind_ipv6());
if (!st.ok()) {
LOG(WARNING) << "get inet interfaces failed, status=" << st;
return;
}
}
DorisMetrics::instance()->initialize(init_system_metrics, disk_devices, network_interfaces);
}
void signal_handler(int signal) {
if (signal == SIGINT || signal == SIGTERM) {
k_doris_exit = true;
}
}
int install_signal(int signo, void (*handler)(int)) {
struct sigaction sa;
memset(&sa, 0, sizeof(struct sigaction));
sa.sa_handler = handler;
sigemptyset(&sa.sa_mask);
auto ret = sigaction(signo, &sa, nullptr);
if (ret != 0) {
char buf[64];
LOG(ERROR) << "install signal failed, signo=" << signo << ", errno=" << errno
<< ", errmsg=" << strerror_r(errno, buf, sizeof(buf));
}
return ret;
}
void init_signals() {
auto ret = install_signal(SIGINT, signal_handler);
if (ret < 0) {
exit(-1);
}
ret = install_signal(SIGTERM, signal_handler);
if (ret < 0) {
exit(-1);
}
}
void Daemon::init(int argc, char** argv, const std::vector<StorePath>& paths) {
// google::SetVersionString(get_build_version(false));
// google::ParseCommandLineFlags(&argc, &argv, true);
google::ParseCommandLineFlags(&argc, &argv, true);
init_glog("be");
LOG(INFO) << get_version_string(false);
init_thrift_logging();
CpuInfo::init();
DiskInfo::init();
MemInfo::init();
UserFunctionCache::instance()->init(config::user_function_dir);
LOG(INFO) << CpuInfo::debug_string();
LOG(INFO) << DiskInfo::debug_string();
LOG(INFO) << MemInfo::debug_string();
init_doris_metrics(paths);
init_signals();
}
void Daemon::start() {
Status st;
st = Thread::create(
"Daemon", "tcmalloc_gc_thread", [this]() { this->tcmalloc_gc_thread(); },
&_tcmalloc_gc_thread);
CHECK(st.ok()) << st;
st = Thread::create(
"Daemon", "memory_maintenance_thread", [this]() { this->memory_maintenance_thread(); },
&_memory_maintenance_thread);
CHECK(st.ok()) << st;
st = Thread::create(
"Daemon", "memory_gc_thread", [this]() { this->memory_gc_thread(); },
&_memory_gc_thread);
CHECK(st.ok()) << st;
st = Thread::create(
"Daemon", "load_channel_tracker_refresh_thread",
[this]() { this->load_channel_tracker_refresh_thread(); },
&_load_channel_tracker_refresh_thread);
CHECK(st.ok()) << st;
st = Thread::create(
"Daemon", "memory_tracker_profile_refresh_thread",
[this]() { this->memory_tracker_profile_refresh_thread(); },
&_memory_tracker_profile_refresh_thread);
CHECK(st.ok()) << st;
if (config::enable_metric_calculator) {
st = Thread::create(
"Daemon", "calculate_metrics_thread",
[this]() { this->calculate_metrics_thread(); }, &_calculate_metrics_thread);
CHECK(st.ok()) << st;
}
st = Thread::create(
"Daemon", "block_spill_gc_thread", [this]() { this->block_spill_gc_thread(); },
&_block_spill_gc_thread);
CHECK(st.ok()) << st;
}
void Daemon::stop() {
_stop_background_threads_latch.count_down();
if (_tcmalloc_gc_thread) {
_tcmalloc_gc_thread->join();
}
if (_memory_maintenance_thread) {
_memory_maintenance_thread->join();
}
if (_memory_gc_thread) {
_memory_gc_thread->join();
}
if (_load_channel_tracker_refresh_thread) {
_load_channel_tracker_refresh_thread->join();
}
if (_memory_tracker_profile_refresh_thread) {
_memory_tracker_profile_refresh_thread->join();
}
if (_calculate_metrics_thread) {
_calculate_metrics_thread->join();
}
if (_block_spill_gc_thread) {
_block_spill_gc_thread->join();
}
}
} // namespace doris