From 0b945fe3619cffb7b41333ded81aeee3b8971587 Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Tue, 8 Nov 2022 09:52:33 +0800 Subject: [PATCH] [enhancement](memtracker) Refactor mem tracker hierarchy (#13585) mem tracker can be logically divided into 4 layers: 1)process 2)type 3)query/load/compaction task etc. 4)exec node etc. type includes enum Type { GLOBAL = 0, // Life cycle is the same as the process, e.g. Cache and default Orphan QUERY = 1, // Count the memory consumption of all Query tasks. LOAD = 2, // Count the memory consumption of all Load tasks. COMPACTION = 3, // Count the memory consumption of all Base and Cumulative tasks. SCHEMA_CHANGE = 4, // Count the memory consumption of all SchemaChange tasks. CLONE = 5, // Count the memory consumption of all EngineCloneTask. Note: this does not include the memory used by make/release snapshots. BATCHLOAD = 6, // Count the memory consumption of all EngineBatchLoadTask. CONSISTENCY = 7 // Count the memory consumption of all EngineChecksumTask. } Object pointers are no longer saved between each layer, and the values of process and each type are periodically aggregated. other fix: In [fix](memtracker) Fix transmit_tracker null pointer because phamp is not thread safe #13528, I tried to separate the memory that was manually abandoned in the query from the orphan mem tracker. But in the actual test, the accuracy of this part of the memory cannot be guaranteed, so it is put back into the orphan mem tracker. 
--- be/src/exec/hash_table.cpp | 2 +- be/src/exec/olap_scan_node.cpp | 2 +- be/src/exec/partitioned_aggregation_node.cc | 8 +- be/src/exec/partitioned_hash_table.cc | 2 +- be/src/exec/tablet_sink.cpp | 6 +- be/src/exec/tablet_sink.h | 4 +- be/src/http/default_path_handlers.cpp | 66 +-- be/src/olap/base_compaction.cpp | 2 +- be/src/olap/compaction.cpp | 8 +- be/src/olap/cumulative_compaction.cpp | 2 +- be/src/olap/delta_writer.cpp | 12 + be/src/olap/lru_cache.cpp | 2 +- be/src/olap/memtable.cpp | 2 +- be/src/olap/olap_server.cpp | 4 +- be/src/olap/rowset/beta_rowset_writer.cpp | 3 +- be/src/olap/schema_change.cpp | 4 +- be/src/olap/storage_engine.cpp | 35 +- be/src/olap/storage_engine.h | 30 +- be/src/olap/task/engine_alter_tablet_task.cpp | 6 +- be/src/olap/task/engine_batch_load_task.cpp | 7 +- be/src/olap/task/engine_checksum_task.cpp | 6 +- be/src/olap/task/engine_clone_task.cpp | 6 +- be/src/runtime/CMakeLists.txt | 1 - be/src/runtime/buffer_control_block.cpp | 6 +- be/src/runtime/buffered_block_mgr2.cc | 14 +- be/src/runtime/data_stream_recvr.cc | 19 +- be/src/runtime/data_stream_sender.cpp | 15 +- be/src/runtime/disk_io_mgr.cc | 3 +- be/src/runtime/exec_env.h | 34 +- be/src/runtime/exec_env_init.cpp | 69 +--- be/src/runtime/fragment_mgr.cpp | 27 ++ be/src/runtime/load_channel_mgr.cpp | 9 + be/src/runtime/load_channel_mgr.h | 3 + be/src/runtime/mem_pool.cpp | 2 +- be/src/runtime/memory/chunk_allocator.cpp | 3 +- be/src/runtime/memory/mem_tracker.cpp | 71 ++-- be/src/runtime/memory/mem_tracker.h | 59 +-- be/src/runtime/memory/mem_tracker_limiter.cpp | 387 ++++++++---------- be/src/runtime/memory/mem_tracker_limiter.h | 283 +++++-------- .../runtime/memory/mem_tracker_task_pool.cpp | 162 -------- be/src/runtime/memory/mem_tracker_task_pool.h | 68 --- be/src/runtime/memory/system_allocator.cpp | 2 +- .../runtime/memory/thread_mem_tracker_mgr.cpp | 28 +- .../runtime/memory/thread_mem_tracker_mgr.h | 90 ++-- be/src/runtime/plan_fragment_executor.cpp | 7 +- 
be/src/runtime/query_fragments_ctx.h | 19 + be/src/runtime/runtime_filter_mgr.cpp | 2 +- be/src/runtime/runtime_state.cpp | 57 +-- be/src/runtime/runtime_state.h | 32 +- be/src/runtime/sorted_run_merger.cc | 2 +- be/src/runtime/stream_load/stream_load_pipe.h | 4 +- be/src/runtime/thread_context.cpp | 25 +- be/src/runtime/thread_context.h | 97 ++--- be/src/service/doris_main.cpp | 22 +- be/src/service/internal_service.cpp | 72 ++-- be/src/util/mem_info.cpp | 14 +- be/src/util/ref_count_closure.h | 1 + be/src/vec/common/allocator.h | 14 +- be/src/vec/exec/scan/scanner_scheduler.cpp | 6 +- be/src/vec/exec/volap_scan_node.cpp | 6 +- be/src/vec/runtime/vdata_stream_mgr.cpp | 2 +- be/src/vec/runtime/vdata_stream_recvr.cpp | 24 +- be/src/vec/runtime/vdata_stream_recvr.h | 5 +- be/src/vec/sink/vdata_stream_sender.cpp | 6 +- be/src/vec/sink/vtablet_sink.cpp | 4 +- be/test/exec/broker_scan_node_test.cpp | 2 +- be/test/exec/broker_scanner_test.cpp | 2 +- be/test/exec/es_http_scan_node_test.cpp | 2 +- be/test/exec/es_predicate_test.cpp | 2 +- be/test/exec/hash_table_test.cpp | 6 +- be/test/exec/json_scanner_test.cpp | 2 +- .../exec/json_scanner_with_jsonpath_test.cpp | 2 +- be/test/exec/orc_scanner_test.cpp | 2 +- be/test/exec/parquet_scanner_test.cpp | 2 +- be/test/exec/tablet_sink_test.cpp | 3 - be/test/exprs/runtime_filter_test.cpp | 2 +- be/test/olap/lru_cache_test.cpp | 3 +- be/test/runtime/mem_limit_test.cpp | 96 +---- be/test/runtime/test_env.cc | 3 - be/test/testutil/run_all_tests.cpp | 12 +- be/test/util/arrow/arrow_work_flow_test.cpp | 5 +- .../vec/exec/parquet/parquet_reader_test.cpp | 2 +- be/test/vec/exec/vbroker_scan_node_test.cpp | 2 +- be/test/vec/exec/vbroker_scanner_test.cpp | 2 +- be/test/vec/exec/vjson_scanner_test.cpp | 2 +- be/test/vec/exec/vorc_scanner_test.cpp | 2 +- be/test/vec/exec/vparquet_scanner_test.cpp | 2 +- be/test/vec/exec/vtablet_sink_test.cpp | 3 - be/test/vec/exprs/vexpr_test.cpp | 4 +- be/test/vec/runtime/vdata_stream_test.cpp | 2 +- 90 
files changed, 723 insertions(+), 1439 deletions(-) delete mode 100644 be/src/runtime/memory/mem_tracker_task_pool.cpp delete mode 100644 be/src/runtime/memory/mem_tracker_task_pool.h diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp index 2aa195bebc..b50b03460e 100644 --- a/be/src/exec/hash_table.cpp +++ b/be/src/exec/hash_table.cpp @@ -175,7 +175,7 @@ Status HashTable::resize_buckets(int64_t num_buckets) { int64_t old_num_buckets = _num_buckets; int64_t delta_bytes = (num_buckets - old_num_buckets) * sizeof(Bucket); - Status st = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit( + Status st = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit( delta_bytes); if (!st) { LOG_EVERY_N(WARNING, 100) << "resize bucket failed: " << st.to_string(); diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 1dcd5c3754..b4bd204614 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -1535,7 +1535,7 @@ void OlapScanNode::transfer_thread(RuntimeState* state) { size_t thread_slot_num = 0; mem_consume = _scanner_mem_tracker->consumption(); // check limit for total memory and _scan_row_batches memory - if (mem_consume < (state->instance_mem_tracker()->limit() * 6) / 10 && + if (mem_consume < (state->query_mem_tracker()->limit() * 6) / 10 && _scan_row_batches_bytes < _max_scanner_queue_size_bytes / 2) { thread_slot_num = max_thread - assigned_thread_num; } else { diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc index 624f7c71ce..49c02fa5a8 100644 --- a/be/src/exec/partitioned_aggregation_node.cc +++ b/be/src/exec/partitioned_aggregation_node.cc @@ -911,14 +911,14 @@ Tuple* PartitionedAggregationNode::ConstructIntermediateTuple( << "Backend: " << BackendOptions::get_localhost() << ", " << "fragment: " << print_id(state_->fragment_instance_id()) << " " << "Used: " - << 
thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->consumption() + << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->consumption() << ", Limit: " - << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->limit() << ". " + << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->limit() << ". " << "You can change the limit by session variable exec_mem_limit."; string details = Substitute(str.str(), _id, tuple_data_size); *status = thread_context() - ->_thread_mem_tracker_mgr->limiter_mem_tracker_raw() - ->mem_limit_exceeded(state_, details, tuple_data_size); + ->_thread_mem_tracker_mgr->limiter_mem_tracker() + ->fragment_mem_limit_exceeded(state_, details, tuple_data_size); return nullptr; } memset(tuple_data, 0, fixed_size); diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc index 83fe65d1b6..f9598b510b 100644 --- a/be/src/exec/partitioned_hash_table.cc +++ b/be/src/exec/partitioned_hash_table.cc @@ -307,7 +307,7 @@ Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state, MAX_EXPR_VALUES_ARRAY_SIZE / expr_values_bytes_per_row_)); int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_); - if (UNLIKELY(!thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit( + if (UNLIKELY(!thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit( mem_usage))) { capacity_ = 0; string details = Substitute( diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp index 762feacb15..a7e03c11e2 100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -48,7 +48,7 @@ NodeChannel::NodeChannel(OlapTableSink* parent, IndexChannel* index_channel, int : _parent(parent), _index_channel(index_channel), _node_id(node_id) { _node_channel_tracker = std::make_unique(fmt::format( "NodeChannel:indexID={}:threadId={}", std::to_string(_index_channel->_index_id), - 
thread_context()->thread_id_str())); + thread_context()->get_thread_id())); } NodeChannel::~NodeChannel() noexcept { @@ -624,7 +624,7 @@ void NodeChannel::try_send_batch(RuntimeState* state) { _add_batch_closure->cntl.http_request().set_method(brpc::HTTP_METHOD_POST); _add_batch_closure->cntl.http_request().set_content_type("application/json"); { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _brpc_http_stub->tablet_writer_add_batch_by_http(&_add_batch_closure->cntl, NULL, &_add_batch_closure->result, _add_batch_closure); @@ -632,7 +632,7 @@ void NodeChannel::try_send_batch(RuntimeState* state) { } else { _add_batch_closure->cntl.http_request().Clear(); { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _stub->tablet_writer_add_batch(&_add_batch_closure->cntl, &request, &_add_batch_closure->result, _add_batch_closure); } diff --git a/be/src/exec/tablet_sink.h b/be/src/exec/tablet_sink.h index 1ba4b46a60..45552329bf 100644 --- a/be/src/exec/tablet_sink.h +++ b/be/src/exec/tablet_sink.h @@ -96,7 +96,7 @@ public: ~ReusableClosure() override { // shouldn't delete when Run() is calling or going to be called, wait for current Run() done. 
join(); - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); cntl.Reset(); } @@ -124,7 +124,7 @@ public: // plz follow this order: reset() -> set_in_flight() -> send brpc batch void reset() { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); cntl.Reset(); cid = cntl.call_id(); } diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp index a74fc740d5..7edc3ec7f0 100644 --- a/be/src/http/default_path_handlers.cpp +++ b/be/src/http/default_path_handlers.cpp @@ -84,9 +84,7 @@ void config_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* // Registered to handle "/memz", and prints out memory allocation statistics. void mem_usage_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) { (*output) << "
"
-              << "Mem Limit: "
-              << PrettyPrinter::print(ExecEnv::GetInstance()->process_mem_tracker()->limit(),
-                                      TUnit::BYTES)
+              << "Mem Limit: " << PrettyPrinter::print(MemInfo::mem_limit(), TUnit::BYTES)
               << std::endl
               << "Physical Mem From Perf: "
               << PrettyPrinter::print(PerfCounters::get_vm_rss(), TUnit::BYTES) << std::endl
@@ -121,14 +119,48 @@ void display_tablets_callback(const WebPageHandler::ArgumentMap& args, EasyJson*
 // Registered to handle "/mem_tracker", and prints out memory tracker information.
 void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) {
     (*output) << "

Memory usage by subsystem

\n"; + std::vector snapshots; + auto iter = args.find("type"); + if (iter != args.end()) { + if (iter->second == "global") { + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::GLOBAL); + } else if (iter->second == "query") { + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::QUERY); + } else if (iter->second == "load") { + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::LOAD); + } else if (iter->second == "compaction") { + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::COMPACTION); + } else if (iter->second == "schema_change") { + MemTrackerLimiter::make_type_snapshots(&snapshots, + MemTrackerLimiter::Type::SCHEMA_CHANGE); + } else if (iter->second == "clone") { + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::CLONE); + } else if (iter->second == "batch_load") { + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::BATCHLOAD); + } else if (iter->second == "consistency") { + MemTrackerLimiter::make_type_snapshots(&snapshots, + MemTrackerLimiter::Type::CONSISTENCY); + } + } else { + (*output) << "

*Note: (see documentation for details)

\n"; + (*output) << "

1.`/mem_tracker?type=global` to view the memory statistics of each " + "type

\n"; + (*output) << "

2.`/mem_tracker` counts virtual memory, which is equal to `Actual " + "memory used` in `/memz`

\n"; + (*output) << "

3.`process` is equal to the sum of all types of memory, " + "`/mem_tracker` can be logically divided into 4 layers: 1)`process` 2)`type` " + "3)`query/load/compation task etc.` 4)`exec node etc.`

\n"; + MemTrackerLimiter::make_process_snapshots(&snapshots); + } + (*output) << "\n"; (*output) << "" - "" + "" "" - "" + "" "" "" @@ -136,35 +168,17 @@ void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstr "" "" - "" ""; (*output) << "\n"; - - size_t upper_level; - size_t cur_level = 1; - // the level equal or lower than upper_level will show in web page - auto iter = args.find("upper_level"); - if (iter != args.end()) { - upper_level = std::stol(iter->second); - } else { - upper_level = 3; - } - - std::vector snapshots; - ExecEnv::GetInstance()->process_mem_tracker()->make_snapshot(&snapshots, cur_level, - upper_level); - MemTracker::make_global_mem_tracker_snapshot(&snapshots); for (const auto& item : snapshots) { string limit_str = item.limit == -1 ? "none" : AccurateItoaKMGT(item.limit); string current_consumption_normalize = AccurateItoaKMGT(item.cur_consumption); string peak_consumption_normalize = AccurateItoaKMGT(item.peak_consumption); (*output) << strings::Substitute( "\n", - item.level, item.label, item.parent, limit_str, item.cur_consumption, - current_consumption_normalize, item.peak_consumption, peak_consumption_normalize, - item.child_count); + "td>\n", + item.type, item.label, item.parent_label, limit_str, item.cur_consumption, + current_consumption_normalize, item.peak_consumption, peak_consumption_normalize); } (*output) << "
LevelTypeLabelParentParent LabelLimitCurrent Consumption(Bytes)Peak Consumption(Bytes)Peak Consumption(Normalize)Child Count
$0$1$2$3$4$5$6$7$8
$7
\n"; } diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp index 63da436d23..0e1d737b15 100644 --- a/be/src/olap/base_compaction.cpp +++ b/be/src/olap/base_compaction.cpp @@ -68,7 +68,7 @@ Status BaseCompaction::execute_compact_impl() { return Status::OLAPInternalError(OLAP_ERR_BE_CLONE_OCCURRED); } - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::COMPACTION); + SCOPED_ATTACH_TASK(_mem_tracker); // 2. do base compaction, merge rowsets int64_t permits = get_compaction_permits(); diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 801cbf2990..e74dca760f 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -34,13 +34,7 @@ Compaction::Compaction(TabletSharedPtr tablet, const std::string& label) _input_rowsets_size(0), _input_row_num(0), _state(CompactionState::INITED) { -#ifndef BE_TEST - _mem_tracker = std::make_shared( - -1, label, StorageEngine::instance()->compaction_mem_tracker()); - _mem_tracker->enable_reset_zero(); -#else - _mem_tracker = std::make_shared(-1, label); -#endif + _mem_tracker = std::make_shared(MemTrackerLimiter::Type::COMPACTION, label); } Compaction::~Compaction() {} diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index 4461a240b5..4736454c09 100644 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -70,7 +70,7 @@ Status CumulativeCompaction::execute_compact_impl() { return Status::OLAPInternalError(OLAP_ERR_CUMULATIVE_CLONE_OCCURRED); } - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::COMPACTION); + SCOPED_ATTACH_TASK(_mem_tracker); // 3. 
do cumulative compaction, merge rowsets int64_t permits = get_compaction_permits(); diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 5ec23ea593..c9a4890fbb 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -25,6 +25,7 @@ #include "olap/schema.h" #include "olap/schema_change.h" #include "olap/storage_engine.h" +#include "runtime/load_channel_mgr.h" #include "runtime/row_batch.h" #include "runtime/tuple_row.h" #include "service/backend_options.h" @@ -283,12 +284,23 @@ void DeltaWriter::_reset_mem_table() { if (_tablet->enable_unique_key_merge_on_write() && _delete_bitmap == nullptr) { _delete_bitmap.reset(new DeleteBitmap(_tablet->tablet_id())); } +#ifndef BE_TEST + auto mem_table_insert_tracker = std::make_shared( + fmt::format("MemTableManualInsert:TabletId={}:MemTableNum={}#loadID={}", + std::to_string(tablet_id()), _mem_table_num, _load_id.to_string()), + nullptr, ExecEnv::GetInstance()->load_channel_mgr()->mem_tracker_set()); + auto mem_table_flush_tracker = std::make_shared( + fmt::format("MemTableHookFlush:TabletId={}:MemTableNum={}#loadID={}", + std::to_string(tablet_id()), _mem_table_num++, _load_id.to_string()), + nullptr, ExecEnv::GetInstance()->load_channel_mgr()->mem_tracker_set()); +#else auto mem_table_insert_tracker = std::make_shared( fmt::format("MemTableManualInsert:TabletId={}:MemTableNum={}#loadID={}", std::to_string(tablet_id()), _mem_table_num, _load_id.to_string())); auto mem_table_flush_tracker = std::make_shared( fmt::format("MemTableHookFlush:TabletId={}:MemTableNum={}#loadID={}", std::to_string(tablet_id()), _mem_table_num++, _load_id.to_string())); +#endif { std::lock_guard l(_mem_table_tracker_lock); _mem_table_tracker.push_back(mem_table_insert_tracker); diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp index a1b2a174ab..a6505d9bbe 100644 --- a/be/src/olap/lru_cache.cpp +++ b/be/src/olap/lru_cache.cpp @@ -436,7 +436,7 @@ ShardedLRUCache::ShardedLRUCache(const 
std::string& name, size_t total_capacity, _num_shards(num_shards), _shards(nullptr), _last_id(1) { - _mem_tracker = std::make_unique(-1, name); + _mem_tracker = std::make_unique(MemTrackerLimiter::Type::GLOBAL, name); CHECK(num_shards > 0) << "num_shards cannot be 0"; CHECK_EQ((num_shards & (num_shards - 1)), 0) << "num_shards should be power of two, but got " << num_shards; diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index f61e945bda..9faf757bf9 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -157,7 +157,7 @@ MemTable::~MemTable() { _flush_mem_tracker->set_consumption(0); DCHECK_EQ(_insert_mem_tracker->consumption(), 0) << std::endl - << MemTracker::log_usage(_insert_mem_tracker->make_snapshot(0)); + << MemTracker::log_usage(_insert_mem_tracker->make_snapshot()); DCHECK_EQ(_flush_mem_tracker->consumption(), 0); } diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 958e60137e..ead81f7367 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -125,7 +125,7 @@ Status StorageEngine::start_bg_threads() { RETURN_IF_ERROR(Thread::create( "StorageEngine", "path_scan_thread", [this, data_dir]() { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); this->_path_scan_thread_callback(data_dir); }, &path_scan_thread)); @@ -135,7 +135,7 @@ Status StorageEngine::start_bg_threads() { RETURN_IF_ERROR(Thread::create( "StorageEngine", "path_gc_thread", [this, data_dir]() { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); this->_path_gc_thread_callback(data_dir); }, &path_gc_thread)); diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 795ca8e145..e89d945f53 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -272,8 +272,7 @@ Status 
BetaRowsetWriter::_check_correctness(std::unique_ptrsegcompaction_mem_tracker(), - ThreadContext::TaskType::COMPACTION); + SCOPED_CONSUME_MEM_TRACKER(StorageEngine::instance()->segcompaction_mem_tracker()); // throttle segcompaction task if memory depleted. if (MemTrackerLimiter::sys_mem_exceed_limit_check(GB_EXCHANGE_BYTE)) { LOG(WARNING) << "skip segcompaction due to memory shortage"; diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 4ab9b80bdb..3081b37886 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -1596,10 +1596,10 @@ Status VSchemaChangeWithSorting::_inner_process(RowsetReaderSharedPtr rowset_rea } RETURN_IF_ERROR(_changer.change_block(ref_block.get(), new_block.get())); - if (!_mem_tracker->check_limit(_memory_limitation, new_block->allocated_bytes())) { + if (_mem_tracker->consumption() + new_block->allocated_bytes() > _memory_limitation) { RETURN_IF_ERROR(create_rowset()); - if (!_mem_tracker->check_limit(_memory_limitation, new_block->allocated_bytes())) { + if (_mem_tracker->consumption() + new_block->allocated_bytes() > _memory_limitation) { LOG(WARNING) << "Memory limitation is too small for Schema Change." 
<< " _memory_limitation=" << _memory_limitation << ", new_block->allocated_bytes()=" << new_block->allocated_bytes() diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 9da8c8e147..083814fef0 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -81,12 +81,6 @@ using strings::Substitute; namespace doris { DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(unused_rowsets_count, MetricUnit::ROWSETS); -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(segcompaction_mem_consumption, MetricUnit::BYTES, "", - mem_consumption, Labels({{"type", "segcompaction"}})); -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(compaction_mem_consumption, MetricUnit::BYTES, "", - mem_consumption, Labels({{"type", "compaction"}})); -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(schema_change_mem_consumption, MetricUnit::BYTES, "", - mem_consumption, Labels({{"type", "schema_change"}})); StorageEngine* StorageEngine::_s_instance = nullptr; @@ -112,19 +106,9 @@ StorageEngine::StorageEngine(const EngineOptions& options) _available_storage_medium_type_count(0), _effective_cluster_id(-1), _is_all_cluster_id_exist(true), - _segcompaction_mem_tracker( - std::make_shared(-1, "StorageEngine::SegCompaction")), - _compaction_mem_tracker( - std::make_shared(-1, "StorageEngine::AutoCompaction")), - _segment_meta_mem_tracker(std::make_unique("StorageEngine::SegmentMeta")), - _schema_change_mem_tracker( - std::make_shared(-1, "StorageEngine::SchemaChange")), - _clone_mem_tracker(std::make_shared(-1, "StorageEngine::Clone")), - _batch_load_mem_tracker( - std::make_shared(-1, "StorageEngine::BatchLoad")), - _consistency_mem_tracker( - std::make_shared(-1, "StorageEngine::Consistency")), - _mem_tracker(std::make_shared(-1, "StorageEngine::Self")), + _mem_tracker(std::make_unique("StorageEngine")), + _segcompaction_mem_tracker(std::make_unique("SegCompaction")), + _segment_meta_mem_tracker(std::make_unique("SegmentMeta")), _stop_background_threads_latch(1), _tablet_manager(new 
TabletManager(config::tablet_map_shard_size)), _txn_manager(new TxnManager(config::txn_map_shard_size, config::txn_shard_size)), @@ -138,19 +122,10 @@ StorageEngine::StorageEngine(const EngineOptions& options) // std::lock_guard lock(_gc_mutex); return _unused_rowsets.size(); }); - REGISTER_HOOK_METRIC(segcompaction_mem_consumption, - [this]() { return _segcompaction_mem_tracker->consumption(); }); - REGISTER_HOOK_METRIC(compaction_mem_consumption, - [this]() { return _compaction_mem_tracker->consumption(); }); - REGISTER_HOOK_METRIC(schema_change_mem_consumption, - [this]() { return _schema_change_mem_tracker->consumption(); }); } StorageEngine::~StorageEngine() { DEREGISTER_HOOK_METRIC(unused_rowsets_count); - DEREGISTER_HOOK_METRIC(segcompaction_mem_consumption); - DEREGISTER_HOOK_METRIC(compaction_mem_consumption); - DEREGISTER_HOOK_METRIC(schema_change_mem_consumption); _clear(); if (_base_compaction_thread_pool) { @@ -177,7 +152,7 @@ void StorageEngine::load_data_dirs(const std::vector& data_dirs) { std::vector threads; for (auto data_dir : data_dirs) { threads.emplace_back([this, data_dir] { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); auto res = data_dir->load(); if (!res.ok()) { LOG(WARNING) << "io error when init load tables. 
res=" << res @@ -223,7 +198,7 @@ Status StorageEngine::_init_store_map() { _tablet_manager.get(), _txn_manager.get()); tmp_stores.emplace_back(store); threads.emplace_back([this, store, &error_msg_lock, &error_msg]() { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); auto st = store->init(); if (!st.ok()) { { diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index 93f1d0c438..ab7fa4ac7d 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -179,19 +179,8 @@ public: Status get_compaction_status_json(std::string* result); - std::shared_ptr segcompaction_mem_tracker() { - return _segcompaction_mem_tracker; - } - std::shared_ptr compaction_mem_tracker() { return _compaction_mem_tracker; } MemTracker* segment_meta_mem_tracker() { return _segment_meta_mem_tracker.get(); } - std::shared_ptr schema_change_mem_tracker() { - return _schema_change_mem_tracker; - } - std::shared_ptr clone_mem_tracker() { return _clone_mem_tracker; } - std::shared_ptr batch_load_mem_tracker() { return _batch_load_mem_tracker; } - std::shared_ptr consistency_mem_tracker() { - return _consistency_mem_tracker; - } + MemTracker* segcompaction_mem_tracker() { return _segcompaction_mem_tracker.get(); } // check cumulative compaction config void check_cumulative_compaction_config(); @@ -334,24 +323,13 @@ private: // map, if we use RowsetId as the key, we need custom hash func std::unordered_map _unused_rowsets; + // StorageEngine oneself + std::unique_ptr _mem_tracker; // Count the memory consumption of segment compaction tasks. - std::shared_ptr _segcompaction_mem_tracker; - // Count the memory consumption of all Base and Cumulative tasks. - std::shared_ptr _compaction_mem_tracker; + std::unique_ptr _segcompaction_mem_tracker; // This mem tracker is only for tracking memory use by segment meta data such as footer or index page. 
// The memory consumed by querying is tracked in segment iterator. std::unique_ptr _segment_meta_mem_tracker; - // Count the memory consumption of all SchemaChange tasks. - std::shared_ptr _schema_change_mem_tracker; - // Count the memory consumption of all EngineCloneTask. - // Note: Memory that does not contain make/release snapshots. - std::shared_ptr _clone_mem_tracker; - // Count the memory consumption of all EngineBatchLoadTask. - std::shared_ptr _batch_load_mem_tracker; - // Count the memory consumption of all EngineChecksumTask. - std::shared_ptr _consistency_mem_tracker; - // StorageEngine oneself - std::shared_ptr _mem_tracker; CountDownLatch _stop_background_threads_latch; scoped_refptr _unused_rowset_monitor_thread; diff --git a/be/src/olap/task/engine_alter_tablet_task.cpp b/be/src/olap/task/engine_alter_tablet_task.cpp index 8164049296..7ec1a4d4d7 100644 --- a/be/src/olap/task/engine_alter_tablet_task.cpp +++ b/be/src/olap/task/engine_alter_tablet_task.cpp @@ -26,15 +26,15 @@ namespace doris { EngineAlterTabletTask::EngineAlterTabletTask(const TAlterTabletReqV2& request) : _alter_tablet_req(request) { _mem_tracker = std::make_shared( - config::memory_limitation_per_thread_for_schema_change_bytes, + MemTrackerLimiter::Type::SCHEMA_CHANGE, fmt::format("EngineAlterTabletTask#baseTabletId={}:newTabletId={}", std::to_string(_alter_tablet_req.base_tablet_id), std::to_string(_alter_tablet_req.new_tablet_id)), - StorageEngine::instance()->schema_change_mem_tracker()); + config::memory_limitation_per_thread_for_schema_change_bytes); } Status EngineAlterTabletTask::execute() { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_ATTACH_TASK(_mem_tracker); DorisMetrics::instance()->create_rollup_requests_total->increment(1); Status res = SchemaChangeHandler::process_alter_tablet_v2(_alter_tablet_req); diff --git a/be/src/olap/task/engine_batch_load_task.cpp b/be/src/olap/task/engine_batch_load_task.cpp index 9b56055339..59a9cf16bc 
100644 --- a/be/src/olap/task/engine_batch_load_task.cpp +++ b/be/src/olap/task/engine_batch_load_task.cpp @@ -49,16 +49,15 @@ namespace doris { EngineBatchLoadTask::EngineBatchLoadTask(TPushReq& push_req, std::vector* tablet_infos) : _push_req(push_req), _tablet_infos(tablet_infos) { _mem_tracker = std::make_shared( - -1, + MemTrackerLimiter::Type::BATCHLOAD, fmt::format("EngineBatchLoadTask#pushType={}:tabletId={}", _push_req.push_type, - std::to_string(_push_req.tablet_id)), - StorageEngine::instance()->batch_load_mem_tracker()); + std::to_string(_push_req.tablet_id))); } EngineBatchLoadTask::~EngineBatchLoadTask() {} Status EngineBatchLoadTask::execute() { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_ATTACH_TASK(_mem_tracker); Status status; if (_push_req.push_type == TPushType::LOAD || _push_req.push_type == TPushType::LOAD_V2) { RETURN_IF_ERROR(_init()); diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp index b6f25bb2a4..e040ffcece 100644 --- a/be/src/olap/task/engine_checksum_task.cpp +++ b/be/src/olap/task/engine_checksum_task.cpp @@ -27,12 +27,12 @@ EngineChecksumTask::EngineChecksumTask(TTabletId tablet_id, TSchemaHash schema_h TVersion version, uint32_t* checksum) : _tablet_id(tablet_id), _schema_hash(schema_hash), _version(version), _checksum(checksum) { _mem_tracker = std::make_shared( - -1, "EngineChecksumTask#tabletId=" + std::to_string(tablet_id), - StorageEngine::instance()->consistency_mem_tracker()); + MemTrackerLimiter::Type::CONSISTENCY, + "EngineChecksumTask#tabletId=" + std::to_string(tablet_id)); } Status EngineChecksumTask::execute() { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_ATTACH_TASK(_mem_tracker); return _compute_checksum(); } // execute diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 9a42d8e0e9..b015adfe87 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ 
b/be/src/olap/task/engine_clone_task.cpp @@ -55,13 +55,13 @@ EngineCloneTask::EngineCloneTask(const TCloneReq& clone_req, const TMasterInfo& _signature(signature), _master_info(master_info) { _mem_tracker = std::make_shared( - -1, "EngineCloneTask#tabletId=" + std::to_string(_clone_req.tablet_id), - StorageEngine::instance()->clone_mem_tracker()); + MemTrackerLimiter::Type::CLONE, + "EngineCloneTask#tabletId=" + std::to_string(_clone_req.tablet_id)); } Status EngineCloneTask::execute() { // register the tablet to avoid it is deleted by gc thread during clone process - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_ATTACH_TASK(_mem_tracker); StorageEngine::instance()->tablet_manager()->register_clone_tablet(_clone_req.tablet_id); Status st = _do_clone(); StorageEngine::instance()->tablet_manager()->unregister_clone_tablet(_clone_req.tablet_id); diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index bd515e1c06..69af332894 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -99,7 +99,6 @@ set(RUNTIME_FILES memory/chunk_allocator.cpp memory/mem_tracker_limiter.cpp memory/mem_tracker.cpp - memory/mem_tracker_task_pool.cpp memory/thread_mem_tracker_mgr.cpp fold_constant_executor.cpp cache/result_node.cpp diff --git a/be/src/runtime/buffer_control_block.cpp b/be/src/runtime/buffer_control_block.cpp index 79496b449b..03bd6d3466 100644 --- a/be/src/runtime/buffer_control_block.cpp +++ b/be/src/runtime/buffer_control_block.cpp @@ -30,7 +30,7 @@ void GetResultBatchCtx::on_failure(const Status& status) { status.to_protobuf(result->mutable_status()); { // call by result sink - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); done->Run(); } delete this; @@ -45,7 +45,7 @@ void GetResultBatchCtx::on_close(int64_t packet_seq, QueryStatistics* statistics 
result->set_packet_seq(packet_seq); result->set_eos(true); { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); done->Run(); } delete this; @@ -73,7 +73,7 @@ void GetResultBatchCtx::on_data(const std::unique_ptr& t_resul } st.to_protobuf(result->mutable_status()); { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); done->Run(); } delete this; diff --git a/be/src/runtime/buffered_block_mgr2.cc b/be/src/runtime/buffered_block_mgr2.cc index fa2d1b70b0..e610cf3803 100644 --- a/be/src/runtime/buffered_block_mgr2.cc +++ b/be/src/runtime/buffered_block_mgr2.cc @@ -251,7 +251,7 @@ int64_t BufferedBlockMgr2::remaining_unreserved_buffers() const { int64_t num_buffers = _free_io_buffers.size() + _unpinned_blocks.size() + _non_local_outstanding_writes; num_buffers += - thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->spare_capacity() / + thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->spare_capacity() / max_block_size(); num_buffers -= _unfullfilled_reserved_buffers; return num_buffers; @@ -358,9 +358,9 @@ Status BufferedBlockMgr2::get_new_block(Client* client, Block* unpin_block, Bloc if (len > 0 && len < _max_block_size) { DCHECK(unpin_block == nullptr); - Status st = thread_context() - ->_thread_mem_tracker_mgr->limiter_mem_tracker_raw() - ->check_limit(len); + Status st = + thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit( + len); WARN_IF_ERROR(st, "get_new_block failed"); if (st) { client->_tracker->consume(len); @@ -986,7 +986,7 @@ Status BufferedBlockMgr2::find_buffer(unique_lock& lock, BufferDescriptor // First, try to allocate a new buffer. 
if (_free_io_buffers.size() < _block_write_threshold && - thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit( + thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit( _max_block_size)) { _mem_tracker->consume(_max_block_size); uint8_t* new_buffer = new uint8_t[_max_block_size]; @@ -1155,9 +1155,9 @@ string BufferedBlockMgr2::debug_internal() const { << " Unfullfilled reserved buffers: " << _unfullfilled_reserved_buffers << endl << " BUffer Block Mgr Used memory: " << _mem_tracker->consumption() << " Instance remaining memory: " - << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->spare_capacity() + << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->spare_capacity() << " (#blocks=" - << (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->spare_capacity() / + << (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->spare_capacity() / _max_block_size) << ")" << endl << " Block write threshold: " << _block_write_threshold; diff --git a/be/src/runtime/data_stream_recvr.cc b/be/src/runtime/data_stream_recvr.cc index c9533959f8..59d46102b3 100644 --- a/be/src/runtime/data_stream_recvr.cc +++ b/be/src/runtime/data_stream_recvr.cc @@ -186,10 +186,7 @@ Status DataStreamRecvr::SenderQueue::get_batch(RowBatch** next_batch) { if (!_pending_closures.empty()) { auto closure_pair = _pending_closures.front(); - { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); - closure_pair.first->Run(); - } + closure_pair.first->Run(); _pending_closures.pop_front(); closure_pair.second.stop(); @@ -339,11 +336,8 @@ void DataStreamRecvr::SenderQueue::cancel() { { std::lock_guard l(_lock); - { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); - for (auto closure_pair : _pending_closures) { - closure_pair.first->Run(); - } + for (auto closure_pair : _pending_closures) { + 
closure_pair.first->Run(); } _pending_closures.clear(); } @@ -357,11 +351,8 @@ void DataStreamRecvr::SenderQueue::close() { std::lock_guard l(_lock); _is_cancelled = true; - { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); - for (auto closure_pair : _pending_closures) { - closure_pair.first->Run(); - } + for (auto closure_pair : _pending_closures) { + closure_pair.first->Run(); } _pending_closures.clear(); } diff --git a/be/src/runtime/data_stream_sender.cpp b/be/src/runtime/data_stream_sender.cpp index cb02491701..df1495b750 100644 --- a/be/src/runtime/data_stream_sender.cpp +++ b/be/src/runtime/data_stream_sender.cpp @@ -138,7 +138,7 @@ Status DataStreamSender::Channel::send_batch(PRowBatch* batch, bool eos) { _closure->ref(); } else { RETURN_IF_ERROR(_wait_last_brpc()); - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _closure->cntl.Reset(); } VLOG_ROW << "Channel::send_batch() instance_id=" << _fragment_instance_id @@ -160,7 +160,6 @@ Status DataStreamSender::Channel::send_batch(PRowBatch* batch, bool eos) { if (_parent->_transfer_large_data_by_brpc && _brpc_request.has_row_batch() && _brpc_request.row_batch().has_tuple_data() && _brpc_request.ByteSizeLong() > MIN_HTTP_BRPC_SIZE) { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); Status st = request_embed_attachment_contain_tuple>( &_brpc_request, _closure); @@ -174,11 +173,17 @@ Status DataStreamSender::Channel::send_batch(PRowBatch* batch, bool eos) { brpc_url + "/PInternalServiceImpl/transmit_data_by_http"; _closure->cntl.http_request().set_method(brpc::HTTP_METHOD_POST); _closure->cntl.http_request().set_content_type("application/json"); - _brpc_http_stub->transmit_data_by_http(&_closure->cntl, NULL, &_closure->result, _closure); + { + 
SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); + _brpc_http_stub->transmit_data_by_http(&_closure->cntl, NULL, &_closure->result, + _closure); + } } else { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); _closure->cntl.http_request().Clear(); - _brpc_stub->transmit_data(&_closure->cntl, &_brpc_request, &_closure->result, _closure); + { + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); + _brpc_stub->transmit_data(&_closure->cntl, &_brpc_request, &_closure->result, _closure); + } } if (batch != nullptr) { diff --git a/be/src/runtime/disk_io_mgr.cc b/be/src/runtime/disk_io_mgr.cc index 1f506d82b1..2f330f250c 100644 --- a/be/src/runtime/disk_io_mgr.cc +++ b/be/src/runtime/disk_io_mgr.cc @@ -347,7 +347,8 @@ DiskIoMgr::~DiskIoMgr() { } Status DiskIoMgr::init(const int64_t mem_limit) { - _mem_tracker = std::make_unique(mem_limit, "DiskIO"); + _mem_tracker = std::make_unique(MemTrackerLimiter::Type::GLOBAL, "DiskIO", + mem_limit); for (int i = 0; i < _disk_queues.size(); ++i) { _disk_queues[i] = new DiskQueue(i); diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index 81af4a18f3..4bd9fd73af 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -49,7 +49,6 @@ class LoadStreamMgr; class MemTrackerLimiter; class MemTracker; class StorageEngine; -class MemTrackerTaskPool; class PriorityThreadPool; class PriorityWorkStealingThreadPool; class ResultBufferMgr; @@ -117,27 +116,12 @@ public: return nullptr; } - std::shared_ptr process_mem_tracker() { return _process_mem_tracker; } - void set_global_mem_tracker(const std::shared_ptr& process_tracker, - const std::shared_ptr& orphan_tracker, - const std::shared_ptr& nursery_mem_tracker, - const std::shared_ptr& bthread_mem_tracker) { - _process_mem_tracker = process_tracker; + void set_orphan_mem_tracker(const std::shared_ptr& orphan_tracker) { _orphan_mem_tracker = 
orphan_tracker; _orphan_mem_tracker_raw = orphan_tracker.get(); - _nursery_mem_tracker = nursery_mem_tracker; - _bthread_mem_tracker = bthread_mem_tracker; - } - std::shared_ptr allocator_cache_mem_tracker() { - return _allocator_cache_mem_tracker; } std::shared_ptr orphan_mem_tracker() { return _orphan_mem_tracker; } MemTrackerLimiter* orphan_mem_tracker_raw() { return _orphan_mem_tracker_raw; } - std::shared_ptr nursery_mem_tracker() { return _nursery_mem_tracker; } - std::shared_ptr bthread_mem_tracker() { return _bthread_mem_tracker; } - std::shared_ptr query_pool_mem_tracker() { return _query_pool_mem_tracker; } - std::shared_ptr load_pool_mem_tracker() { return _load_pool_mem_tracker; } - MemTrackerTaskPool* task_pool_mem_tracker_registry() { return _task_pool_mem_tracker_registry; } ThreadResourceMgr* thread_mgr() { return _thread_mgr; } PriorityThreadPool* scan_thread_pool() { return _scan_thread_pool; } PriorityThreadPool* remote_scan_thread_pool() { return _remote_scan_thread_pool; } @@ -194,7 +178,7 @@ private: Status _init(const std::vector& store_paths); void _destroy(); - Status _init_mem_tracker(); + Status _init_mem_env(); /// Initialise 'buffer_pool_' with given capacity. void _init_buffer_pool(int64_t min_page_len, int64_t capacity, int64_t clean_pages_limit); @@ -217,11 +201,6 @@ private: ClientCache* _broker_client_cache = nullptr; ThreadResourceMgr* _thread_mgr = nullptr; - // The ancestor for all trackers. Every tracker is visible from the process down. - // Not limit total memory by process tracker, and it's just used to track virtual memory of process. - std::shared_ptr _process_mem_tracker; - // tcmalloc/jemalloc allocator cache tracker, Including thread cache, free heap, etc. - std::shared_ptr _allocator_cache_mem_tracker; // The default tracker consumed by mem hook. If the thread does not attach other trackers, // by default all consumption will be passed to the process tracker through the orphan tracker. 
// In real time, `consumption of all limiter trackers` + `orphan tracker consumption` = `process tracker consumption`. @@ -229,15 +208,6 @@ private: // and the consumption of the orphan mem tracker is close to 0, but greater than 0. std::shared_ptr _orphan_mem_tracker; MemTrackerLimiter* _orphan_mem_tracker_raw; - // Parent is orphan, Nursery of orphan memory after manually switching thread mem tracker - std::shared_ptr _nursery_mem_tracker; - // Parent is orphan, bthread default mem tracker - std::shared_ptr _bthread_mem_tracker; - // The ancestor for all querys tracker. - std::shared_ptr _query_pool_mem_tracker; - // The ancestor for all load tracker. - std::shared_ptr _load_pool_mem_tracker; - MemTrackerTaskPool* _task_pool_mem_tracker_registry; // The following two thread pools are used in different scenarios. // _scan_thread_pool is a priority thread pool. diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index d18e02b90b..bec666859c 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -39,7 +39,6 @@ #include "runtime/load_channel_mgr.h" #include "runtime/load_path_mgr.h" #include "runtime/memory/mem_tracker.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/result_buffer_mgr.h" #include "runtime/result_queue_mgr.h" #include "runtime/routine_load/routine_load_task_executor.h" @@ -72,10 +71,6 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(send_batch_thread_pool_thread_num, MetricUnit DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(send_batch_thread_pool_queue_size, MetricUnit::NOUNIT); DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(download_cache_thread_pool_thread_num, MetricUnit::NOUNIT); DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(download_cache_thread_pool_queue_size, MetricUnit::NOUNIT); -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(query_mem_consumption, MetricUnit::BYTES, "", mem_consumption, - Labels({{"type", "query"}})); -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(load_mem_consumption, MetricUnit::BYTES, "", mem_consumption, 
- Labels({{"type", "load"}})); Status ExecEnv::init(ExecEnv* env, const std::vector& store_paths) { return env->_init(store_paths); @@ -100,7 +95,6 @@ Status ExecEnv::_init(const std::vector& store_paths) { _backend_client_cache = new BackendServiceClientCache(config::max_client_cache_size_per_host); _frontend_client_cache = new FrontendServiceClientCache(config::max_client_cache_size_per_host); _broker_client_cache = new BrokerServiceClientCache(config::max_client_cache_size_per_host); - _task_pool_mem_tracker_registry = new MemTrackerTaskPool(); _thread_mgr = new ThreadResourceMgr(); if (config::doris_enable_scanner_thread_pool_per_disk && config::doris_scanner_thread_pool_thread_num >= store_paths.size() && @@ -169,42 +163,22 @@ Status ExecEnv::_init(const std::vector& store_paths) { _small_file_mgr->init(); _scanner_scheduler->init(this); - _init_mem_tracker(); + _init_mem_env(); - RETURN_IF_ERROR( - _load_channel_mgr->init(ExecEnv::GetInstance()->process_mem_tracker()->limit())); + RETURN_IF_ERROR(_load_channel_mgr->init(MemInfo::mem_limit())); _heartbeat_flags = new HeartbeatFlags(); _register_metrics(); _is_init = true; return Status::OK(); } -Status ExecEnv::_init_mem_tracker() { - // 1. init global memory limit. - int64_t global_memory_limit_bytes = 0; +Status ExecEnv::_init_mem_env() { bool is_percent = false; std::stringstream ss; - global_memory_limit_bytes = - ParseUtil::parse_mem_spec(config::mem_limit, -1, MemInfo::physical_mem(), &is_percent); - if (global_memory_limit_bytes <= 0) { - ss << "Failed to parse mem limit from '" + config::mem_limit + "'."; - return Status::InternalError(ss.str()); - } - - if (global_memory_limit_bytes > MemInfo::physical_mem()) { - LOG(WARNING) << "Memory limit " - << PrettyPrinter::print(global_memory_limit_bytes, TUnit::BYTES) - << " exceeds physical memory of " - << PrettyPrinter::print(MemInfo::physical_mem(), TUnit::BYTES) - << ". 
Using physical memory instead"; - global_memory_limit_bytes = MemInfo::physical_mem(); - } - _process_mem_tracker = - std::make_shared(global_memory_limit_bytes, "Process"); - _orphan_mem_tracker = std::make_shared(-1, "Orphan", _process_mem_tracker); + // 1. init mem tracker + _orphan_mem_tracker = + std::make_shared(MemTrackerLimiter::Type::GLOBAL, "Orphan"); _orphan_mem_tracker_raw = _orphan_mem_tracker.get(); - _nursery_mem_tracker = std::make_shared(-1, "Nursery", _orphan_mem_tracker); - _bthread_mem_tracker = std::make_shared(-1, "Bthread", _orphan_mem_tracker); thread_context()->_thread_mem_tracker_mgr->init(); thread_context()->_thread_mem_tracker_mgr->set_check_attach(false); #if defined(USE_MEM_TRACKER) && !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && \ @@ -213,18 +187,6 @@ Status ExecEnv::_init_mem_tracker() { init_hook(); } #endif - _allocator_cache_mem_tracker = std::make_shared("Tc/JemallocAllocatorCache"); - _query_pool_mem_tracker = - std::make_shared(-1, "QueryPool", _process_mem_tracker); - REGISTER_HOOK_METRIC(query_mem_consumption, - [this]() { return _query_pool_mem_tracker->consumption(); }); - _load_pool_mem_tracker = - std::make_shared(-1, "LoadPool", _process_mem_tracker); - REGISTER_HOOK_METRIC(load_mem_consumption, - [this]() { return _load_pool_mem_tracker->consumption(); }); - LOG(INFO) << "Using global memory limit: " - << PrettyPrinter::print(global_memory_limit_bytes, TUnit::BYTES) - << ", origin config value: " << config::mem_limit; // 2. 
init buffer pool if (!BitUtil::IsPowerOf2(config::min_buffer_size)) { @@ -232,9 +194,8 @@ Status ExecEnv::_init_mem_tracker() { return Status::InternalError(ss.str()); } - int64_t buffer_pool_limit = - ParseUtil::parse_mem_spec(config::buffer_pool_limit, global_memory_limit_bytes, - MemInfo::physical_mem(), &is_percent); + int64_t buffer_pool_limit = ParseUtil::parse_mem_spec( + config::buffer_pool_limit, MemInfo::mem_limit(), MemInfo::physical_mem(), &is_percent); if (buffer_pool_limit <= 0) { ss << "Invalid config buffer_pool_limit value, must be a percentage or " "positive bytes value or percentage: " @@ -242,7 +203,7 @@ Status ExecEnv::_init_mem_tracker() { return Status::InternalError(ss.str()); } buffer_pool_limit = BitUtil::RoundDown(buffer_pool_limit, config::min_buffer_size); - while (!is_percent && buffer_pool_limit > global_memory_limit_bytes / 2) { + while (!is_percent && buffer_pool_limit > MemInfo::mem_limit() / 2) { // If buffer_pool_limit is not a percentage, and the value exceeds 50% of the total memory limit, // it is forced to be reduced to less than 50% of the total memory limit. // This is to ensure compatibility. In principle, buffer_pool_limit should be set as a percentage. @@ -271,9 +232,9 @@ Status ExecEnv::_init_mem_tracker() { // 3. init storage page cache int64_t storage_cache_limit = - ParseUtil::parse_mem_spec(config::storage_page_cache_limit, global_memory_limit_bytes, + ParseUtil::parse_mem_spec(config::storage_page_cache_limit, MemInfo::mem_limit(), MemInfo::physical_mem(), &is_percent); - while (!is_percent && storage_cache_limit > global_memory_limit_bytes / 2) { + while (!is_percent && storage_cache_limit > MemInfo::mem_limit() / 2) { // Reason same as buffer_pool_limit storage_cache_limit = storage_cache_limit / 2; } @@ -301,7 +262,7 @@ Status ExecEnv::_init_mem_tracker() { SegmentLoader::create_global_instance(segment_cache_capacity); // 4. 
init other managers - RETURN_IF_ERROR(_disk_io_mgr->init(global_memory_limit_bytes)); + RETURN_IF_ERROR(_disk_io_mgr->init(MemInfo::mem_limit())); RETURN_IF_ERROR(_tmp_file_mgr->init()); // 5. init chunk allocator @@ -312,7 +273,7 @@ Status ExecEnv::_init_mem_tracker() { } int64_t chunk_reserved_bytes_limit = - ParseUtil::parse_mem_spec(config::chunk_reserved_bytes_limit, global_memory_limit_bytes, + ParseUtil::parse_mem_spec(config::chunk_reserved_bytes_limit, MemInfo::mem_limit(), MemInfo::physical_mem(), &is_percent); if (chunk_reserved_bytes_limit <= 0) { ss << "Invalid config chunk_reserved_bytes_limit value, must be a percentage or " @@ -408,12 +369,8 @@ void ExecEnv::_destroy() { SAFE_DELETE(_routine_load_task_executor); SAFE_DELETE(_external_scan_context_mgr); SAFE_DELETE(_heartbeat_flags); - SAFE_DELETE(_task_pool_mem_tracker_registry); SAFE_DELETE(_scanner_scheduler); - DEREGISTER_HOOK_METRIC(query_mem_consumption); - DEREGISTER_HOOK_METRIC(load_mem_consumption); - _is_init = false; } diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index 8b5a1c5c44..8085ddf13b 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -640,6 +640,30 @@ Status FragmentMgr::exec_plan_fragment(const TExecPlanFragmentParams& params, Fi fragments_ctx->timeout_second = params.query_options.query_timeout; _set_scan_concurrency(params, fragments_ctx.get()); + bool has_query_mem_tracker = + params.query_options.__isset.mem_limit && (params.query_options.mem_limit > 0); + int64_t bytes_limit = has_query_mem_tracker ? params.query_options.mem_limit : -1; + if (bytes_limit > MemInfo::mem_limit()) { + VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES) + << " exceeds process memory limit of " + << PrettyPrinter::print(MemInfo::mem_limit(), TUnit::BYTES) + << ". 
Using process memory limit instead"; + bytes_limit = MemInfo::mem_limit(); + } + if (params.query_options.query_type == TQueryType::SELECT) { + fragments_ctx->query_mem_tracker = std::make_shared( + MemTrackerLimiter::Type::QUERY, + fmt::format("Query#Id={}", print_id(fragments_ctx->query_id)), bytes_limit); + } else if (params.query_options.query_type == TQueryType::LOAD) { + fragments_ctx->query_mem_tracker = std::make_shared( + MemTrackerLimiter::Type::LOAD, + fmt::format("Load#Id={}", print_id(fragments_ctx->query_id)), bytes_limit); + } + /* query_mem_tracker is only assigned for SELECT and LOAD above; guard so other query types do not dereference a null pointer. */ if (fragments_ctx->query_mem_tracker != nullptr && params.query_options.__isset.is_report_success && + params.query_options.is_report_success) { + fragments_ctx->query_mem_tracker->enable_print_log_usage(); + } + { // Find _fragments_ctx_map again, in case some other request has already // create the query fragments context. @@ -647,6 +671,9 @@ Status FragmentMgr::exec_plan_fragment(const TExecPlanFragmentParams& params, Fi auto search = _fragments_ctx_map.find(params.params.query_id); if (search == _fragments_ctx_map.end()) { _fragments_ctx_map.insert(std::make_pair(fragments_ctx->query_id, fragments_ctx)); + LOG(INFO) << "Register query/load memory tracker, query/load id: " + << print_id(fragments_ctx->query_id) + << " limit: " << PrettyPrinter::print(bytes_limit, TUnit::BYTES); } else { // Already has a query fragmentscontext, use it fragments_ctx = search->second; diff --git a/be/src/runtime/load_channel_mgr.cpp b/be/src/runtime/load_channel_mgr.cpp index 8292b25656..6eef349fe1 100644 --- a/be/src/runtime/load_channel_mgr.cpp +++ b/be/src/runtime/load_channel_mgr.cpp @@ -71,6 +71,8 @@ Status LoadChannelMgr::init(int64_t process_mem_limit) { _load_hard_mem_limit = calc_process_max_load_memory(process_mem_limit); _load_soft_mem_limit = _load_hard_mem_limit * config::load_process_soft_mem_limit_percent / 100; _mem_tracker = std::make_unique("LoadChannelMgr"); + _mem_tracker_set = std::make_unique(MemTrackerLimiter::Type::LOAD, + "LoadChannelMgrTrackerSet"); 
REGISTER_HOOK_METRIC(load_channel_mem_consumption, [this]() { return _mem_tracker->consumption(); }); _last_success_channel = new_lru_cache("LastestSuccessChannelCache", 1024); @@ -94,8 +96,15 @@ Status LoadChannelMgr::open(const PTabletWriterOpenRequest& params) { bool is_high_priority = (params.has_is_high_priority() && params.is_high_priority()); // Use the same mem limit as LoadChannelMgr for a single load channel +#ifndef BE_TEST + auto channel_mem_tracker = std::make_unique( + fmt::format("LoadChannel#senderIp={}#loadID={}", params.sender_ip(), + load_id.to_string()), + nullptr, ExecEnv::GetInstance()->load_channel_mgr()->mem_tracker_set()); +#else auto channel_mem_tracker = std::make_unique(fmt::format( "LoadChannel#senderIp={}#loadID={}", params.sender_ip(), load_id.to_string())); +#endif channel.reset(new LoadChannel(load_id, std::move(channel_mem_tracker), channel_timeout_s, is_high_priority, params.sender_ip(), params.is_vectorized())); diff --git a/be/src/runtime/load_channel_mgr.h b/be/src/runtime/load_channel_mgr.h index 6b30503f0c..3f27eafd0e 100644 --- a/be/src/runtime/load_channel_mgr.h +++ b/be/src/runtime/load_channel_mgr.h @@ -67,6 +67,7 @@ public: } _mem_tracker->set_consumption(mem_usage); } + MemTrackerLimiter* mem_tracker_set() { return _mem_tracker_set.get(); } private: template @@ -91,6 +92,8 @@ protected: // check the total load channel mem consumption of this Backend std::unique_ptr _mem_tracker; + // Associate load channel tracker and memtable tracker, avoid default association to Orphan tracker. 
+ std::unique_ptr _mem_tracker_set; int64_t _load_hard_mem_limit = -1; int64_t _load_soft_mem_limit = -1; diff --git a/be/src/runtime/mem_pool.cpp b/be/src/runtime/mem_pool.cpp index d49389353d..6d2760b389 100644 --- a/be/src/runtime/mem_pool.cpp +++ b/be/src/runtime/mem_pool.cpp @@ -134,7 +134,7 @@ Status MemPool::find_chunk(size_t min_size, bool check_limits) { chunk_size = BitUtil::RoundUpToPowerOfTwo(chunk_size); if (check_limits && - !thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit( + !thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit( chunk_size)) { return Status::MemoryAllocFailed("MemPool find new chunk {} bytes faild, exceed limit", chunk_size); diff --git a/be/src/runtime/memory/chunk_allocator.cpp b/be/src/runtime/memory/chunk_allocator.cpp index 43acc79538..b812734b4c 100644 --- a/be/src/runtime/memory/chunk_allocator.cpp +++ b/be/src/runtime/memory/chunk_allocator.cpp @@ -135,7 +135,8 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit) _steal_arena_limit(reserve_limit * 0.1), _reserved_bytes(0), _arenas(CpuInfo::get_max_num_cores()) { - _mem_tracker = std::make_unique(-1, "ChunkAllocator"); + _mem_tracker = + std::make_unique(MemTrackerLimiter::Type::GLOBAL, "ChunkAllocator"); for (int i = 0; i < _arenas.size(); ++i) { _arenas[i].reset(new ChunkArena()); } diff --git a/be/src/runtime/memory/mem_tracker.cpp b/be/src/runtime/memory/mem_tracker.cpp index 0604d538dc..bf7e308ff3 100644 --- a/be/src/runtime/memory/mem_tracker.cpp +++ b/be/src/runtime/memory/mem_tracker.cpp @@ -41,7 +41,8 @@ struct TrackerGroup { // Multiple groups are used to reduce the impact of locks. 
static std::vector mem_tracker_pool(1000); -MemTracker::MemTracker(const std::string& label, RuntimeProfile* profile) { +MemTracker::MemTracker(const std::string& label, RuntimeProfile* profile, MemTrackerLimiter* parent) + : _label(label) { if (profile == nullptr) { _consumption = std::make_shared(TUnit::BYTES); } else { @@ -57,75 +58,57 @@ MemTracker::MemTracker(const std::string& label, RuntimeProfile* profile) { _consumption = profile->AddSharedHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES); } - DCHECK(thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw() != nullptr); - MemTrackerLimiter* parent = - thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw(); - _label = fmt::format("[Observer] {} | {}", label, parent->label()); - _bind_group_num = parent->group_num(); + if (parent) { + _parent_label = parent->label(); + _parent_group_num = parent->group_num(); + } else { + DCHECK(thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker() != nullptr); + _parent_label = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->label(); + _parent_group_num = + thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->group_num(); + } { - std::lock_guard l(mem_tracker_pool[_bind_group_num].group_lock); - _tracker_group_it = mem_tracker_pool[_bind_group_num].trackers.insert( - mem_tracker_pool[_bind_group_num].trackers.end(), this); + std::lock_guard l(mem_tracker_pool[_parent_group_num].group_lock); + _tracker_group_it = mem_tracker_pool[_parent_group_num].trackers.insert( + mem_tracker_pool[_parent_group_num].trackers.end(), this); } } MemTracker::~MemTracker() { - if (_bind_group_num != -1) { - std::lock_guard l(mem_tracker_pool[_bind_group_num].group_lock); - if (_tracker_group_it != mem_tracker_pool[_bind_group_num].trackers.end()) { - mem_tracker_pool[_bind_group_num].trackers.erase(_tracker_group_it); - _tracker_group_it = mem_tracker_pool[_bind_group_num].trackers.end(); + if (_parent_group_num != -1) { + 
std::lock_guard l(mem_tracker_pool[_parent_group_num].group_lock); + if (_tracker_group_it != mem_tracker_pool[_parent_group_num].trackers.end()) { + mem_tracker_pool[_parent_group_num].trackers.erase(_tracker_group_it); + _tracker_group_it = mem_tracker_pool[_parent_group_num].trackers.end(); } } } -MemTracker::Snapshot MemTracker::make_snapshot(size_t level) const { +MemTracker::Snapshot MemTracker::make_snapshot() const { Snapshot snapshot; - snapshot.label = split(_label, " | ")[0]; - snapshot.parent = split(_label, " | ")[1]; - snapshot.level = level; + snapshot.label = _label; + snapshot.parent_label = _parent_label; snapshot.limit = -1; snapshot.cur_consumption = _consumption->current_value(); snapshot.peak_consumption = _consumption->value(); - snapshot.child_count = 0; return snapshot; } -void MemTracker::make_group_snapshot(std::vector* snapshots, size_t level, - int64_t group_num, std::string related_label) { +void MemTracker::make_group_snapshot(std::vector* snapshots, + int64_t group_num, std::string parent_label) { std::lock_guard l(mem_tracker_pool[group_num].group_lock); for (auto tracker : mem_tracker_pool[group_num].trackers) { - if (split(tracker->label(), " | ")[1] == related_label) { - snapshots->push_back(tracker->make_snapshot(level)); + if (tracker->parent_label() == parent_label) { + snapshots->push_back(tracker->make_snapshot()); } } } std::string MemTracker::log_usage(MemTracker::Snapshot snapshot) { return fmt::format("MemTracker Label={}, Parent Label={}, Used={}({} B), Peak={}({} B)", - snapshot.label, snapshot.parent, print_bytes(snapshot.cur_consumption), + snapshot.label, snapshot.parent_label, print_bytes(snapshot.cur_consumption), snapshot.cur_consumption, print_bytes(snapshot.peak_consumption), snapshot.peak_consumption); } -static std::unordered_map> global_mem_trackers; -static std::mutex global_trackers_lock; - -std::shared_ptr MemTracker::get_global_mem_tracker(const std::string& label) { - std::lock_guard l(global_trackers_lock); - 
if (global_mem_trackers.find(label) != global_mem_trackers.end()) { - return global_mem_trackers[label]; - } else { - global_mem_trackers.emplace( - label, std::make_shared(fmt::format("[Global] {}", label))); - return global_mem_trackers[label]; - } -} - -void MemTracker::make_global_mem_tracker_snapshot(std::vector* snapshots) { - std::lock_guard l(global_trackers_lock); - for (auto& v : global_mem_trackers) { - snapshots->push_back(v.second->make_snapshot(1)); - } -} } // namespace doris \ No newline at end of file diff --git a/be/src/runtime/memory/mem_tracker.h b/be/src/runtime/memory/mem_tracker.h index 01a0d58cdb..5ffcaa30a1 100644 --- a/be/src/runtime/memory/mem_tracker.h +++ b/be/src/runtime/memory/mem_tracker.h @@ -24,6 +24,8 @@ namespace doris { +class MemTrackerLimiter; + // Used to track memory usage. // // MemTracker can be consumed manually by consume()/release(), or put into SCOPED_CONSUME_MEM_TRACKER, @@ -33,30 +35,22 @@ namespace doris { class MemTracker { public: struct Snapshot { + std::string type = ""; std::string label; - // For MemTracker, it is only weakly related to parent through label, ensuring MemTracker Independence. - // For MemTrackerLimiter, it is strongly related to parent and saves pointer objects to each other. - std::string parent = ""; - size_t level = 0; + std::string parent_label = ""; int64_t limit = 0; int64_t cur_consumption = 0; int64_t peak_consumption = 0; - size_t child_count = 0; }; // Creates and adds the tracker to the mem_tracker_pool. - MemTracker(const std::string& label, RuntimeProfile* profile = nullptr); + MemTracker(const std::string& label, RuntimeProfile* profile = nullptr, + MemTrackerLimiter* parent = nullptr); // For MemTrackerLimiter - MemTracker() { _bind_group_num = -1; } + MemTracker() { _parent_group_num = -1; } ~MemTracker(); - // Get a global tracker with a specified label, and the tracker will be created when the label is first get. 
- // use SCOPED_CONSUME_MEM_TRACKER count the memory in the scope to a global tracker with the specified label name. - // which is usually used for debugging, to finding memory hotspots. - static std::shared_ptr get_global_mem_tracker(const std::string& label); - static void make_global_mem_tracker_snapshot(std::vector* snapshots); - static std::string print_bytes(int64_t bytes) { return bytes >= 0 ? PrettyPrinter::print(bytes, TUnit::BYTES) : "-" + PrettyPrinter::print(std::abs(bytes), TUnit::BYTES); @@ -64,28 +58,23 @@ public: public: const std::string& label() const { return _label; } + const std::string& parent_label() const { return _parent_label; } // Returns the memory consumed in bytes. int64_t consumption() const { return _consumption->current_value(); } int64_t peak_consumption() const { return _consumption->value(); } - void consume(int64_t bytes); + void consume(int64_t bytes) { + if (bytes == 0) return; + _consumption->add(bytes); + } void release(int64_t bytes) { consume(-bytes); } - // Transfer 'bytes' of consumption from this tracker to 'dst'. - void transfer_to(MemTracker* dst, int64_t bytes); void set_consumption(int64_t bytes) { _consumption->set(bytes); } public: - bool limit_exceeded(int64_t limit) const { return limit >= 0 && limit < consumption(); } - // Return true, no exceeded limit - bool check_limit(int64_t limit, int64_t bytes) const { - return limit >= 0 && limit > consumption() + bytes; - } - - Snapshot make_snapshot(size_t level) const; - // Specify group_num from mem_tracker_pool to generate snapshot, requiring tracker.label to be related - // with parameter related_label - static void make_group_snapshot(std::vector* snapshots, size_t level, - int64_t group_num, std::string related_label); + Snapshot make_snapshot() const; + // Specify group_num from mem_tracker_pool to generate snapshot. 
+ static void make_group_snapshot(std::vector* snapshots, int64_t group_num, + std::string parent_label); static std::string log_usage(MemTracker::Snapshot snapshot); std::string debug_string() { @@ -105,23 +94,11 @@ protected: std::shared_ptr _consumption; // in bytes // Tracker is located in group num in mem_tracker_pool - int64_t _bind_group_num; + int64_t _parent_group_num; + std::string _parent_label; // Iterator into mem_tracker_pool for this object. Stored to have O(1) remove. std::list::iterator _tracker_group_it; }; -inline void MemTracker::consume(int64_t bytes) { - if (bytes == 0) { - return; - } else { - _consumption->add(bytes); - } -} - -inline void MemTracker::transfer_to(MemTracker* dst, int64_t bytes) { - release(bytes); - dst->consume(bytes); -} - } // namespace doris \ No newline at end of file diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp index 20639326f8..1fca6009ee 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.cpp +++ b/be/src/runtime/memory/mem_tracker_limiter.cpp @@ -22,7 +22,6 @@ #include #include "gutil/once.h" -#include "gutil/walltime.h" #include "runtime/runtime_state.h" #include "runtime/thread_context.h" #include "util/pretty_printer.h" @@ -30,8 +29,19 @@ namespace doris { -MemTrackerLimiter::MemTrackerLimiter(int64_t byte_limit, const std::string& label, - const std::shared_ptr& parent, +struct TrackerLimiterGroup { + std::list trackers; + std::mutex group_lock; +}; + +// Save all MemTrackerLimiters in use. +// Each group corresponds to several MemTrackerLimiters and has a lock. +// Multiple groups are used to reduce the impact of locks. 
+static std::vector mem_tracker_limiter_pool(1000); + +std::atomic MemTrackerLimiter::_enable_print_log_process_usage {true}; + +MemTrackerLimiter::MemTrackerLimiter(Type type, const std::string& label, int64_t byte_limit, RuntimeProfile* profile) { DCHECK_GE(byte_limit, -1); if (profile == nullptr) { @@ -39,268 +49,215 @@ MemTrackerLimiter::MemTrackerLimiter(int64_t byte_limit, const std::string& labe } else { _consumption = profile->AddSharedHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES); } + _type = type; _label = label; _limit = byte_limit; - _group_num = GetCurrentTimeMicros() % 1000; - if (parent || label == "Process") { - _parent = parent; - } else if (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->label() == - "Orphan") { - _parent = ExecEnv::GetInstance()->process_mem_tracker(); + if (_type == Type::GLOBAL) { + _group_num = 0; } else { - _parent = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker(); + _group_num = random() % 999 + 1; } - DCHECK(_parent || label == "Process"); - - // Walks the MemTrackerLimiter hierarchy and populates _all_ancestors and _limited_ancestors - MemTrackerLimiter* tracker = this; - while (tracker != nullptr) { - _all_ancestors.push_back(tracker); - // Process tracker does not participate in the process memory limit, process tracker consumption is virtual memory, - // and there is a diff between the real physical memory value of the process. It is replaced by check_sys_mem_info. 
- if (tracker->has_limit() && tracker->label() != "Process") - _limited_ancestors.push_back(tracker); - tracker = tracker->_parent.get(); - } - DCHECK_GT(_all_ancestors.size(), 0); - DCHECK_EQ(_all_ancestors[0], this); - if (_parent) { - std::lock_guard l(_parent->_child_tracker_limiter_lock); - _child_tracker_it = _parent->_child_tracker_limiters.insert( - _parent->_child_tracker_limiters.end(), this); - _had_child_count++; + { + std::lock_guard l(mem_tracker_limiter_pool[_group_num].group_lock); + _tracker_limiter_group_it = mem_tracker_limiter_pool[_group_num].trackers.insert( + mem_tracker_limiter_pool[_group_num].trackers.end(), this); } } MemTrackerLimiter::~MemTrackerLimiter() { - // TCMalloc hook will be triggered during destructor memtracker, may cause crash. - if (_label == "Process") doris::thread_context_ptr.init = false; - DCHECK(remain_child_count() == 0 || _label == "Process"); + // mem hook record tracker cannot guarantee that the final consumption is 0, + // nor can it guarantee that the memory alloc and free are recorded in a one-to-one correspondence. // In order to ensure `consumption of all limiter trackers` + `orphan tracker consumption` = `process tracker consumption` // in real time. Merge its consumption into orphan when parent is process, to avoid repetition. 
- if (_parent && _parent->label() == "Process") { - ExecEnv::GetInstance()->orphan_mem_tracker_raw()->cache_consume_local( - _consumption->current_value()); - } - if (_reset_zero) { - reset_zero(); - _all_ancestors.clear(); - _all_ancestors.push_back(ExecEnv::GetInstance()->orphan_mem_tracker_raw()); - } - consume_local(_untracked_mem); - if (_parent) { - std::lock_guard l(_parent->_child_tracker_limiter_lock); - if (_child_tracker_it != _parent->_child_tracker_limiters.end()) { - _parent->_child_tracker_limiters.erase(_child_tracker_it); - _child_tracker_it = _parent->_child_tracker_limiters.end(); + ExecEnv::GetInstance()->orphan_mem_tracker()->consume(_consumption->current_value()); + _consumption->set(0); + { + std::lock_guard l(mem_tracker_limiter_pool[_group_num].group_lock); + if (_tracker_limiter_group_it != mem_tracker_limiter_pool[_group_num].trackers.end()) { + mem_tracker_limiter_pool[_group_num].trackers.erase(_tracker_limiter_group_it); + _tracker_limiter_group_it = mem_tracker_limiter_pool[_group_num].trackers.end(); } } } -MemTracker::Snapshot MemTrackerLimiter::make_snapshot(size_t level) const { +MemTracker::Snapshot MemTrackerLimiter::make_snapshot() const { Snapshot snapshot; + snapshot.type = TypeString[_type]; snapshot.label = _label; - snapshot.parent = _parent != nullptr ? 
_parent->label() : "Root"; - snapshot.level = level; snapshot.limit = _limit; snapshot.cur_consumption = _consumption->current_value(); snapshot.peak_consumption = _consumption->value(); - snapshot.child_count = remain_child_count(); return snapshot; } -void MemTrackerLimiter::make_snapshot(std::vector* snapshots, - size_t cur_level, size_t upper_level) const { - Snapshot snapshot = MemTrackerLimiter::make_snapshot(cur_level); +void MemTrackerLimiter::refresh_global_counter() { + std::unordered_map type_mem_sum = { + {Type::GLOBAL, 0}, {Type::QUERY, 0}, {Type::LOAD, 0}, + {Type::COMPACTION, 0}, {Type::SCHEMA_CHANGE, 0}, {Type::CLONE, 0}, + {Type::BATCHLOAD, 0}, {Type::CONSISTENCY, 0}}; + for (unsigned i = 0; i < mem_tracker_limiter_pool.size(); ++i) { + std::lock_guard l(mem_tracker_limiter_pool[i].group_lock); + for (auto tracker : mem_tracker_limiter_pool[i].trackers) { + type_mem_sum[tracker->type()] += tracker->consumption(); + } + } + for (auto it : type_mem_sum) { + MemTrackerLimiter::TypeMemSum[it.first]->set(it.second); + } +} + +void MemTrackerLimiter::make_process_snapshots(std::vector* snapshots) { + MemTrackerLimiter::refresh_global_counter(); + int64_t process_mem_sum = 0; + Snapshot snapshot; + for (auto it : MemTrackerLimiter::TypeMemSum) { + snapshot.type = TypeString[it.first]; + snapshot.label = ""; + snapshot.limit = -1; + snapshot.cur_consumption = it.second->current_value(); + snapshot.peak_consumption = it.second->value(); + (*snapshots).emplace_back(snapshot); + process_mem_sum += it.second->current_value(); + } + + snapshot.type = "tc/jemalloc_cache"; + snapshot.label = ""; + snapshot.limit = -1; + snapshot.cur_consumption = MemInfo::allocator_cache_mem(); + snapshot.peak_consumption = -1; (*snapshots).emplace_back(snapshot); - if (cur_level < upper_level) { - { - std::lock_guard l(_child_tracker_limiter_lock); - for (const auto& child : _child_tracker_limiters) { - child->make_snapshot(snapshots, cur_level + 1, upper_level); + 
process_mem_sum += MemInfo::allocator_cache_mem(); + + snapshot.type = "process"; + snapshot.label = ""; + snapshot.limit = -1; + snapshot.cur_consumption = process_mem_sum; + snapshot.peak_consumption = -1; + (*snapshots).emplace_back(snapshot); +} + +void MemTrackerLimiter::make_type_snapshots(std::vector* snapshots, + MemTrackerLimiter::Type type) { + if (type == Type::GLOBAL) { + std::lock_guard l(mem_tracker_limiter_pool[0].group_lock); + for (auto tracker : mem_tracker_limiter_pool[0].trackers) { + (*snapshots).emplace_back(tracker->make_snapshot()); + MemTracker::make_group_snapshot(snapshots, tracker->group_num(), tracker->label()); + } + } else { + for (unsigned i = 1; i < mem_tracker_limiter_pool.size(); ++i) { + std::lock_guard l(mem_tracker_limiter_pool[i].group_lock); + for (auto tracker : mem_tracker_limiter_pool[i].trackers) { + if (tracker->type() == type) { + (*snapshots).emplace_back(tracker->make_snapshot()); + MemTracker::make_group_snapshot(snapshots, tracker->group_num(), + tracker->label()); + } } } - MemTracker::make_group_snapshot(snapshots, cur_level + 1, _group_num, _label); } } -int64_t MemTrackerLimiter::spare_capacity() const { - int64_t result = std::numeric_limits::max(); - for (const auto& tracker : _limited_ancestors) { - int64_t mem_left = tracker->limit() - tracker->consumption(); - result = std::min(result, mem_left); - } - return result; -} - -int64_t MemTrackerLimiter::get_lowest_limit() const { - if (_limited_ancestors.empty()) return -1; - int64_t min_limit = std::numeric_limits::max(); - for (const auto& tracker : _limited_ancestors) { - DCHECK(tracker->has_limit()); - min_limit = std::min(min_limit, tracker->limit()); - } - return min_limit; -} - -// Calling this on the query tracker results in output like: -// -// Query(4a4c81fedaed337d:4acadfda00000000) Limit=10.00 GB Total=508.28 MB Peak=508.45 MB -// Fragment 4a4c81fedaed337d:4acadfda00000000: Total=8.00 KB Peak=8.00 KB -// EXCHANGE_NODE (id=4): Total=0 Peak=0 -// 
DataStreamRecvr: Total=0 Peak=0 -// Block Manager: Limit=6.68 GB Total=394.00 MB Peak=394.00 MB -// Fragment 4a4c81fedaed337d:4acadfda00000006: Total=233.72 MB Peak=242.24 MB -// AGGREGATION_NODE (id=1): Total=139.21 MB Peak=139.84 MB -// HDFS_SCAN_NODE (id=0): Total=93.94 MB Peak=102.24 MB -// DataStreamSender (dst_id=2): Total=45.99 KB Peak=85.99 KB -// Fragment 4a4c81fedaed337d:4acadfda00000003: Total=274.55 MB Peak=274.62 MB -// AGGREGATION_NODE (id=3): Total=274.50 MB Peak=274.50 MB -// EXCHANGE_NODE (id=2): Total=0 Peak=0 -// DataStreamRecvr: Total=45.91 KB Peak=684.07 KB -// DataStreamSender (dst_id=4): Total=680.00 B Peak=680.00 B -// -// If 'reservation_metrics_' are set, we ge a more granular breakdown: -// TrackerName: Limit=5.00 MB Reservation=5.00 MB OtherMemory=1.04 MB -// Total=6.04 MB Peak=6.45 MB -// -std::string MemTrackerLimiter::log_usage(int max_recursive_depth, int64_t* logged_consumption) { - int64_t curr_consumption = consumption(); - int64_t peak_consumption = _consumption->value(); - if (logged_consumption != nullptr) *logged_consumption = curr_consumption; - - std::string detail = - "MemTrackerLimiter Label={}, Limit={}({} B), Used={}({} B), Peak={}({} B), Exceeded={}"; - detail = fmt::format(detail, _label, print_bytes(_limit), _limit, print_bytes(curr_consumption), - curr_consumption, print_bytes(peak_consumption), peak_consumption, - limit_exceeded() ? "true" : "false"); - - // This call does not need the children, so return early. 
- if (max_recursive_depth == 0) return detail; - - // Recurse and get information about the children - int64_t child_consumption; - std::string child_trackers_usage; - { - std::lock_guard l(_child_tracker_limiter_lock); - child_trackers_usage = - log_usage(max_recursive_depth - 1, _child_tracker_limiters, &child_consumption); - } - std::vector snapshots; - MemTracker::make_group_snapshot(&snapshots, 0, _group_num, _label); - for (const auto& snapshot : snapshots) { - child_trackers_usage += "\n " + MemTracker::log_usage(snapshot); - } - if (!child_trackers_usage.empty()) detail += child_trackers_usage; - return detail; -} - -std::string MemTrackerLimiter::log_usage(int max_recursive_depth, - const std::list& trackers, - int64_t* logged_consumption) { - *logged_consumption = 0; - std::vector usage_strings; - for (const auto& tracker : trackers) { - int64_t tracker_consumption; - std::string usage_string = tracker->log_usage(max_recursive_depth, &tracker_consumption); - if (!usage_string.empty()) usage_strings.push_back(usage_string); - *logged_consumption += tracker_consumption; - } - return usage_strings.size() == 0 ? "" : "\n " + join(usage_strings, "\n "); +std::string MemTrackerLimiter::log_usage(MemTracker::Snapshot snapshot) { + return fmt::format( + "MemTrackerLimiter Label={}, Type={}, Limit={}({} B), Used={}({} B), Peak={}({} B)", + snapshot.label, snapshot.type, print_bytes(snapshot.limit), snapshot.limit, + print_bytes(snapshot.cur_consumption), snapshot.cur_consumption, + print_bytes(snapshot.peak_consumption), snapshot.peak_consumption); } void MemTrackerLimiter::print_log_usage(const std::string& msg) { - // only print the tracker log_usage in be log. 
std::string detail = msg; - detail += "\n " + fmt::format( - "process memory used {}, limit {}, hard limit {}, tc/jemalloc " - "allocator cache {}", - PerfCounters::get_vm_rss_str(), MemInfo::mem_limit_str(), - print_bytes(MemInfo::hard_mem_limit()), - MemInfo::allocator_cache_mem_str()); - if (_print_log_usage) { - if (_label == "Process") { - // Dumping the process MemTracker is expensive. Limiting the recursive depth to two - // levels limits the level of detail to a one-line summary for each query MemTracker. - detail += "\n " + log_usage(2); - } else { - detail += "\n " + log_usage(); + detail += "\n " + MemTrackerLimiter::process_mem_log_str(); + if (_enable_print_log_usage) { + detail += log_usage(); + std::string child_trackers_usage; + std::vector snapshots; + MemTracker::make_group_snapshot(&snapshots, _group_num, _label); + for (const auto& snapshot : snapshots) { + child_trackers_usage += "\n " + MemTracker::log_usage(snapshot); } + if (!child_trackers_usage.empty()) detail += child_trackers_usage; + // TODO: memory leak by calling `boost::stacktrace` in tcmalloc hook, // test whether overwriting malloc/free is the same problem in jemalloc/tcmalloc. // detail += "\n" + boost::stacktrace::to_string(boost::stacktrace::stacktrace()); LOG(WARNING) << detail; - _print_log_usage = false; + _enable_print_log_usage = false; } } -std::string MemTrackerLimiter::mem_limit_exceeded(const std::string& msg, - int64_t failed_allocation_size) { - STOP_CHECK_THREAD_MEM_TRACKER_LIMIT(); - std::string detail = fmt::format("Memory limit exceeded:, ", _label); - MemTrackerLimiter* exceeded_tracker = nullptr; - MemTrackerLimiter* max_consumption_tracker = nullptr; - int64_t free_size = INT64_MAX; - // Find the tracker that exceed limit and has the least free. - for (const auto& tracker : _limited_ancestors) { - int64_t max_consumption = tracker->peak_consumption() > tracker->consumption() - ? 
tracker->peak_consumption() - : tracker->consumption(); - if (tracker->limit() < max_consumption + failed_allocation_size) { - exceeded_tracker = tracker; - break; - } - if (tracker->limit() - max_consumption < free_size) { - free_size = tracker->limit() - max_consumption; - max_consumption_tracker = tracker; - } +void MemTrackerLimiter::print_log_process_usage(const std::string& msg) { + if (!MemTrackerLimiter::_enable_print_log_process_usage.exchange(false)) return; + std::string detail = msg; + detail += "\n " + MemTrackerLimiter::process_mem_log_str(); + std::vector snapshots; + MemTrackerLimiter::make_process_snapshots(&snapshots); + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::GLOBAL); + for (const auto& snapshot : snapshots) { + detail += "\n " + MemTrackerLimiter::log_usage(snapshot); } - - MemTrackerLimiter* print_log_usage_tracker = nullptr; - if (exceeded_tracker != nullptr) { - detail += limit_exceeded_errmsg_prefix_str(failed_allocation_size, exceeded_tracker); - print_log_usage_tracker = exceeded_tracker; - } else if (sys_mem_exceed_limit_check(failed_allocation_size)) { - detail += fmt::format("{}>, executing msg:<{}>", - limit_exceeded_errmsg_sys_str(failed_allocation_size), msg); - } else if (max_consumption_tracker != nullptr) { - // must after check_sys_mem_info false - detail += fmt::format( - "failed alloc size {}, max consumption tracker:<{}>, limit {}, peak used {}, " - "current used {}>, executing msg:<{}>", - print_bytes(failed_allocation_size), max_consumption_tracker->label(), - print_bytes(max_consumption_tracker->limit()), - print_bytes(max_consumption_tracker->peak_consumption()), - print_bytes(max_consumption_tracker->consumption()), msg); - print_log_usage_tracker = max_consumption_tracker; - } else { - // The limit of the current tracker and parents is less than 0, the consume will not fail, - // and the current process memory has no excess limit. 
- detail += fmt::format("unknown exceed reason, executing msg:<{}>", msg); - print_log_usage_tracker = ExecEnv::GetInstance()->process_mem_tracker().get(); - } - auto failed_msg = MemTrackerLimiter::limit_exceeded_errmsg_suffix_str(detail); - if (print_log_usage_tracker != nullptr) print_log_usage_tracker->print_log_usage(failed_msg); - return failed_msg; + LOG(WARNING) << detail; } std::string MemTrackerLimiter::mem_limit_exceeded(const std::string& msg, - MemTrackerLimiter* failed_tracker, const std::string& limit_exceeded_errmsg_prefix) { + DCHECK(_limit != -1); STOP_CHECK_THREAD_MEM_TRACKER_LIMIT(); - std::string detail = - fmt::format("Memory limit exceeded:, {}>, executing msg:<{}>", - _label, limit_exceeded_errmsg_prefix, msg); - auto failed_msg = MemTrackerLimiter::limit_exceeded_errmsg_suffix_str(detail); - failed_tracker->print_log_usage(failed_msg); - return failed_msg; + std::string detail = fmt::format( + "Memory limit exceeded:, {}>, executing msg:<{}>. backend {} " + "process memory used {}, limit {}. If query tracker exceed, `set " + "exec_mem_limit=8G` to change limit, details mem usage see be.INFO.", + _label, limit_exceeded_errmsg_prefix, msg, BackendOptions::get_localhost(), + PerfCounters::get_vm_rss_str(), MemInfo::mem_limit_str()); + print_log_usage(detail); + return detail; } -Status MemTrackerLimiter::mem_limit_exceeded(RuntimeState* state, const std::string& msg, - int64_t failed_alloc_size) { - auto failed_msg = mem_limit_exceeded(msg, failed_alloc_size); +Status MemTrackerLimiter::fragment_mem_limit_exceeded(RuntimeState* state, const std::string& msg, + int64_t failed_alloc_size) { + auto failed_msg = + mem_limit_exceeded(msg, tracker_limit_exceeded_errmsg_str(failed_alloc_size, this)); state->log_error(failed_msg); return Status::MemoryLimitExceeded(failed_msg); } +// TODO(zxy) More observable methods +// /// Logs the usage of 'limit' number of queries based on maximum total memory +// /// consumption. 
+// std::string MemTracker::LogTopNQueries(int limit) { +// if (limit == 0) return ""; +// priority_queue, std::vector>, +// std::greater>> +// min_pq; +// GetTopNQueries(min_pq, limit); +// std::vector usage_strings(min_pq.size()); +// while (!min_pq.empty()) { +// usage_strings.push_back(min_pq.top().second); +// min_pq.pop(); +// } +// std::reverse(usage_strings.begin(), usage_strings.end()); +// return join(usage_strings, "\n"); +// } + +// /// Helper function for LogTopNQueries that iterates through the MemTracker hierarchy +// /// and populates 'min_pq' with 'limit' number of elements (that contain state related +// /// to query MemTrackers) based on maximum total memory consumption. +// void MemTracker::GetTopNQueries( +// priority_queue, std::vector>, +// greater>>& min_pq, +// int limit) { +// list> children; +// { +// lock_guard l(child_trackers_lock_); +// children = child_trackers_; +// } +// for (const auto& child_weak : children) { +// shared_ptr child = child_weak.lock(); +// if (child) { +// child->GetTopNQueries(min_pq, limit); +// } +// } +// } + } // namespace doris diff --git a/be/src/runtime/memory/mem_tracker_limiter.h b/be/src/runtime/memory/mem_tracker_limiter.h index 6bc9449c20..2182e84f00 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.h +++ b/be/src/runtime/memory/mem_tracker_limiter.h @@ -31,45 +31,59 @@ namespace doris { class RuntimeState; // Track and limit the memory usage of process and query. -// Contains an limit, arranged into a tree structure, the consumption also tracked by its ancestors. +// Contains a limit, arranged into a tree structure. // // Automatically track every once malloc/free of the system memory allocator (Currently, based on TCMlloc hook). // Put Query MemTrackerLimiter into SCOPED_ATTACH_TASK when the thread starts,all memory used by this thread -// will be recorded on this Query, otherwise it will be recorded in Process Tracker by default. 
-// -// We use a five-level hierarchy of mem trackers: process, query pool, query, instance, node. -// The first four layers are MemTrackerLimiter with limit, and the fifth layer is MemTracker without limit. -// Specific parts of the fragment (exec nodes, sinks, etc) will add a fifth level when they are initialized. +// will be recorded on this Query, otherwise it will be recorded in Orphan Tracker by default. class MemTrackerLimiter final : public MemTracker { public: - // Creates and adds the tracker limiter to the tree - MemTrackerLimiter( - int64_t byte_limit = -1, const std::string& label = std::string(), - const std::shared_ptr& parent = std::shared_ptr(), - RuntimeProfile* profile = nullptr); + enum Type { + GLOBAL = 0, // Life cycle is the same as the process, e.g. Cache and default Orphan + QUERY = 1, // Count the memory consumption of all Query tasks. + LOAD = 2, // Count the memory consumption of all Load tasks. + COMPACTION = 3, // Count the memory consumption of all Base and Cumulative tasks. + SCHEMA_CHANGE = 4, // Count the memory consumption of all SchemaChange tasks. + CLONE = 5, // Count the memory consumption of all EngineCloneTask. Note: Memory that does not contain make/release snapshots. + BATCHLOAD = 6, // Count the memory consumption of all EngineBatchLoadTask. + CONSISTENCY = 7 // Count the memory consumption of all EngineChecksumTask. + }; - // If the final consumption is not as expected, this usually means that the same memory is calling - // consume and release on different trackers. If the two trackers have a parent-child relationship, - // the parent tracker consumption is correct, and the child tracker is wrong; if the two trackers have - // no parent-child relationship, the two tracker consumptions are wrong. 
- ~MemTrackerLimiter(); + inline static std::unordered_map> + TypeMemSum = {{Type::GLOBAL, + std::make_shared(TUnit::BYTES)}, + {Type::QUERY, + std::make_shared(TUnit::BYTES)}, + {Type::LOAD, + std::make_shared(TUnit::BYTES)}, + {Type::COMPACTION, + std::make_shared(TUnit::BYTES)}, + {Type::SCHEMA_CHANGE, + std::make_shared(TUnit::BYTES)}, + {Type::CLONE, + std::make_shared(TUnit::BYTES)}, + {Type::BATCHLOAD, + std::make_shared(TUnit::BYTES)}, + {Type::CONSISTENCY, + std::make_shared(TUnit::BYTES)}}; - std::shared_ptr parent() const { return _parent; } - - size_t remain_child_count() const { return _child_tracker_limiters.size(); } - size_t had_child_count() const { return _had_child_count; } - - Snapshot make_snapshot(size_t level) const; - // Returns a list of all the valid tracker snapshots. - void make_snapshot(std::vector* snapshots, size_t cur_level, - size_t upper_level) const; + inline static const std::string TypeString[] = {"global", "query", "load", + "compaction", "schema_change", "clone", + "batch_load", "consistency"}; public: + // byte_limit equal to -1 means no consumption limit, only participate in process memory statistics. + MemTrackerLimiter(Type type, const std::string& label = std::string(), int64_t byte_limit = -1, + RuntimeProfile* profile = nullptr); + + ~MemTrackerLimiter(); + static bool sys_mem_exceed_limit_check(int64_t bytes) { // Limit process memory usage using the actual physical memory of the process in `/proc/self/status`. // This is independent of the consumption value of the mem tracker, which counts the virtual memory // of the process malloc. // for fast, expect MemInfo::initialized() to be true. + // // tcmalloc/jemalloc allocator cache does not participate in the mem check as part of the process physical memory. // because `new/malloc` will trigger mem hook when using tcmalloc/jemalloc allocator cache, // but it may not actually alloc physical memory, which is not expected in mem hook fail. 
@@ -77,127 +91,78 @@ public: // TODO: In order to ensure no OOM, currently reserve 200M, and then use the free mem in /proc/meminfo to ensure no OOM. if (MemInfo::proc_mem_no_allocator_cache() + bytes >= MemInfo::mem_limit() || PerfCounters::get_vm_rss() + bytes >= MemInfo::hard_mem_limit()) { + print_log_process_usage("sys_mem_exceed_limit_check"); return true; } return false; } void set_consumption() { LOG(FATAL) << "MemTrackerLimiter set_consumption not supported"; } + Type type() const { return _type; } int64_t group_num() const { return _group_num; } bool has_limit() const { return _limit >= 0; } int64_t limit() const { return _limit; } - void update_limit(int64_t limit) { - DCHECK(has_limit()); - _limit = limit; - } bool limit_exceeded() const { return _limit >= 0 && _limit < consumption(); } - // Returns true if a valid limit of this tracker limiter or one of its ancestors is exceeded. - bool any_limit_exceeded() const { - for (const auto& tracker : _limited_ancestors) { - if (tracker->limit_exceeded()) { - return true; - } - } - return false; - } - Status check_limit(int64_t bytes); // Returns the maximum consumption that can be made without exceeding the limit on - // this tracker limiter or any of its parents. Returns int64_t::max() if there are no - // limits and a negative value if any limit is already exceeded. - int64_t spare_capacity() const; - - // Returns the lowest limit for this tracker limiter and its ancestors. Returns -1 if there is no limit. - int64_t get_lowest_limit() const; + // this tracker limiter. + int64_t spare_capacity() const { return _limit - consumption(); } public: - // up to (but not including) end_tracker. - // This happens when we want to update tracking on a particular mem tracker but the consumption - // against the limit recorded in one of its ancestors already happened. - // It is used for revise mem tracker consumption. 
- // If the location of memory alloc and free is different, the consumption value of mem tracker will be inaccurate. - // But the consumption value of the process mem tracker is not affecte - void cache_consume_local(int64_t bytes); + // If need to consume the tracker frequently, use it + void cache_consume(int64_t bytes); - // Will not change the value of process_mem_tracker, even though mem_tracker == process_mem_tracker. + // Transfer 'bytes' of consumption from this tracker to 'dst'. void transfer_to(int64_t size, MemTrackerLimiter* dst) { - cache_consume_local(-size); - dst->cache_consume_local(size); + cache_consume(-size); + dst->cache_consume(size); } - void enable_print_log_usage() { _print_log_usage = true; } - void enable_reset_zero() { _reset_zero = true; } + static void refresh_global_counter(); + Snapshot make_snapshot() const; + // Returns a list of all the valid tracker snapshots. + static void make_process_snapshots(std::vector* snapshots); + static void make_type_snapshots(std::vector* snapshots, Type type); - void reset_zero() { - ExecEnv::GetInstance()->orphan_mem_tracker_raw()->cache_consume_local( - _consumption->current_value()); - cache_consume_local(-_consumption->current_value()); - } - - // Logs the usage of this tracker limiter and optionally its children (recursively). - // If 'logged_consumption' is non-nullptr, sets the consumption value logged. - // 'max_recursive_depth' specifies the maximum number of levels of children - // to include in the dump. If it is zero, then no children are dumped. - // Limiting the recursive depth reduces the cost of dumping, particularly - // for the process tracker limiter. 
- std::string log_usage(int max_recursive_depth = INT_MAX, int64_t* logged_consumption = nullptr); + static std::string log_usage(MemTracker::Snapshot snapshot); + std::string log_usage() { return log_usage(make_snapshot()); } void print_log_usage(const std::string& msg); + void enable_print_log_usage() { _enable_print_log_usage = true; } + static void enable_print_log_process_usage() { _enable_print_log_process_usage = true; } + static void print_log_process_usage(const std::string& msg); - // Log the memory usage when memory limit is exceeded and return a status object with - // msg of the allocation which caused the limit to be exceeded. - // If 'failed_allocation_size' is greater than zero, logs the allocation size. If - // 'failed_allocation_size' is zero, nothing about the allocation size is logged. - // If 'state' is non-nullptr, logs the error to 'state'. - std::string mem_limit_exceeded(const std::string& msg, int64_t failed_allocation_size = 0); - std::string mem_limit_exceeded(const std::string& msg, MemTrackerLimiter* failed_tracker, + // Log the memory usage when memory limit is exceeded. + std::string mem_limit_exceeded(const std::string& msg, const std::string& limit_exceeded_errmsg_prefix); - Status mem_limit_exceeded(RuntimeState* state, const std::string& msg, - int64_t failed_allocation_size = 0); + Status fragment_mem_limit_exceeded(RuntimeState* state, const std::string& msg, + int64_t failed_allocation_size = 0); std::string debug_string() { std::stringstream msg; msg << "limit: " << _limit << "; " << "consumption: " << _consumption->current_value() << "; " << "label: " << _label << "; " - << "all ancestor size: " << _all_ancestors.size() - 1 << "; " - << "limited ancestor size: " << _limited_ancestors.size() - 1 << "; "; + << "type: " << TypeString[_type] << "; "; return msg.str(); } private: - // The following func, for automatic memory tracking and limiting based on system memory allocation. 
friend class ThreadMemTrackerMgr; - // Increases consumption of this tracker and its ancestors by 'bytes'. - void consume(int64_t bytes); - - // Decreases consumption of this tracker and its ancestors by 'bytes'. - void release(int64_t bytes) { consume(-bytes); } - - // Increases consumption of this tracker and its ancestors by 'bytes' only if - // they can all consume 'bytes' without exceeding limit. If limit would be exceed, - // no MemTrackerLimiters are updated. Returns true if the consumption was successfully updated. + // Increases consumption of this tracker by 'bytes' only if will not exceeding limit. + // Returns true if the consumption was successfully updated. WARN_UNUSED_RESULT bool try_consume(int64_t bytes, std::string& failed_msg); - void consume_local(int64_t bytes); - // When the accumulated untracked memory value exceeds the upper limit, // the current value is returned and set to 0. // Thread safety. int64_t add_untracked_mem(int64_t bytes); - // Log consumption of all the trackers provided. Returns the sum of consumption in - // 'logged_consumption'. 'max_recursive_depth' specifies the maximum number of levels - // of children to include in the dump. If it is zero, then no children are dumped. - static std::string log_usage(int max_recursive_depth, - const std::list& trackers, - int64_t* logged_consumption); - - static std::string limit_exceeded_errmsg_prefix_str(int64_t bytes, - MemTrackerLimiter* exceed_tracker) { + static std::string tracker_limit_exceeded_errmsg_str(int64_t bytes, + MemTrackerLimiter* exceed_tracker) { return fmt::format( "failed alloc size {}, exceeded tracker:<{}>, limit {}, peak " "used {}, current used {}", @@ -206,68 +171,43 @@ private: print_bytes(exceed_tracker->_consumption->current_value())); } - static std::string limit_exceeded_errmsg_suffix_str(const std::string& msg) { + static std::string process_limit_exceeded_errmsg_str(int64_t bytes) { return fmt::format( - "{}. backend {} process memory used {}, limit {}. 
If query tracker exceed, `set " - "exec_mem_limit=8G` to change limit, details mem usage see be.INFO.", - msg, BackendOptions::get_localhost(), PerfCounters::get_vm_rss_str(), - MemInfo::mem_limit_str()); - } - - static std::string limit_exceeded_errmsg_sys_str(int64_t bytes) { - auto err_msg = fmt::format( "process memory used {}, tc/jemalloc allocator cache {}, exceed limit {}, failed " "alloc size {}", PerfCounters::get_vm_rss_str(), MemInfo::allocator_cache_mem_str(), MemInfo::mem_limit_str(), print_bytes(bytes)); - ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err_msg); - return err_msg; + } + + static std::string process_mem_log_str() { + return fmt::format( + "process memory used {}, limit {}, hard limit {}, tc/jemalloc " + "allocator cache {}", + PerfCounters::get_vm_rss_str(), MemInfo::mem_limit_str(), + print_bytes(MemInfo::hard_mem_limit()), MemInfo::allocator_cache_mem_str()); } private: - // Limit on memory consumption, in bytes. If limit_ == -1, there is no consumption limit. Used in log_usage。 + Type _type; + + // Limit on memory consumption, in bytes. int64_t _limit; - // Group number in MemTracker::mem_tracker_pool, generated by the timestamp. + // Group number in MemTracker::mem_tracker_limiter_pool and MemTracker::mem_tracker_pool, generated by the timestamp. int64_t _group_num; - std::shared_ptr _parent; // The parent of this tracker. - - // this tracker limiter plus all of its ancestors - std::vector _all_ancestors; - // _all_ancestors with valid limits, except process tracker - std::vector _limited_ancestors; - // Consume size smaller than mem_tracker_consume_min_size_bytes will continue to accumulate // to avoid frequent calls to consume/release of MemTracker. std::atomic _untracked_mem = 0; - // Child trackers of this tracker limiter. Used for error reporting and - // listing only (i.e. updating the consumption of a parent tracker limiter does not - // update that of its children). 
- mutable std::mutex _child_tracker_limiter_lock; - std::list _child_tracker_limiters; - // Iterator into parent_->_child_tracker_limiters for this object. Stored to have O(1) remove. - std::list::iterator _child_tracker_it; + // Avoid frequent printing. + bool _enable_print_log_usage = false; + static std::atomic _enable_print_log_process_usage; - // The number of child trackers that have been added. - std::atomic_size_t _had_child_count = 0; - - bool _print_log_usage = false; - // mem hook record tracker cannot guarantee that the final consumption is 0, - // nor can it guarantee that the memory alloc and free are recorded in a one-to-one correspondence. - // In some cases, in order to avoid the cumulative error of the upper global tracker, - // the consumption of the current tracker is reset to zero. - bool _reset_zero = false; + // Iterator into mem_tracker_limiter_pool for this object. Stored to have O(1) remove. + std::list::iterator _tracker_limiter_group_it; }; -inline void MemTrackerLimiter::consume(int64_t bytes) { - if (bytes == 0) return; - for (auto& tracker : _all_ancestors) { - tracker->_consumption->add(bytes); - } -} - inline int64_t MemTrackerLimiter::add_untracked_mem(int64_t bytes) { _untracked_mem += bytes; if (std::abs(_untracked_mem) >= config::mem_tracker_consume_min_size_bytes) { @@ -276,18 +216,10 @@ inline int64_t MemTrackerLimiter::add_untracked_mem(int64_t bytes) { return 0; } -inline void MemTrackerLimiter::consume_local(int64_t bytes) { - if (bytes == 0) return; - for (auto& tracker : _all_ancestors) { - if (tracker->label() == "Process") return; - tracker->_consumption->add(bytes); - } -} - -inline void MemTrackerLimiter::cache_consume_local(int64_t bytes) { +inline void MemTrackerLimiter::cache_consume(int64_t bytes) { if (bytes == 0) return; int64_t consume_bytes = add_untracked_mem(bytes); - consume_local(consume_bytes); + consume(consume_bytes); } inline bool MemTrackerLimiter::try_consume(int64_t bytes, std::string& failed_msg) { 
@@ -297,30 +229,20 @@ inline bool MemTrackerLimiter::try_consume(int64_t bytes, std::string& failed_ms return true; } if (sys_mem_exceed_limit_check(bytes)) { - failed_msg = limit_exceeded_errmsg_sys_str(bytes); + failed_msg = process_limit_exceeded_errmsg_str(bytes); return false; } - int i; - // Walk the tracker tree top-down. - for (i = _all_ancestors.size() - 1; i >= 0; --i) { - MemTrackerLimiter* tracker = _all_ancestors[i]; - // Process tracker does not participate in the process memory limit, process tracker consumption is virtual memory, - // and there is a diff between the real physical memory value of the process. It is replaced by check_sys_mem_info. - if (tracker->limit() < 0 || tracker->label() == "Process") { - tracker->_consumption->add(bytes); // No limit at this tracker. - } else { - if (!tracker->_consumption->try_add(bytes, tracker->limit())) { - // Failed for this mem tracker. Roll back the ones that succeeded. - for (int j = _all_ancestors.size() - 1; j > i; --j) { - _all_ancestors[j]->_consumption->add(-bytes); - } - failed_msg = limit_exceeded_errmsg_prefix_str(bytes, tracker); - return false; - } + + if (_limit < 0) { + _consumption->add(bytes); // No limit at this tracker. + } else { + if (!_consumption->try_add(bytes, _limit)) { + // Failed for this mem tracker. Roll back the ones that succeeded. + _consumption->add(-bytes); + failed_msg = tracker_limit_exceeded_errmsg_str(bytes, this); + return false; } } - // Everyone succeeded, return. - DCHECK_EQ(i, -1); failed_msg = std::string(); return true; } @@ -328,15 +250,10 @@ inline bool MemTrackerLimiter::try_consume(int64_t bytes, std::string& failed_ms inline Status MemTrackerLimiter::check_limit(int64_t bytes) { if (bytes <= 0) return Status::OK(); if (sys_mem_exceed_limit_check(bytes)) { - return Status::MemoryLimitExceeded(limit_exceeded_errmsg_sys_str(bytes)); + return Status::MemoryLimitExceeded(process_limit_exceeded_errmsg_str(bytes)); } - int i; - // Walk the tracker tree top-down. 
- for (i = _limited_ancestors.size() - 1; i >= 0; --i) { - MemTrackerLimiter* tracker = _limited_ancestors[i]; - if (tracker->_consumption->current_value() + bytes > tracker->limit()) { - return Status::MemoryLimitExceeded(limit_exceeded_errmsg_prefix_str(bytes, tracker)); - } + if (_limit > 0 && _consumption->current_value() + bytes > _limit) { + return Status::MemoryLimitExceeded(tracker_limit_exceeded_errmsg_str(bytes, this)); } return Status::OK(); } diff --git a/be/src/runtime/memory/mem_tracker_task_pool.cpp b/be/src/runtime/memory/mem_tracker_task_pool.cpp deleted file mode 100644 index 28539703b4..0000000000 --- a/be/src/runtime/memory/mem_tracker_task_pool.cpp +++ /dev/null @@ -1,162 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "runtime/memory/mem_tracker_task_pool.h" - -#include "common/config.h" -#include "runtime/exec_env.h" -#include "util/pretty_printer.h" - -namespace doris { - -std::shared_ptr MemTrackerTaskPool::register_task_mem_tracker_impl( - const std::string& task_id, int64_t mem_limit, const std::string& label, - const std::shared_ptr& parent) { - DCHECK(!task_id.empty()); - std::lock_guard l(_task_tracker_lock); - // First time this task_id registered, make a new object, otherwise do nothing. - // Combine new tracker and emplace into one operation to avoid the use of locks - // Name for task MemTrackers. '$0' is replaced with the task id. - std::shared_ptr tracker; - bool new_emplace = _task_mem_trackers.lazy_emplace_l( - task_id, [&](const std::shared_ptr& v) { tracker = v; }, - [&](const auto& ctor) { - tracker = std::make_shared(mem_limit, label, parent); - ctor(task_id, tracker); - }); - if (new_emplace) { - LOG(INFO) << "Register query/load memory tracker, query/load id: " << task_id - << " limit: " << PrettyPrinter::print(mem_limit, TUnit::BYTES); - } - return tracker; -} - -std::shared_ptr MemTrackerTaskPool::register_query_mem_tracker( - const std::string& query_id, int64_t mem_limit) { - return register_task_mem_tracker_impl(query_id, mem_limit, fmt::format("Query#Id={}", query_id), - ExecEnv::GetInstance()->query_pool_mem_tracker()); -} - -std::shared_ptr MemTrackerTaskPool::register_query_scanner_mem_tracker( - const std::string& query_id) { - return register_task_mem_tracker_impl("Scanner#" + query_id, -1, - fmt::format("Scanner#Query#Id={}", query_id), - get_task_mem_tracker(query_id)); -} - -std::shared_ptr MemTrackerTaskPool::register_load_mem_tracker( - const std::string& load_id, int64_t mem_limit) { - // In load, the query id of the fragment is executed, which is the same as the load id of the load channel. 
- return register_task_mem_tracker_impl(load_id, mem_limit, fmt::format("Load#Id={}", load_id), - ExecEnv::GetInstance()->load_pool_mem_tracker()); -} - -std::shared_ptr MemTrackerTaskPool::register_load_scanner_mem_tracker( - const std::string& load_id) { - return register_task_mem_tracker_impl("Scanner#" + load_id, -1, - fmt::format("Scanner#Load#Id={}", load_id), - get_task_mem_tracker(load_id)); -} - -std::shared_ptr MemTrackerTaskPool::get_task_mem_tracker( - const std::string& task_id) { - DCHECK(!task_id.empty()); - std::shared_ptr tracker = nullptr; - // Avoid using locks to resolve erase conflicts - _task_mem_trackers.if_contains( - task_id, [&tracker](const std::shared_ptr& v) { tracker = v; }); - return tracker; -} - -void MemTrackerTaskPool::logout_task_mem_tracker() { - std::lock_guard l(_task_tracker_lock); - std::vector expired_task_ids; - for (auto it = _task_mem_trackers.begin(); it != _task_mem_trackers.end(); it++) { - if (!it->second) { - // Unknown exception case with high concurrency, after _task_mem_trackers.erase, - // the key still exists in _task_mem_trackers. https://github.com/apache/incubator-doris/issues/10006 - expired_task_ids.emplace_back(it->first); - } else if (it->second.use_count() == 1 && it->second->had_child_count() != 0) { - // No RuntimeState uses this task MemTrackerLimiter, it is only referenced by this map, - // and tracker was not created soon, delete it. - // - // If consumption is not equal to 0 before query mem tracker is destructed, - // there are two possibilities in theory. - // 1. A memory leak occurs. - // 2. memory consumed on query mem tracker, released on other trackers, and no manual transfer - // between the two trackers. - // At present, it is impossible to effectively locate which memory consume and release on different trackers, - // so query memory leaks cannot be found. 
- LOG(INFO) << fmt::format( - "Deregister query/load memory tracker, queryId={}, Limit={}, CurrUsed={}, " - "PeakUsed={}", - it->first, MemTracker::print_bytes(it->second->limit()), - MemTracker::print_bytes(it->second->consumption()), - MemTracker::print_bytes(it->second->peak_consumption())); - expired_task_ids.emplace_back(it->first); - } else if (config::memory_debug) { - it->second->print_log_usage("query routine"); - it->second->enable_print_log_usage(); - } - } - for (auto tid : expired_task_ids) { - // Verify the condition again to make sure the tracker is not being used again. - _task_mem_trackers.erase_if(tid, [&](const std::shared_ptr& v) { - return !v || v.use_count() == 1; - }); - } -} - -// TODO(zxy) More observable methods -// /// Logs the usage of 'limit' number of queries based on maximum total memory -// /// consumption. -// std::string MemTracker::LogTopNQueries(int limit) { -// if (limit == 0) return ""; -// priority_queue, std::vector>, -// std::greater>> -// min_pq; -// GetTopNQueries(min_pq, limit); -// std::vector usage_strings(min_pq.size()); -// while (!min_pq.empty()) { -// usage_strings.push_back(min_pq.top().second); -// min_pq.pop(); -// } -// std::reverse(usage_strings.begin(), usage_strings.end()); -// return join(usage_strings, "\n"); -// } - -// /// Helper function for LogTopNQueries that iterates through the MemTracker hierarchy -// /// and populates 'min_pq' with 'limit' number of elements (that contain state related -// /// to query MemTrackers) based on maximum total memory consumption. 
-// void MemTracker::GetTopNQueries( -// priority_queue, std::vector>, -// greater>>& min_pq, -// int limit) { -// list> children; -// { -// lock_guard l(child_trackers_lock_); -// children = child_trackers_; -// } -// for (const auto& child_weak : children) { -// shared_ptr child = child_weak.lock(); -// if (child) { -// child->GetTopNQueries(min_pq, limit); -// } -// } -// } - -} // namespace doris diff --git a/be/src/runtime/memory/mem_tracker_task_pool.h b/be/src/runtime/memory/mem_tracker_task_pool.h deleted file mode 100644 index 9e5813ba03..0000000000 --- a/be/src/runtime/memory/mem_tracker_task_pool.h +++ /dev/null @@ -1,68 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include "runtime/memory/mem_tracker_limiter.h" - -namespace doris { - -// TODO: phmap `parallel_flat_hash_map` is not thread-safe. If it is not fixed in the future, -// can consider using other maps instead. -using TaskTrackersMap = phmap::parallel_flat_hash_map< - std::string, std::shared_ptr, - phmap::priv::hash_default_hash, phmap::priv::hash_default_eq, - std::allocator>>, 12, - std::mutex>; - -// Global task pool for query MemTrackers. Owned by ExecEnv. 
-class MemTrackerTaskPool { -public: - // Construct a MemTrackerLimiter object for 'task_id' with 'mem_limit' as the memory limit. - // The MemTrackerLimiter is a child of the pool MemTrackerLimiter, Calling this with the same - // 'task_id' will return the same MemTrackerLimiter object. This is used to track the local - // memory usage of all tasks executing. The first time this is called for a task, - // a new MemTrackerLimiter object is created with the pool tracker as its parent. - // Newly created trackers will always have a limit of -1. - std::shared_ptr register_task_mem_tracker_impl( - const std::string& task_id, int64_t mem_limit, const std::string& label, - const std::shared_ptr& parent); - std::shared_ptr register_query_mem_tracker(const std::string& query_id, - int64_t mem_limit); - std::shared_ptr register_query_scanner_mem_tracker( - const std::string& query_id); - std::shared_ptr register_load_mem_tracker(const std::string& load_id, - int64_t mem_limit); - std::shared_ptr register_load_scanner_mem_tracker( - const std::string& load_id); - - std::shared_ptr get_task_mem_tracker(const std::string& task_id); - - // Remove the mem tracker that has ended the query. - void logout_task_mem_tracker(); - -private: - // All per-task MemTrackerLimiter objects. - // The life cycle of task MemTrackerLimiter in the process is the same as task runtime state, - // MemTrackers will be removed from this map after query finish or cancel. 
- TaskTrackersMap _task_mem_trackers; - std::mutex _task_tracker_lock; -}; - -} // namespace doris \ No newline at end of file diff --git a/be/src/runtime/memory/system_allocator.cpp b/be/src/runtime/memory/system_allocator.cpp index 78a9e7911e..f8dd402ad1 100644 --- a/be/src/runtime/memory/system_allocator.cpp +++ b/be/src/runtime/memory/system_allocator.cpp @@ -45,7 +45,7 @@ uint8_t* SystemAllocator::allocate_via_malloc(size_t length) { char buf[64]; auto err = fmt::format("fail to allocate mem via posix_memalign, res={}, errmsg={}.", res, strerror_r(res, buf, 64)); - ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + MemTrackerLimiter::print_log_process_usage(err); LOG(ERROR) << err; return nullptr; } diff --git a/be/src/runtime/memory/thread_mem_tracker_mgr.cpp b/be/src/runtime/memory/thread_mem_tracker_mgr.cpp index b19f9181b0..4273860468 100644 --- a/be/src/runtime/memory/thread_mem_tracker_mgr.cpp +++ b/be/src/runtime/memory/thread_mem_tracker_mgr.cpp @@ -19,35 +19,32 @@ #include "runtime/exec_env.h" #include "runtime/fragment_mgr.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "service/backend_options.h" namespace doris { void ThreadMemTrackerMgr::attach_limiter_tracker( - const std::string& task_id, const TUniqueId& fragment_instance_id, - const std::shared_ptr& mem_tracker) { + const std::shared_ptr& mem_tracker, + const TUniqueId& fragment_instance_id) { DCHECK(mem_tracker); flush_untracked_mem(); - _task_id_stack.push_back(task_id); - _fragment_instance_id_stack.push_back(fragment_instance_id); - _limiter_tracker_stack.push_back(mem_tracker); + _fragment_instance_id = fragment_instance_id; + _limiter_tracker = mem_tracker; _limiter_tracker_raw = mem_tracker.get(); } -void ThreadMemTrackerMgr::detach_limiter_tracker() { - DCHECK(!_limiter_tracker_stack.empty()); +void ThreadMemTrackerMgr::detach_limiter_tracker( + const std::shared_ptr& old_mem_tracker) { flush_untracked_mem(); - _task_id_stack.pop_back(); - 
_fragment_instance_id_stack.pop_back(); - _limiter_tracker_stack.pop_back(); - _limiter_tracker_raw = _limiter_tracker_stack.back().get(); + _fragment_instance_id = TUniqueId(); + _limiter_tracker = old_mem_tracker; + _limiter_tracker_raw = old_mem_tracker.get(); } void ThreadMemTrackerMgr::exceeded_cancel_task(const std::string& cancel_details) { - if (_fragment_instance_id_stack.back() != TUniqueId()) { + if (_fragment_instance_id != TUniqueId()) { ExecEnv::GetInstance()->fragment_mgr()->cancel( - _fragment_instance_id_stack.back(), PPlanFragmentCancelReason::MEMORY_LIMIT_EXCEED, + _fragment_instance_id, PPlanFragmentCancelReason::MEMORY_LIMIT_EXCEED, cancel_details); } } @@ -57,8 +54,7 @@ void ThreadMemTrackerMgr::exceeded(const std::string& failed_msg) { _cb_func(); } auto cancel_msg = _limiter_tracker_raw->mem_limit_exceeded( - fmt::format("exec node:<{}>", last_consumer_tracker()), - _limiter_tracker_raw->parent().get(), failed_msg); + fmt::format("execute:<{}>", last_consumer_tracker()), failed_msg); if (is_attach_query()) { exceeded_cancel_task(cancel_msg); } diff --git a/be/src/runtime/memory/thread_mem_tracker_mgr.h b/be/src/runtime/memory/thread_mem_tracker_mgr.h index 6c8cae39f9..1ab3e7bce4 100644 --- a/be/src/runtime/memory/thread_mem_tracker_mgr.h +++ b/be/src/runtime/memory/thread_mem_tracker_mgr.h @@ -27,56 +27,34 @@ namespace doris { -extern bthread_key_t btls_key; -static const bthread_key_t EMPTY_BTLS_KEY = {0, 0}; - using ExceedCallBack = void (*)(); -// TCMalloc new/delete Hook is counted in the memory_tracker of the current thread. -// -// In the original design, the MemTracker consume method is called before the memory is allocated. -// If the consume succeeds, the memory is actually allocated, otherwise an exception is thrown. -// But the statistics of memory through TCMalloc new/delete Hook are after the memory is actually allocated, -// which is different from the previous behavior. Therefore, when alloc for some large memory. 
+// Memory Hook is counted in the memory tracker of the current thread. class ThreadMemTrackerMgr { public: ThreadMemTrackerMgr() {} ~ThreadMemTrackerMgr() { // if _init == false, exec env is not initialized when init(). and never consumed mem tracker once. - if (_init) { - flush_untracked_mem(); - if (bthread_self() == 0) { - DCHECK(_consumer_tracker_stack.empty()); - DCHECK(_limiter_tracker_stack.size() == 1) - << ", limiter_tracker_stack.size(): " << _limiter_tracker_stack.size(); - } - } + if (_init) flush_untracked_mem(); } - // only for tcmalloc hook + // only for memory hook static void consume_no_attach(int64_t size) { if (ExecEnv::GetInstance()->initialized()) { - ExecEnv::GetInstance()->orphan_mem_tracker_raw()->consume(size); + ExecEnv::GetInstance()->orphan_mem_tracker()->consume(size); } } - // After thread initialization, calling `init` again must call `clear_untracked_mems` first - // to avoid memory tracking loss. void init(); - void init_impl(); - void clear(); - // After attach, the current thread TCMalloc Hook starts to consume/release task mem_tracker - void attach_limiter_tracker(const std::string& task_id, const TUniqueId& fragment_instance_id, - const std::shared_ptr& mem_tracker); - void detach_limiter_tracker(); - // Usually there are only two layers, the first is the default trackerOrphan; - // the second is the query tracker or bthread tracker. - int64_t get_attach_layers() { return _limiter_tracker_stack.size(); } + // After attach, the current thread Memory Hook starts to consume/release task mem_tracker + void attach_limiter_tracker(const std::shared_ptr& mem_tracker, + const TUniqueId& fragment_instance_id); + void detach_limiter_tracker(const std::shared_ptr& old_mem_tracker = + ExecEnv::GetInstance()->orphan_mem_tracker()); // Must be fast enough! Thread update_tracker may be called very frequently. - // So for performance, add tracker as early as possible, and then call update_tracker. 
bool push_consumer_tracker(MemTracker* mem_tracker); void pop_consumer_tracker(); std::string last_consumer_tracker() { @@ -96,7 +74,7 @@ public: void set_exceed_call_back(ExceedCallBack cb_func) { _cb_func = cb_func; } - // Note that, If call the memory allocation operation in TCMalloc new/delete Hook, + // Note that, If call the memory allocation operation in Memory Hook, // such as calling LOG/iostream/sstream/stringstream/etc. related methods, // must increase the control to avoid entering infinite recursion, otherwise it may cause crash or stuck, void consume(int64_t size); @@ -104,11 +82,11 @@ public: template void flush_untracked_mem(); - bool is_attach_query() { return _fragment_instance_id_stack.back() != TUniqueId(); } + bool is_attach_query() { return _fragment_instance_id != TUniqueId(); } std::shared_ptr limiter_mem_tracker() { - if (!_init) init(); - return _limiter_tracker_stack.back(); + if (!_init) init(); // ExecEnv not initialized when thread is created. + return _limiter_tracker; } MemTrackerLimiter* limiter_mem_tracker_raw() { if (!_init) init(); @@ -122,14 +100,13 @@ public: std::string print_debug_string() { fmt::memory_buffer consumer_tracker_buf; for (const auto& v : _consumer_tracker_stack) { - fmt::format_to(consumer_tracker_buf, "{}, ", - MemTracker::log_usage(v->make_snapshot(0))); + fmt::format_to(consumer_tracker_buf, "{}, ", MemTracker::log_usage(v->make_snapshot())); } return fmt::format( - "ThreadMemTrackerMgr debug, _untracked_mem:{}, _task_id:{}, " + "ThreadMemTrackerMgr debug, _untracked_mem:{}, " "_limiter_tracker:<{}>, _consumer_tracker_stack:<{}>", - std::to_string(_untracked_mem), _task_id_stack.back(), - _limiter_tracker_raw->log_usage(1), fmt::to_string(consumer_tracker_buf)); + std::to_string(_untracked_mem), _limiter_tracker_raw->log_usage(), + fmt::to_string(consumer_tracker_buf)); } private: @@ -141,8 +118,7 @@ private: private: // is false: ExecEnv::GetInstance()->initialized() = false when thread local is initialized 
bool _init = false; - // Cache untracked mem, only update to _untracked_mems when switching mem tracker. - // Frequent calls to unordered_map _untracked_mems[] in consume will degrade performance. + // Cache untracked mem. int64_t _untracked_mem = 0; int64_t old_untracked_mem = 0; @@ -151,8 +127,7 @@ private: std::string failed_msg = std::string(); - // _limiter_tracker_stack[0] = orphan_mem_tracker - std::vector> _limiter_tracker_stack; + std::shared_ptr _limiter_tracker; MemTrackerLimiter* _limiter_tracker_raw = nullptr; std::vector _consumer_tracker_stack; @@ -161,35 +136,18 @@ private: // If there is a memory new/delete operation in the consume method, it may enter infinite recursion. bool _stop_consume = false; bool _check_attach = true; - std::vector _task_id_stack; - std::vector _fragment_instance_id_stack; + TUniqueId _fragment_instance_id = TUniqueId(); ExceedCallBack _cb_func = nullptr; }; inline void ThreadMemTrackerMgr::init() { - DCHECK(_limiter_tracker_stack.size() == 0); - DCHECK(_limiter_tracker_raw == nullptr); - init_impl(); -} - -inline void ThreadMemTrackerMgr::init_impl() { - _limiter_tracker_stack.push_back(ExecEnv::GetInstance()->orphan_mem_tracker()); + DCHECK(_limiter_tracker == nullptr); + _limiter_tracker = ExecEnv::GetInstance()->orphan_mem_tracker(); _limiter_tracker_raw = ExecEnv::GetInstance()->orphan_mem_tracker_raw(); - _task_id_stack.push_back(""); - _fragment_instance_id_stack.push_back(TUniqueId()); _check_limit = true; _init = true; } -inline void ThreadMemTrackerMgr::clear() { - flush_untracked_mem(); - std::vector>().swap(_limiter_tracker_stack); - std::vector().swap(_consumer_tracker_stack); - std::vector().swap(_task_id_stack); - std::vector().swap(_fragment_instance_id_stack); - init_impl(); -} - inline bool ThreadMemTrackerMgr::push_consumer_tracker(MemTracker* tracker) { DCHECK(tracker) << print_debug_string(); if (std::count(_consumer_tracker_stack.begin(), _consumer_tracker_stack.end(), tracker)) { @@ -226,9 +184,9 @@ 
inline void ThreadMemTrackerMgr::consume(int64_t size) { template inline void ThreadMemTrackerMgr::flush_untracked_mem() { // Temporary memory may be allocated during the consumption of the mem tracker, which will lead to entering - // the TCMalloc Hook again, so suspend consumption to avoid falling into an infinite loop. + // the Memory Hook again, so suspend consumption to avoid falling into an infinite loop. _stop_consume = true; - if (!_init) init(); + if (!_init) init(); // ExecEnv not initialized when thread is created. DCHECK(_limiter_tracker_raw); old_untracked_mem = _untracked_mem; if (_count_scope_mem) _scope_mem += _untracked_mem; diff --git a/be/src/runtime/plan_fragment_executor.cpp b/be/src/runtime/plan_fragment_executor.cpp index 6e60c27cb9..5f83fe454a 100644 --- a/be/src/runtime/plan_fragment_executor.cpp +++ b/be/src/runtime/plan_fragment_executor.cpp @@ -95,10 +95,11 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request, fragments_ctx == nullptr ? 
request.query_globals : fragments_ctx->query_globals; _runtime_state.reset(new RuntimeState(params, request.query_options, query_globals, _exec_env)); _runtime_state->set_query_fragments_ctx(fragments_ctx); + _runtime_state->set_query_mem_tracker(fragments_ctx->query_mem_tracker); _runtime_state->set_tracer(std::move(tracer)); - RETURN_IF_ERROR(_runtime_state->init_mem_trackers(_query_id)); SCOPED_ATTACH_TASK(_runtime_state.get()); + _runtime_state->init_scanner_mem_trackers(); _runtime_state->runtime_filter_mgr()->init(); _runtime_state->set_be_number(request.backend_num); if (request.__isset.backend_id) { @@ -230,7 +231,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request, } Status PlanFragmentExecutor::open() { - int64_t mem_limit = _runtime_state->instance_mem_tracker()->limit(); + int64_t mem_limit = _runtime_state->query_mem_tracker()->limit(); LOG_INFO("PlanFragmentExecutor::open") .tag("query_id", _query_id) .tag("instance_id", _runtime_state->fragment_instance_id()) @@ -459,7 +460,7 @@ void PlanFragmentExecutor::_collect_node_statistics() { DCHECK(_runtime_state->backend_id() != -1); NodeStatistics* node_statistics = _query_statistics->add_nodes_statistics(_runtime_state->backend_id()); - node_statistics->add_peak_memory(_runtime_state->instance_mem_tracker()->peak_consumption()); + node_statistics->add_peak_memory(_runtime_state->query_mem_tracker()->peak_consumption()); } void PlanFragmentExecutor::report_profile() { diff --git a/be/src/runtime/query_fragments_ctx.h b/be/src/runtime/query_fragments_ctx.h index 8f9ceb38d6..1fc58f2f28 100644 --- a/be/src/runtime/query_fragments_ctx.h +++ b/be/src/runtime/query_fragments_ctx.h @@ -26,6 +26,8 @@ #include "gen_cpp/Types_types.h" // for TUniqueId #include "runtime/datetime_value.h" #include "runtime/exec_env.h" +#include "runtime/memory/mem_tracker_limiter.h" +#include "util/pretty_printer.h" #include "util/threadpool.h" namespace doris { @@ -41,6 +43,21 @@ public: _start_time = 
DateTimeValue::local_time(); } + ~QueryFragmentsCtx() { + // query mem tracker consumption is equal to 0, it means that after QueryFragmentsCtx is created, + // it is found that query already exists in _fragments_ctx_map, and query mem tracker is not used. + // query mem tracker consumption is not equal to 0 after use, because there is memory consumed + // on query mem tracker, released on other trackers. + if (query_mem_tracker->consumption() != 0) { + LOG(INFO) << fmt::format( + "Deregister query/load memory tracker, queryId={}, Limit={}, CurrUsed={}, " + "PeakUsed={}", + print_id(query_id), MemTracker::print_bytes(query_mem_tracker->limit()), + MemTracker::print_bytes(query_mem_tracker->consumption()), + MemTracker::print_bytes(query_mem_tracker->peak_consumption())); + } + } + bool countdown() { return fragment_num.fetch_sub(1) == 1; } bool is_timeout(const DateTimeValue& now) const { @@ -99,6 +116,8 @@ public: std::atomic fragment_num; int timeout_second; ObjectPool obj_pool; + // MemTracker that is shared by all fragment instances running on this host. 
+ std::shared_ptr query_mem_tracker; private: ExecEnv* _exec_env; diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp index 8087d6efc5..7bc894e1da 100644 --- a/be/src/runtime/runtime_filter_mgr.cpp +++ b/be/src/runtime/runtime_filter_mgr.cpp @@ -44,7 +44,7 @@ RuntimeFilterMgr::RuntimeFilterMgr(const UniqueId& query_id, RuntimeState* state RuntimeFilterMgr::~RuntimeFilterMgr() {} Status RuntimeFilterMgr::init() { - DCHECK(_state->instance_mem_tracker() != nullptr); + DCHECK(_state->query_mem_tracker() != nullptr); _tracker = std::make_unique("RuntimeFilterMgr"); return Status::OK(); } diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index 3766431f50..07b04f5f09 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -34,7 +34,6 @@ #include "runtime/exec_env.h" #include "runtime/load_path_mgr.h" #include "runtime/memory/mem_tracker.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/runtime_filter_mgr.h" #include "util/file_utils.h" #include "util/load_error_hub.h" @@ -160,7 +159,6 @@ RuntimeState::~RuntimeState() { _error_hub->close(); } - // Manually release the child mem tracker before _instance_mem_tracker is destructed. _obj_pool->clear(); _runtime_filter_mgr.reset(); } @@ -216,53 +214,10 @@ Status RuntimeState::init(const TUniqueId& fragment_instance_id, const TQueryOpt } Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) { - bool has_query_mem_tracker = _query_options.__isset.mem_limit && (_query_options.mem_limit > 0); - int64_t bytes_limit = has_query_mem_tracker ? _query_options.mem_limit : -1; - if (bytes_limit > ExecEnv::GetInstance()->process_mem_tracker()->limit()) { - VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES) - << " exceeds process memory limit of " - << PrettyPrinter::print(ExecEnv::GetInstance()->process_mem_tracker()->limit(), - TUnit::BYTES) - << ". 
Using process memory limit instead"; - bytes_limit = ExecEnv::GetInstance()->process_mem_tracker()->limit(); - } - auto mem_tracker_counter = ADD_COUNTER(&_profile, "MemoryLimit", TUnit::BYTES); - mem_tracker_counter->set(bytes_limit); - - if (query_type() == TQueryType::SELECT) { - _query_mem_tracker = - _exec_env->task_pool_mem_tracker_registry()->register_query_mem_tracker( - print_id(query_id), bytes_limit); - _scanner_mem_tracker = - _exec_env->task_pool_mem_tracker_registry()->register_query_scanner_mem_tracker( - print_id(query_id)); - } else if (query_type() == TQueryType::LOAD) { - _query_mem_tracker = _exec_env->task_pool_mem_tracker_registry()->register_load_mem_tracker( - print_id(query_id), bytes_limit); - _scanner_mem_tracker = - _exec_env->task_pool_mem_tracker_registry()->register_load_scanner_mem_tracker( - print_id(query_id)); - } else { - DCHECK(false); - _query_mem_tracker = ExecEnv::GetInstance()->query_pool_mem_tracker(); - } - _query_mem_tracker->enable_reset_zero(); - - _instance_mem_tracker = std::make_shared( - -1, "RuntimeState:instance:" + print_id(_fragment_instance_id), _query_mem_tracker, - &_profile); - - if (_query_options.is_report_success) { - _query_mem_tracker->enable_print_log_usage(); - _instance_mem_tracker->enable_print_log_usage(); - } - - return Status::OK(); -} - -Status RuntimeState::init_instance_mem_tracker() { - _query_mem_tracker = nullptr; - _instance_mem_tracker = std::make_shared(-1, "RuntimeState:instance"); + _query_mem_tracker = std::make_shared( + MemTrackerLimiter::Type::QUERY, fmt::format("TestQuery#Id={}", print_id(query_id))); + _scanner_mem_tracker = + std::make_shared(fmt::format("TestScanner#QueryId={}", print_id(query_id))); return Status::OK(); } @@ -326,9 +281,7 @@ Status RuntimeState::set_mem_limit_exceeded(const std::string& msg) { Status RuntimeState::check_query_state(const std::string& msg) { // TODO: it would be nice if this also checked for cancellation, but doing so breaks // cases where we 
use Status::Cancelled("Cancelled") to indicate that the limit was reached. - if (thread_context() - ->_thread_mem_tracker_mgr->limiter_mem_tracker_raw() - ->any_limit_exceeded()) { + if (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->limit_exceeded()) { RETURN_LIMIT_EXCEEDED(this, msg); } return query_status(); diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 83f651725c..69b40f6c6f 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -76,14 +76,13 @@ public: Status init(const TUniqueId& fragment_instance_id, const TQueryOptions& query_options, const TQueryGlobals& query_globals, ExecEnv* exec_env); - // Set up four-level hierarchy of mem trackers: process, query, fragment instance. - // The instance tracker is tied to our profile. - // Specific parts of the fragment (i.e. exec nodes, sinks, data stream senders, etc) - // will add a fourth level when they are initialized. - Status init_mem_trackers(const TUniqueId& query_id); - - // for ut only - Status init_instance_mem_tracker(); + // after SCOPED_ATTACH_TASK; + void init_scanner_mem_trackers() { + _scanner_mem_tracker = std::make_shared( + fmt::format("Scanner#QueryId={}", print_id(_query_id))); + } + // for ut and non-query. + Status init_mem_trackers(const TUniqueId& query_id = TUniqueId()); // Gets/Creates the query wide block mgr. 
Status create_block_mgr(); @@ -116,8 +115,7 @@ public: const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; } ExecEnv* exec_env() { return _exec_env; } std::shared_ptr query_mem_tracker() { return _query_mem_tracker; } - std::shared_ptr instance_mem_tracker() { return _instance_mem_tracker; } - std::shared_ptr scanner_mem_tracker() { return _scanner_mem_tracker; } + std::shared_ptr scanner_mem_tracker() { return _scanner_mem_tracker; } ThreadResourceMgr::ResourcePool* resource_pool() { return _resource_pool; } void set_fragment_root_id(PlanNodeId id) { @@ -382,6 +380,10 @@ public: QueryFragmentsCtx* get_query_fragments_ctx() { return _query_ctx; } + void set_query_mem_tracker(const std::shared_ptr& tracker) { + _query_mem_tracker = tracker; + } + OpentelemetryTracer get_tracer() { return _tracer; } void set_tracer(OpentelemetryTracer&& tracer) { _tracer = std::move(tracer); } @@ -398,15 +400,9 @@ private: static const int DEFAULT_BATCH_SIZE = 2048; - // MemTracker that is shared by all fragment instances running on this host. - // The query mem tracker must be released after the _instance_mem_tracker. std::shared_ptr _query_mem_tracker; - // Memory usage of this fragment instance - std::shared_ptr _instance_mem_tracker; - // Count the memory consumption of Scanner, independent and unique for each query, - // this means that scnner memory does not count into query mem tracker, - // label is `Scanner#{queryId}`. - std::shared_ptr _scanner_mem_tracker; + // Count the memory consumption of Scanner + std::shared_ptr _scanner_mem_tracker; // put runtime state before _obj_pool, so that it will be deconstructed after // _obj_pool. Because some of object in _obj_pool will use profile when deconstructing. 
diff --git a/be/src/runtime/sorted_run_merger.cc b/be/src/runtime/sorted_run_merger.cc index 28d347462f..32bdad9a6d 100644 --- a/be/src/runtime/sorted_run_merger.cc +++ b/be/src/runtime/sorted_run_merger.cc @@ -183,7 +183,7 @@ private: std::condition_variable _batch_prepared_cv; void process_sorted_run_task(const std::shared_ptr& mem_tracker) { - SCOPED_ATTACH_TASK(mem_tracker, ThreadContext::TaskType::QUERY); + SCOPED_ATTACH_TASK(mem_tracker); std::unique_lock lock(_mutex); while (true) { _batch_prepared_cv.wait(lock, [this]() { return !_backup_ready.load(); }); diff --git a/be/src/runtime/stream_load/stream_load_pipe.h b/be/src/runtime/stream_load/stream_load_pipe.h index 106426653e..fac16b81ff 100644 --- a/be/src/runtime/stream_load/stream_load_pipe.h +++ b/be/src/runtime/stream_load/stream_load_pipe.h @@ -47,7 +47,7 @@ public: _use_proto(use_proto) {} virtual ~StreamLoadPipe() { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->nursery_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); while (!_buf_queue.empty()) _buf_queue.pop_front(); } @@ -119,7 +119,7 @@ public: } Status read(uint8_t* data, int64_t data_size, int64_t* bytes_read, bool* eof) override { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->nursery_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); *bytes_read = 0; while (*bytes_read < data_size) { std::unique_lock l(_lock); diff --git a/be/src/runtime/thread_context.cpp b/be/src/runtime/thread_context.cpp index 442fe755a6..cd593f05ad 100644 --- a/be/src/runtime/thread_context.cpp +++ b/be/src/runtime/thread_context.cpp @@ -39,22 +39,14 @@ ScopeMemCount::~ScopeMemCount() { } AttachTask::AttachTask(const std::shared_ptr& mem_tracker, - const ThreadContext::TaskType& type, const std::string& task_id, - const TUniqueId& fragment_instance_id) { - DCHECK(mem_tracker); - thread_context()->attach_task(type, 
task_id, fragment_instance_id, mem_tracker); + const std::string& task_id, const TUniqueId& fragment_instance_id) { + thread_context()->attach_task(task_id, fragment_instance_id, mem_tracker); } AttachTask::AttachTask(RuntimeState* runtime_state) { -#ifndef BE_TEST - DCHECK(print_id(runtime_state->query_id()) != ""); - DCHECK(runtime_state->fragment_instance_id() != TUniqueId()); -#endif // BE_TEST - DCHECK(runtime_state->instance_mem_tracker()); - thread_context()->attach_task(ThreadContext::query_to_task_type(runtime_state->query_type()), - print_id(runtime_state->query_id()), + thread_context()->attach_task(print_id(runtime_state->query_id()), runtime_state->fragment_instance_id(), - runtime_state->instance_mem_tracker()); + runtime_state->query_mem_tracker()); } AttachTask::~AttachTask() { @@ -65,14 +57,13 @@ AttachTask::~AttachTask() { } SwitchThreadMemTrackerLimiter::SwitchThreadMemTrackerLimiter( - const std::shared_ptr& mem_tracker_limiter) { - DCHECK(mem_tracker_limiter); - thread_context()->_thread_mem_tracker_mgr->attach_limiter_tracker("", TUniqueId(), - mem_tracker_limiter); + const std::shared_ptr& mem_tracker) { + _old_mem_tracker = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker(); + thread_context()->_thread_mem_tracker_mgr->attach_limiter_tracker(mem_tracker, TUniqueId()); } SwitchThreadMemTrackerLimiter::~SwitchThreadMemTrackerLimiter() { - thread_context()->_thread_mem_tracker_mgr->detach_limiter_tracker(); + thread_context()->_thread_mem_tracker_mgr->detach_limiter_tracker(_old_mem_tracker); } AddThreadMemTrackerConsumer::AddThreadMemTrackerConsumer(MemTracker* mem_tracker) { diff --git a/be/src/runtime/thread_context.h b/be/src/runtime/thread_context.h index ca09df0ecf..d07cbe7056 100644 --- a/be/src/runtime/thread_context.h +++ b/be/src/runtime/thread_context.h @@ -68,8 +68,8 @@ // Usually used after SCOPED_ATTACH_TASK, during query execution. #define RETURN_LIMIT_EXCEEDED(state, msg, ...) 
\ return doris::thread_context() \ - ->_thread_mem_tracker_mgr->limiter_mem_tracker_raw() \ - ->mem_limit_exceeded( \ + ->_thread_mem_tracker_mgr->limiter_mem_tracker() \ + ->fragment_mem_limit_exceeded( \ state, \ fmt::format("exec node:<{}>, {}", \ doris::thread_context() \ @@ -140,82 +140,36 @@ inline thread_local bthread_t bthread_id; // // There may be other optional info to be added later. class ThreadContext { -public: - enum TaskType { - UNKNOWN = 0, - QUERY = 1, - LOAD = 2, - COMPACTION = 3, - STORAGE = 4, - BRPC = 5 - // to be added ... - }; - inline static const std::string TaskTypeStr[] = {"UNKNOWN", "QUERY", "LOAD", - "COMPACTION", "STORAGE", "BRPC"}; - public: ThreadContext() { _thread_mem_tracker_mgr.reset(new ThreadMemTrackerMgr()); - init(); - } - - ~ThreadContext() { - // Restore to the memory state before init=true to ensure accurate overall memory statistics. - // Thereby ensuring that the memory alloc size is not tracked during the initialization of the - // ThreadContext before `init = true in ThreadContextPtr()`, - // Equal to the size of the memory release that is not tracked during the destruction of the - // ThreadContext after `init = false in ~ThreadContextPtr()`, - if (ExecEnv::GetInstance()->initialized()) _thread_mem_tracker_mgr->clear(); - thread_context_ptr.init = false; - } - - void init() { - _type = TaskType::UNKNOWN; if (ExecEnv::GetInstance()->initialized()) _thread_mem_tracker_mgr->init(); - _thread_id = get_thread_id(); } - void attach_task(const TaskType& type, const std::string& task_id, - const TUniqueId& fragment_instance_id, + ~ThreadContext() { thread_context_ptr.init = false; } + + void attach_task(const std::string& task_id, const TUniqueId& fragment_instance_id, const std::shared_ptr& mem_tracker) { #ifndef BE_TEST // will only attach_task at the beginning of the thread function, there should be no duplicate attach_task. 
- DCHECK((_type == TaskType::UNKNOWN || _type == TaskType::BRPC) && - type != TaskType::UNKNOWN && _task_id == "" && mem_tracker != nullptr) - << ",new tracker label: " << mem_tracker->label() << ",old tracker label: " - << _thread_mem_tracker_mgr->limiter_mem_tracker_raw()->label(); + DCHECK(mem_tracker); + // Orphan is thread default tracker. + DCHECK(_thread_mem_tracker_mgr->limiter_mem_tracker()->label() == "Orphan") + << ", attach mem tracker label: " << mem_tracker->label(); #endif - _type = type; _task_id = task_id; _fragment_instance_id = fragment_instance_id; - _thread_mem_tracker_mgr->attach_limiter_tracker(task_id, fragment_instance_id, mem_tracker); + _thread_mem_tracker_mgr->attach_limiter_tracker(mem_tracker, fragment_instance_id); } void detach_task() { - _type = TaskType::UNKNOWN; _task_id = ""; _fragment_instance_id = TUniqueId(); _thread_mem_tracker_mgr->detach_limiter_tracker(); } - const TaskType& type() const { return _type; } - const void set_type(const TaskType& type) { _type = type; } - const std::string& task_id() const { return _task_id; } - const std::string& thread_id_str() const { return _thread_id; } const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; } - static TaskType query_to_task_type(const TQueryType::type& query_type) { - switch (query_type) { - case TQueryType::SELECT: - return TaskType::QUERY; - case TQueryType::LOAD: - return TaskType::LOAD; - default: - DCHECK(false); - return TaskType::UNKNOWN; - } - } - std::string get_thread_id() { std::stringstream ss; ss << std::this_thread::get_id(); @@ -231,23 +185,23 @@ public: std::unique_ptr _thread_mem_tracker_mgr; private: - std::string _thread_id; - TaskType _type; - std::string _task_id; + std::string _task_id = ""; TUniqueId _fragment_instance_id; }; -static void attach_bthread() { +// Cache the pointer of bthread local in pthead local, +// Avoid calling bthread_getspecific frequently to get bthread local, which has performance problems. 
+static void pthread_attach_bthread() { bthread_id = bthread_self(); bthread_context = static_cast(bthread_getspecific(btls_key)); if (bthread_context == nullptr) { // A new bthread starts, two scenarios: // 1. First call to bthread_getspecific (and before any bthread_setspecific) returns NULL // 2. There are not enough reusable btls in btls pool. -#ifndef BE_TEST - DCHECK(ExecEnv::GetInstance()->initialized()); -#endif - // Create thread-local data on demand. + // else, two scenarios: + // 1. A new bthread starts, but get a reuses btls. + // 2. A pthread switch occurs. Because the pthread switch cannot be accurately identified at the moment. + // So tracker call reset 0 like reuses btls. bthread_context = new ThreadContext; // set the data so that next time bthread_getspecific in the thread returns the data. CHECK_EQ(0, bthread_setspecific(btls_key, bthread_context)); @@ -257,9 +211,9 @@ static void attach_bthread() { static ThreadContext* thread_context() { if (bthread_self() != 0) { if (bthread_self() != bthread_id) { - // A new bthread starts or pthread switch occurs. + // A new bthread starts or pthread switch occurs, during this period, stop the use of thread_context. 
thread_context_ptr.init = false; - attach_bthread(); + pthread_attach_bthread(); thread_context_ptr.init = true; } return bthread_context; @@ -281,7 +235,6 @@ private: class AttachTask { public: explicit AttachTask(const std::shared_ptr& mem_tracker, - const ThreadContext::TaskType& type = ThreadContext::TaskType::UNKNOWN, const std::string& task_id = "", const TUniqueId& fragment_instance_id = TUniqueId()); @@ -292,10 +245,12 @@ public: class SwitchThreadMemTrackerLimiter { public: - explicit SwitchThreadMemTrackerLimiter( - const std::shared_ptr& mem_tracker_limiter); + explicit SwitchThreadMemTrackerLimiter(const std::shared_ptr& mem_tracker); ~SwitchThreadMemTrackerLimiter(); + +private: + std::shared_ptr _old_mem_tracker; }; class AddThreadMemTrackerConsumer { @@ -345,6 +300,10 @@ private: size, doris::thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()) // Mem Hook to consume thread mem tracker +// TODO: In the original design, the MemTracker consume method is called before the memory is allocated. +// If the consume succeeds, the memory is actually allocated, otherwise an exception is thrown. +// But the statistics of memory through TCMalloc new/delete Hook are after the memory is actually allocated, +// which is different from the previous behavior. 
#define MEM_MALLOC_HOOK(size) \ do { \ if (doris::thread_context_ptr.init) { \ diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index 3b53e6dc31..714e4757bc 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -53,7 +53,6 @@ #include "runtime/exec_env.h" #include "runtime/heartbeat_flags.h" #include "runtime/load_channel_mgr.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "service/backend_options.h" #include "service/backend_service.h" #include "service/brpc_service.h" @@ -500,29 +499,16 @@ int main(int argc, char** argv) { __lsan_do_leak_check(); #endif doris::PerfCounters::refresh_proc_status(); + doris::MemTrackerLimiter::refresh_global_counter(); + doris::ExecEnv::GetInstance()->load_channel_mgr()->refresh_mem_tracker(); #if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) && \ !defined(USE_JEMALLOC) doris::MemInfo::refresh_allocator_mem(); #endif - int64_t allocator_cache_mem_diff = - doris::MemInfo::allocator_cache_mem() - - doris::ExecEnv::GetInstance()->allocator_cache_mem_tracker()->consumption(); - doris::ExecEnv::GetInstance()->allocator_cache_mem_tracker()->consume( - allocator_cache_mem_diff); - CONSUME_THREAD_MEM_TRACKER(allocator_cache_mem_diff); - doris::ExecEnv::GetInstance()->load_channel_mgr()->refresh_mem_tracker(); - - // 1s clear the expired task mem tracker, a query mem tracker is about 57 bytes. - // this will cause coredump for ASAN build when running regression test, - // disable temporarily. - doris::ExecEnv::GetInstance()->task_pool_mem_tracker_registry()->logout_task_mem_tracker(); - // The process tracker print log usage interval is 1s to avoid a large number of tasks being - // canceled when the process exceeds the mem limit, resulting in too many duplicate logs. 
- doris::ExecEnv::GetInstance()->process_mem_tracker()->enable_print_log_usage(); if (doris::config::memory_debug) { - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage("main routine"); - doris::ExecEnv::GetInstance()->process_mem_tracker()->enable_print_log_usage(); + doris::MemTrackerLimiter::print_log_process_usage("memory_debug"); } + doris::MemTrackerLimiter::enable_print_log_process_usage(); sleep(1); } diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index 3e672ab910..26301b8f4e 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -34,7 +34,6 @@ #include "runtime/fold_constant_executor.h" #include "runtime/fragment_mgr.h" #include "runtime/load_channel_mgr.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/result_buffer_mgr.h" #include "runtime/routine_load/routine_load_task_executor.h" #include "runtime/runtime_state.h" @@ -69,10 +68,12 @@ static void thread_context_deleter(void* d) { template class NewHttpClosure : public ::google::protobuf::Closure { public: + NewHttpClosure(google::protobuf::Closure* done) : _done(done) {} NewHttpClosure(T* request, google::protobuf::Closure* done) : _request(request), _done(done) {} ~NewHttpClosure() {} void Run() { + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); if (_request != nullptr) { delete _request; _request = nullptr; @@ -108,22 +109,23 @@ void PInternalServiceImpl::transmit_data(google::protobuf::RpcController* cntl_b PTransmitDataResult* response, google::protobuf::Closure* done) { // TODO(zxy) delete in 1.2 version + google::protobuf::Closure* new_done = new NewHttpClosure(done); brpc::Controller* cntl = static_cast(cntl_base); attachment_transfer_request_row_batch(request, cntl); - _transmit_data(cntl_base, request, response, done, Status::OK()); + _transmit_data(cntl_base, request, response, new_done, Status::OK()); } void 
PInternalServiceImpl::transmit_data_by_http(google::protobuf::RpcController* cntl_base, const PEmptyRequest* request, PTransmitDataResult* response, google::protobuf::Closure* done) { - PTransmitDataParams* request_raw = new PTransmitDataParams(); - google::protobuf::Closure* done_raw = - new NewHttpClosure(request_raw, done); + PTransmitDataParams* new_request = new PTransmitDataParams(); + google::protobuf::Closure* new_done = + new NewHttpClosure(new_request, done); brpc::Controller* cntl = static_cast(cntl_base); - Status st = attachment_extract_request_contain_tuple(request_raw, cntl); - _transmit_data(cntl_base, request_raw, response, done_raw, st); + Status st = attachment_extract_request_contain_tuple(new_request, cntl); + _transmit_data(cntl_base, new_request, response, new_done, st); } void PInternalServiceImpl::_transmit_data(google::protobuf::RpcController* cntl_base, @@ -133,17 +135,10 @@ void PInternalServiceImpl::_transmit_data(google::protobuf::RpcController* cntl_ const Status& extract_st) { std::string query_id; TUniqueId finst_id; - std::shared_ptr transmit_tracker = nullptr; if (request->has_query_id()) { query_id = print_id(request->query_id()); finst_id.__set_hi(request->finst_id().hi()); finst_id.__set_lo(request->finst_id().lo()); - transmit_tracker = - _exec_env->task_pool_mem_tracker_registry()->get_task_mem_tracker(query_id); - } - if (!transmit_tracker) { - query_id = "unkown_transmit_data"; - transmit_tracker = std::make_shared(-1, "unkown_transmit_data"); } VLOG_ROW << "transmit data: fragment_instance_id=" << print_id(request->finst_id()) << " query_id=" << query_id << " node=" << request->node_id(); @@ -152,7 +147,6 @@ void PInternalServiceImpl::_transmit_data(google::protobuf::RpcController* cntl_ Status st; st.to_protobuf(response->mutable_status()); if (extract_st.ok()) { - SCOPED_ATTACH_TASK(transmit_tracker, ThreadContext::TaskType::QUERY, query_id, finst_id); st = _exec_env->stream_mgr()->transmit_data(request, &done); if 
(!st.ok()) { LOG(WARNING) << "transmit_data failed, message=" << st.get_error_msg() @@ -225,23 +219,24 @@ void PInternalServiceImpl::tablet_writer_add_block(google::protobuf::RpcControll PTabletWriterAddBlockResult* response, google::protobuf::Closure* done) { // TODO(zxy) delete in 1.2 version + google::protobuf::Closure* new_done = new NewHttpClosure(done); brpc::Controller* cntl = static_cast(cntl_base); attachment_transfer_request_block(request, cntl); - _tablet_writer_add_block(cntl_base, request, response, done); + _tablet_writer_add_block(cntl_base, request, response, new_done); } void PInternalServiceImpl::tablet_writer_add_block_by_http( google::protobuf::RpcController* cntl_base, const ::doris::PEmptyRequest* request, PTabletWriterAddBlockResult* response, google::protobuf::Closure* done) { - PTabletWriterAddBlockRequest* request_raw = new PTabletWriterAddBlockRequest(); - google::protobuf::Closure* done_raw = - new NewHttpClosure(request_raw, done); + PTabletWriterAddBlockRequest* new_request = new PTabletWriterAddBlockRequest(); + google::protobuf::Closure* new_done = + new NewHttpClosure(new_request, done); brpc::Controller* cntl = static_cast(cntl_base); - Status st = attachment_extract_request_contain_block(request_raw, + Status st = attachment_extract_request_contain_block(new_request, cntl); if (st.ok()) { - _tablet_writer_add_block(cntl_base, request_raw, response, done_raw); + _tablet_writer_add_block(cntl_base, new_request, response, new_done); } else { st.to_protobuf(response->mutable_status()); } @@ -280,20 +275,21 @@ void PInternalServiceImpl::tablet_writer_add_batch(google::protobuf::RpcControll const PTabletWriterAddBatchRequest* request, PTabletWriterAddBatchResult* response, google::protobuf::Closure* done) { - _tablet_writer_add_batch(cntl_base, request, response, done); + google::protobuf::Closure* new_done = new NewHttpClosure(done); + _tablet_writer_add_batch(cntl_base, request, response, new_done); } void 
PInternalServiceImpl::tablet_writer_add_batch_by_http( google::protobuf::RpcController* cntl_base, const ::doris::PEmptyRequest* request, PTabletWriterAddBatchResult* response, google::protobuf::Closure* done) { - PTabletWriterAddBatchRequest* request_raw = new PTabletWriterAddBatchRequest(); - google::protobuf::Closure* done_raw = - new NewHttpClosure(request_raw, done); + PTabletWriterAddBatchRequest* new_request = new PTabletWriterAddBatchRequest(); + google::protobuf::Closure* new_done = + new NewHttpClosure(new_request, done); brpc::Controller* cntl = static_cast(cntl_base); - Status st = attachment_extract_request_contain_tuple(request_raw, + Status st = attachment_extract_request_contain_tuple(new_request, cntl); if (st.ok()) { - _tablet_writer_add_batch(cntl_base, request_raw, response, done_raw); + _tablet_writer_add_batch(cntl_base, new_request, response, new_done); } else { st.to_protobuf(response->mutable_status()); } @@ -689,22 +685,23 @@ void PInternalServiceImpl::transmit_block(google::protobuf::RpcController* cntl_ PTransmitDataResult* response, google::protobuf::Closure* done) { // TODO(zxy) delete in 1.2 version + google::protobuf::Closure* new_done = new NewHttpClosure(done); brpc::Controller* cntl = static_cast(cntl_base); attachment_transfer_request_block(request, cntl); - _transmit_block(cntl_base, request, response, done, Status::OK()); + _transmit_block(cntl_base, request, response, new_done, Status::OK()); } void PInternalServiceImpl::transmit_block_by_http(google::protobuf::RpcController* cntl_base, const PEmptyRequest* request, PTransmitDataResult* response, google::protobuf::Closure* done) { - PTransmitDataParams* request_raw = new PTransmitDataParams(); - google::protobuf::Closure* done_raw = - new NewHttpClosure(request_raw, done); + PTransmitDataParams* new_request = new PTransmitDataParams(); + google::protobuf::Closure* new_done = + new NewHttpClosure(new_request, done); brpc::Controller* cntl = static_cast(cntl_base); - Status st = 
attachment_extract_request_contain_block(request_raw, cntl); - _transmit_block(cntl_base, request_raw, response, done_raw, st); + Status st = attachment_extract_request_contain_block(new_request, cntl); + _transmit_block(cntl_base, new_request, response, new_done, st); } void PInternalServiceImpl::_transmit_block(google::protobuf::RpcController* cntl_base, @@ -714,18 +711,10 @@ void PInternalServiceImpl::_transmit_block(google::protobuf::RpcController* cntl const Status& extract_st) { std::string query_id; TUniqueId finst_id; - std::shared_ptr transmit_tracker = nullptr; if (request->has_query_id()) { query_id = print_id(request->query_id()); finst_id.__set_hi(request->finst_id().hi()); finst_id.__set_lo(request->finst_id().lo()); - // phmap `parallel_flat_hash_map` is not thread safe, so get query mem tracker may be null pointer. - transmit_tracker = - _exec_env->task_pool_mem_tracker_registry()->get_task_mem_tracker(query_id); - } - if (!transmit_tracker) { - query_id = "unkown_transmit_block"; - transmit_tracker = std::make_shared(-1, "unkown_transmit_block"); } VLOG_ROW << "transmit block: fragment_instance_id=" << print_id(request->finst_id()) << " query_id=" << query_id << " node=" << request->node_id(); @@ -734,7 +723,6 @@ void PInternalServiceImpl::_transmit_block(google::protobuf::RpcController* cntl Status st; st.to_protobuf(response->mutable_status()); if (extract_st.ok()) { - SCOPED_ATTACH_TASK(transmit_tracker, ThreadContext::TaskType::QUERY, query_id, finst_id); st = _exec_env->vstream_mgr()->transmit_block(request, &done); if (!st.ok()) { LOG(WARNING) << "transmit_block failed, message=" << st.get_error_msg() diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp index ab17542100..9070b09527 100644 --- a/be/src/util/mem_info.cpp +++ b/be/src/util/mem_info.cpp @@ -104,11 +104,23 @@ void MemInfo::init() { bool is_percent = true; _s_mem_limit = ParseUtil::parse_mem_spec(config::mem_limit, -1, _s_physical_mem, &is_percent); + if (_s_mem_limit 
<= 0) { + LOG(WARNING) << "Failed to parse mem limit from '" + config::mem_limit + "'."; + } + if (_s_mem_limit > _s_physical_mem) { + LOG(WARNING) << "Memory limit " << PrettyPrinter::print(_s_mem_limit, TUnit::BYTES) + << " exceeds physical memory of " + << PrettyPrinter::print(_s_physical_mem, TUnit::BYTES) + << ". Using physical memory instead"; + _s_mem_limit = _s_physical_mem; + } _s_mem_limit_str = PrettyPrinter::print(_s_mem_limit, TUnit::BYTES); _s_hard_mem_limit = _s_physical_mem - std::max(209715200L, _s_physical_mem / 10); // 200M - LOG(INFO) << "Physical Memory: " << PrettyPrinter::print(_s_physical_mem, TUnit::BYTES); + LOG(INFO) << "Physical Memory: " << PrettyPrinter::print(_s_physical_mem, TUnit::BYTES) + << ", Mem Limit: " << _s_mem_limit_str + << ", origin config value: " << config::mem_limit; _s_initialized = true; } #else diff --git a/be/src/util/ref_count_closure.h b/be/src/util/ref_count_closure.h index b91d6225ed..c278dae9a3 100644 --- a/be/src/util/ref_count_closure.h +++ b/be/src/util/ref_count_closure.h @@ -37,6 +37,7 @@ public: bool unref() { return _refs.fetch_sub(1) == 1; } void Run() override { + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); if (unref()) { delete this; } diff --git a/be/src/vec/common/allocator.h b/be/src/vec/common/allocator.h index 8f9eba9fae..36a11fc6f9 100644 --- a/be/src/vec/common/allocator.h +++ b/be/src/vec/common/allocator.h @@ -132,7 +132,7 @@ public: if (MAP_FAILED == buf) { RELEASE_THREAD_MEM_TRACKER(size); auto err = fmt::format("Allocator: Cannot mmap {}.", size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY); } @@ -142,7 +142,7 @@ public: doris::Chunk chunk; if (!doris::ChunkAllocator::instance()->allocate_align(size, &chunk)) { auto err = fmt::format("Allocator: Cannot allocate chunk 
{}.", size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY); } @@ -157,7 +157,7 @@ public: if (nullptr == buf) { auto err = fmt::format("Allocator: Cannot malloc {}.", size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno( err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY); } @@ -167,7 +167,7 @@ public: if (0 != res) { auto err = fmt::format("Cannot allocate memory (posix_memalign) {}.", size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno( err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY, res); } @@ -183,7 +183,7 @@ public: if (size >= MMAP_THRESHOLD) { if (0 != munmap(buf, size)) { auto err = fmt::format("Allocator: Cannot munmap {}.", size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_MUNMAP); } else { RELEASE_THREAD_MEM_TRACKER(size); @@ -212,7 +212,7 @@ public: if (nullptr == new_buf) { auto err = fmt::format("Allocator: Cannot realloc from {} to {}.", old_size, new_size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY); } @@ -232,7 +232,7 @@ public: RELEASE_THREAD_MEM_TRACKER(new_size - old_size); auto err = fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.", old_size, new_size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + 
doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_MREMAP); } diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp b/be/src/vec/exec/scan/scanner_scheduler.cpp index dbb9cadb24..09578a2ba1 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.cpp +++ b/be/src/vec/exec/scan/scanner_scheduler.cpp @@ -185,10 +185,8 @@ void ScannerScheduler::_scanner_scan(ScannerScheduler* scheduler, ScannerContext VScanner* scanner) { INIT_AND_SCOPE_REENTRANT_SPAN_IF(ctx->state()->enable_profile(), ctx->state()->get_tracer(), ctx->scan_span(), "VScanner::scan"); - SCOPED_ATTACH_TASK(scanner->runtime_state()->scanner_mem_tracker(), - ThreadContext::query_to_task_type(scanner->runtime_state()->query_type()), - print_id(scanner->runtime_state()->query_id()), - scanner->runtime_state()->fragment_instance_id()); + SCOPED_ATTACH_TASK(scanner->runtime_state()); + SCOPED_CONSUME_MEM_TRACKER(scanner->runtime_state()->scanner_mem_tracker().get()); Thread::set_self_name("_scanner_scan"); scanner->update_wait_worker_timer(); // Do not use ScopedTimer. There is no guarantee that, the counter diff --git a/be/src/vec/exec/volap_scan_node.cpp b/be/src/vec/exec/volap_scan_node.cpp index 19ab63ae37..7e0abc1e77 100644 --- a/be/src/vec/exec/volap_scan_node.cpp +++ b/be/src/vec/exec/volap_scan_node.cpp @@ -394,10 +394,8 @@ void VOlapScanNode::transfer_thread(RuntimeState* state) { } void VOlapScanNode::scanner_thread(VOlapScanner* scanner) { - SCOPED_ATTACH_TASK(_runtime_state->scanner_mem_tracker(), - ThreadContext::query_to_task_type(_runtime_state->query_type()), - print_id(_runtime_state->query_id()), - _runtime_state->fragment_instance_id()); + SCOPED_ATTACH_TASK(_runtime_state); + SCOPED_CONSUME_MEM_TRACKER(_runtime_state->scanner_mem_tracker().get()); Thread::set_self_name("volap_scanner"); int64_t wait_time = scanner->update_wait_worker_timer(); // Do not use ScopedTimer. 
There is no guarantee that, the counter diff --git a/be/src/vec/runtime/vdata_stream_mgr.cpp b/be/src/vec/runtime/vdata_stream_mgr.cpp index 511fbbe19d..4743d0933f 100644 --- a/be/src/vec/runtime/vdata_stream_mgr.cpp +++ b/be/src/vec/runtime/vdata_stream_mgr.cpp @@ -53,7 +53,7 @@ std::shared_ptr VDataStreamMgr::create_recvr( VLOG_FILE << "creating receiver for fragment=" << fragment_instance_id << ", node=" << dest_node_id; std::shared_ptr recvr(new VDataStreamRecvr( - this, row_desc, fragment_instance_id, dest_node_id, num_senders, is_merging, + this, state, row_desc, fragment_instance_id, dest_node_id, num_senders, is_merging, buffer_size, profile, sub_plan_query_statistics_recvr)); uint32_t hash_value = get_hash_value(fragment_instance_id, dest_node_id); std::lock_guard l(_lock); diff --git a/be/src/vec/runtime/vdata_stream_recvr.cpp b/be/src/vec/runtime/vdata_stream_recvr.cpp index a649816a1b..7a9a55ebc1 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.cpp +++ b/be/src/vec/runtime/vdata_stream_recvr.cpp @@ -77,10 +77,7 @@ Status VDataStreamRecvr::SenderQueue::get_batch(Block** next_block) { if (!_pending_closures.empty()) { auto closure_pair = _pending_closures.front(); - { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); - closure_pair.first->Run(); - } + closure_pair.first->Run(); _pending_closures.pop_front(); closure_pair.second.stop(); @@ -224,11 +221,8 @@ void VDataStreamRecvr::SenderQueue::cancel() { { std::lock_guard l(_lock); - { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); - for (auto closure_pair : _pending_closures) { - closure_pair.first->Run(); - } + for (auto closure_pair : _pending_closures) { + closure_pair.first->Run(); } _pending_closures.clear(); } @@ -242,11 +236,8 @@ void VDataStreamRecvr::SenderQueue::close() { std::lock_guard l(_lock); _is_cancelled = true; - { - 
SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); - for (auto closure_pair : _pending_closures) { - closure_pair.first->Run(); - } + for (auto closure_pair : _pending_closures) { + closure_pair.first->Run(); } _pending_closures.clear(); } @@ -260,11 +251,12 @@ void VDataStreamRecvr::SenderQueue::close() { } VDataStreamRecvr::VDataStreamRecvr( - VDataStreamMgr* stream_mgr, const RowDescriptor& row_desc, + VDataStreamMgr* stream_mgr, RuntimeState* state, const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, int num_senders, bool is_merging, int total_buffer_limit, RuntimeProfile* profile, std::shared_ptr sub_plan_query_statistics_recvr) : _mgr(stream_mgr), + _state(state), _fragment_instance_id(fragment_instance_id), _dest_node_id(dest_node_id), _total_buffer_limit(total_buffer_limit), @@ -325,6 +317,8 @@ Status VDataStreamRecvr::create_merger(const std::vector& orderin void VDataStreamRecvr::add_block(const PBlock& pblock, int sender_id, int be_number, int64_t packet_seq, ::google::protobuf::Closure** done) { + SCOPED_ATTACH_TASK(_state->query_mem_tracker(), print_id(_state->query_id()), + _fragment_instance_id); SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); int use_sender_id = _is_merging ? 
sender_id : 0; _sender_queues[use_sender_id]->add_block(pblock, be_number, packet_seq, done); diff --git a/be/src/vec/runtime/vdata_stream_recvr.h b/be/src/vec/runtime/vdata_stream_recvr.h index 7372285125..254d85185c 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.h +++ b/be/src/vec/runtime/vdata_stream_recvr.h @@ -28,6 +28,7 @@ #include "common/status.h" #include "gen_cpp/Types_types.h" #include "runtime/descriptors.h" +#include "runtime/query_fragments_ctx.h" #include "runtime/query_statistics.h" #include "util/runtime_profile.h" @@ -50,7 +51,7 @@ class VExprContext; class VDataStreamRecvr { public: - VDataStreamRecvr(VDataStreamMgr* stream_mgr, const RowDescriptor& row_desc, + VDataStreamRecvr(VDataStreamMgr* stream_mgr, RuntimeState* state, const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, int num_senders, bool is_merging, int total_buffer_limit, RuntimeProfile* profile, @@ -97,6 +98,8 @@ private: // DataStreamMgr instance used to create this recvr. (Not owned) VDataStreamMgr* _mgr; + RuntimeState* _state; + // Fragment and node id of the destination exchange node this receiver is used by. 
TUniqueId _fragment_instance_id; PlanNodeId _dest_node_id; diff --git a/be/src/vec/sink/vdata_stream_sender.cpp b/be/src/vec/sink/vdata_stream_sender.cpp index 0ca087ea59..100341a197 100644 --- a/be/src/vec/sink/vdata_stream_sender.cpp +++ b/be/src/vec/sink/vdata_stream_sender.cpp @@ -139,7 +139,7 @@ Status VDataStreamSender::Channel::send_block(PBlock* block, bool eos) { _closure->ref(); } else { RETURN_IF_ERROR(_wait_last_brpc()); - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _closure->cntl.Reset(); } VLOG_ROW << "Channel::send_batch() instance_id=" << _fragment_instance_id @@ -162,7 +162,7 @@ Status VDataStreamSender::Channel::send_block(PBlock* block, bool eos) { if (_parent->_transfer_large_data_by_brpc && _brpc_request.has_block() && _brpc_request.block().has_column_values() && _brpc_request.ByteSizeLong() > MIN_HTTP_BRPC_SIZE) { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); Status st = request_embed_attachment_contain_block>( &_brpc_request, _closure); @@ -179,7 +179,7 @@ Status VDataStreamSender::Channel::send_block(PBlock* block, bool eos) { _brpc_http_stub->transmit_block_by_http(&_closure->cntl, nullptr, &_closure->result, _closure); } else { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _closure->cntl.http_request().Clear(); _brpc_stub->transmit_block(&_closure->cntl, &_brpc_request, &_closure->result, _closure); } diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp index 126e65cf85..04ce7c54dc 100644 --- a/be/src/vec/sink/vtablet_sink.cpp +++ b/be/src/vec/sink/vtablet_sink.cpp @@ -355,7 +355,7 @@ void 
VNodeChannel::try_send_block(RuntimeState* state) { _add_block_closure->cntl.http_request().set_content_type("application/json"); { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _brpc_http_stub->tablet_writer_add_block_by_http(&_add_block_closure->cntl, NULL, &_add_block_closure->result, _add_block_closure); @@ -363,7 +363,7 @@ void VNodeChannel::try_send_block(RuntimeState* state) { } else { _add_block_closure->cntl.http_request().Clear(); { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _stub->tablet_writer_add_block(&_add_block_closure->cntl, &request, &_add_block_closure->result, _add_block_closure); } diff --git a/be/test/exec/broker_scan_node_test.cpp b/be/test/exec/broker_scan_node_test.cpp index 8b8e376b34..5ca63810f2 100644 --- a/be/test/exec/broker_scan_node_test.cpp +++ b/be/test/exec/broker_scan_node_test.cpp @@ -40,7 +40,7 @@ class BrokerScanNodeTest : public testing::Test { public: BrokerScanNodeTest() : _runtime_state(TQueryGlobals()) { init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); } void init(); static void SetUpTestCase() { diff --git a/be/test/exec/broker_scanner_test.cpp b/be/test/exec/broker_scanner_test.cpp index d750370aff..65387e575b 100644 --- a/be/test/exec/broker_scanner_test.cpp +++ b/be/test/exec/broker_scanner_test.cpp @@ -40,7 +40,7 @@ public: BrokerScannerTest() : _runtime_state(TQueryGlobals()) { init(); _profile = _runtime_state.runtime_profile(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); } void init(); diff --git a/be/test/exec/es_http_scan_node_test.cpp b/be/test/exec/es_http_scan_node_test.cpp index 8dc0e71b79..7d0a63596b 100644 --- a/be/test/exec/es_http_scan_node_test.cpp +++ 
b/be/test/exec/es_http_scan_node_test.cpp @@ -40,7 +40,7 @@ namespace doris { class EsHttpScanNodeTest : public testing::Test { public: EsHttpScanNodeTest() : _runtime_state(TQueryGlobals()) { - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); TDescriptorTable t_desc_table; // table descriptors diff --git a/be/test/exec/es_predicate_test.cpp b/be/test/exec/es_predicate_test.cpp index 8cf009b991..5a91580b36 100644 --- a/be/test/exec/es_predicate_test.cpp +++ b/be/test/exec/es_predicate_test.cpp @@ -43,7 +43,7 @@ class RuntimeState; class EsPredicateTest : public testing::Test { public: EsPredicateTest() : _runtime_state(TQueryGlobals()) { - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); TDescriptorTable t_desc_table; // table descriptors diff --git a/be/test/exec/hash_table_test.cpp b/be/test/exec/hash_table_test.cpp index 2a5e8f62d6..3a5ab8e3f0 100644 --- a/be/test/exec/hash_table_test.cpp +++ b/be/test/exec/hash_table_test.cpp @@ -49,7 +49,7 @@ public: HashTableTest() { _mem_pool.reset(new MemPool()); _state = _pool.add(new RuntimeState(TQueryGlobals())); - _state->init_instance_mem_tracker(); + _state->init_mem_trackers(); _state->_exec_env = ExecEnv::GetInstance(); } @@ -309,7 +309,7 @@ TEST_F(HashTableTest, GrowTableTest) { int64_t num_buckets = 4; HashTable hash_table(_build_expr, _probe_expr, 1, false, is_null_safe, initial_seed, num_buckets); - EXPECT_FALSE(hash_table.mem_tracker()->limit_exceeded(mem_limit)); + EXPECT_FALSE(hash_table.mem_tracker()->consumption() > mem_limit); for (int i = 0; i < LOOP_LESS_OR_MORE(1, 20); ++i) { for (int j = 0; j < num_to_add; ++build_row_val, ++j) { @@ -323,7 +323,7 @@ TEST_F(HashTableTest, GrowTableTest) { LOG(INFO) << "consume:" << hash_table.mem_tracker()->consumption() << ",expected_size:" << expected_size; - EXPECT_EQ(LOOP_LESS_OR_MORE(0, 1), hash_table.mem_tracker()->limit_exceeded(mem_limit)); + EXPECT_EQ(LOOP_LESS_OR_MORE(0, 1), 
hash_table.mem_tracker()->consumption() > mem_limit); // Validate that we can find the entries for (int i = 0; i < expected_size * 5; i += 100000) { diff --git a/be/test/exec/json_scanner_test.cpp b/be/test/exec/json_scanner_test.cpp index 5daa4ef19a..5fbc3b3c3d 100644 --- a/be/test/exec/json_scanner_test.cpp +++ b/be/test/exec/json_scanner_test.cpp @@ -42,7 +42,7 @@ class JsonScannerTest : public testing::Test { public: JsonScannerTest() : _runtime_state(TQueryGlobals()) { init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); _runtime_state._exec_env = ExecEnv::GetInstance(); } void init(); diff --git a/be/test/exec/json_scanner_with_jsonpath_test.cpp b/be/test/exec/json_scanner_with_jsonpath_test.cpp index 578cd10443..e54113c0bc 100644 --- a/be/test/exec/json_scanner_with_jsonpath_test.cpp +++ b/be/test/exec/json_scanner_with_jsonpath_test.cpp @@ -41,7 +41,7 @@ class JsonScannerWithJsonPathTest : public testing::Test { public: JsonScannerWithJsonPathTest() : _runtime_state(TQueryGlobals()) { init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); _runtime_state._exec_env = ExecEnv::GetInstance(); } void init(); diff --git a/be/test/exec/orc_scanner_test.cpp b/be/test/exec/orc_scanner_test.cpp index bcc4d79e53..7356458f47 100644 --- a/be/test/exec/orc_scanner_test.cpp +++ b/be/test/exec/orc_scanner_test.cpp @@ -44,7 +44,7 @@ class OrcScannerTest : public testing::Test { public: OrcScannerTest() : _runtime_state(TQueryGlobals()) { _profile = _runtime_state.runtime_profile(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); } static void SetUpTestCase() { diff --git a/be/test/exec/parquet_scanner_test.cpp b/be/test/exec/parquet_scanner_test.cpp index 35d0f6e359..d330a44b3f 100644 --- a/be/test/exec/parquet_scanner_test.cpp +++ b/be/test/exec/parquet_scanner_test.cpp @@ -40,7 +40,7 @@ class ParquetScannerTest : public testing::Test { public: 
ParquetScannerTest() : _runtime_state(TQueryGlobals()) { init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); } void init(); static void SetUpTestCase() { diff --git a/be/test/exec/tablet_sink_test.cpp b/be/test/exec/tablet_sink_test.cpp index 4e2d36bc88..566d9675be 100644 --- a/be/test/exec/tablet_sink_test.cpp +++ b/be/test/exec/tablet_sink_test.cpp @@ -25,7 +25,6 @@ #include "runtime/decimalv2_value.h" #include "runtime/descriptor_helper.h" #include "runtime/exec_env.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/result_queue_mgr.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" @@ -56,7 +55,6 @@ public: _env->_load_stream_mgr = new LoadStreamMgr(); _env->_internal_client_cache = new BrpcClientCache(); _env->_function_client_cache = new BrpcClientCache(); - _env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool(); ThreadPoolBuilder("SendBatchThreadPool") .set_min_threads(1) .set_max_threads(5) @@ -72,7 +70,6 @@ public: SAFE_DELETE(_env->_load_stream_mgr); SAFE_DELETE(_env->_master_info); SAFE_DELETE(_env->_thread_mgr); - SAFE_DELETE(_env->_task_pool_mem_tracker_registry); if (_server) { _server->Stop(100); _server->Join(); diff --git a/be/test/exprs/runtime_filter_test.cpp b/be/test/exprs/runtime_filter_test.cpp index d4b66f295c..ee319ac6cb 100644 --- a/be/test/exprs/runtime_filter_test.cpp +++ b/be/test/exprs/runtime_filter_test.cpp @@ -42,7 +42,7 @@ public: exec_env = nullptr; _runtime_stat.reset( new RuntimeState(_fragment_id, _query_options, _query_globals, exec_env)); - _runtime_stat->init_instance_mem_tracker(); + _runtime_stat->init_mem_trackers(); } virtual void TearDown() { _obj_pool.clear(); } diff --git a/be/test/olap/lru_cache_test.cpp b/be/test/olap/lru_cache_test.cpp index c9fb0cbd93..bafe4e7288 100644 --- a/be/test/olap/lru_cache_test.cpp +++ b/be/test/olap/lru_cache_test.cpp @@ -21,6 +21,7 @@ #include +#include "runtime/memory/mem_tracker_limiter.h" 
#include "testutil/test_util.h" using namespace doris; @@ -221,7 +222,7 @@ static void insert_LRUCache(LRUCache& cache, const CacheKey& key, int value, CachePriority priority) { uint32_t hash = key.hash(key.data(), key.size(), 0); static std::unique_ptr lru_cache_tracker = - std::make_unique(-1, "TestLruCache"); + std::make_unique(MemTrackerLimiter::Type::GLOBAL, "TestLruCache"); cache.release(cache.insert(key, hash, EncodeValue(value), value, &deleter, lru_cache_tracker.get(), priority)); } diff --git a/be/test/runtime/mem_limit_test.cpp b/be/test/runtime/mem_limit_test.cpp index 811102b216..74b06033b0 100644 --- a/be/test/runtime/mem_limit_test.cpp +++ b/be/test/runtime/mem_limit_test.cpp @@ -24,7 +24,7 @@ namespace doris { TEST(MemTrackerTest, SingleTrackerNoLimit) { - auto t = std::make_unique(); + auto t = std::make_shared(MemTrackerLimiter::Type::GLOBAL); EXPECT_FALSE(t->has_limit()); t->consume(10); EXPECT_EQ(t->consumption(), 10); @@ -37,7 +37,8 @@ TEST(MemTrackerTest, SingleTrackerNoLimit) { } TEST(MemTestTest, SingleTrackerWithLimit) { - auto t = std::make_unique(11, "limit tracker"); + auto t = std::make_unique(MemTrackerLimiter::Type::GLOBAL, "limit tracker", + 11); EXPECT_TRUE(t->has_limit()); t->consume(10); EXPECT_EQ(t->consumption(), 10); @@ -51,95 +52,4 @@ TEST(MemTestTest, SingleTrackerWithLimit) { t->release(5); } -TEST(MemTestTest, TrackerHierarchy) { - auto p = std::make_shared(100); - auto c1 = std::make_unique(80, "c1", p); - auto c2 = std::make_unique(50, "c2", p); - - // everything below limits - c1->consume(60); - EXPECT_EQ(c1->consumption(), 60); - EXPECT_FALSE(c1->limit_exceeded()); - EXPECT_FALSE(c1->any_limit_exceeded()); - EXPECT_EQ(c2->consumption(), 0); - EXPECT_FALSE(c2->limit_exceeded()); - EXPECT_FALSE(c2->any_limit_exceeded()); - EXPECT_EQ(p->consumption(), 60); - EXPECT_FALSE(p->limit_exceeded()); - EXPECT_FALSE(p->any_limit_exceeded()); - - // p goes over limit - c2->consume(50); - EXPECT_EQ(c1->consumption(), 60); - 
EXPECT_FALSE(c1->limit_exceeded()); - EXPECT_TRUE(c1->any_limit_exceeded()); - EXPECT_EQ(c2->consumption(), 50); - EXPECT_FALSE(c2->limit_exceeded()); - EXPECT_TRUE(c2->any_limit_exceeded()); - EXPECT_EQ(p->consumption(), 110); - EXPECT_TRUE(p->limit_exceeded()); - - // c2 goes over limit, p drops below limit - c1->release(20); - c2->consume(10); - EXPECT_EQ(c1->consumption(), 40); - EXPECT_FALSE(c1->limit_exceeded()); - EXPECT_FALSE(c1->any_limit_exceeded()); - EXPECT_EQ(c2->consumption(), 60); - EXPECT_TRUE(c2->limit_exceeded()); - EXPECT_TRUE(c2->any_limit_exceeded()); - EXPECT_EQ(p->consumption(), 100); - EXPECT_FALSE(p->limit_exceeded()); - c1->release(40); - c2->release(60); -} - -TEST(MemTestTest, TrackerHierarchyTryConsume) { - auto p = std::make_shared(100); - auto c1 = std::make_unique(80, "c1", p); - auto c2 = std::make_unique(50, "c2", p); - - // everything below limits - std::string err_msg = ""; - bool consumption = c1->try_consume(60, err_msg); - EXPECT_EQ(consumption, true); - EXPECT_EQ(c1->consumption(), 60); - EXPECT_FALSE(c1->limit_exceeded()); - EXPECT_FALSE(c1->any_limit_exceeded()); - EXPECT_EQ(c2->consumption(), 0); - EXPECT_FALSE(c2->limit_exceeded()); - EXPECT_FALSE(c2->any_limit_exceeded()); - EXPECT_EQ(p->consumption(), 60); - EXPECT_FALSE(p->limit_exceeded()); - EXPECT_FALSE(p->any_limit_exceeded()); - - // p goes over limit - consumption = c2->try_consume(50, err_msg); - EXPECT_EQ(consumption, false); - EXPECT_EQ(c1->consumption(), 60); - EXPECT_FALSE(c1->limit_exceeded()); - EXPECT_FALSE(c1->any_limit_exceeded()); - EXPECT_EQ(c2->consumption(), 0); - EXPECT_FALSE(c2->limit_exceeded()); - EXPECT_FALSE(c2->any_limit_exceeded()); - EXPECT_EQ(p->consumption(), 60); - EXPECT_FALSE(p->limit_exceeded()); - EXPECT_FALSE(p->any_limit_exceeded()); - - // c2 goes over limit, p drops below limit - c1->release(20); - c2->consume(10); - EXPECT_EQ(c1->consumption(), 40); - EXPECT_FALSE(c1->limit_exceeded()); - EXPECT_FALSE(c1->any_limit_exceeded()); 
- EXPECT_EQ(c2->consumption(), 10); - EXPECT_FALSE(c2->limit_exceeded()); - EXPECT_FALSE(c2->any_limit_exceeded()); - EXPECT_EQ(p->consumption(), 50); - EXPECT_FALSE(p->limit_exceeded()); - - c1->release(40); - c2->release(10); -} - } // end namespace doris diff --git a/be/test/runtime/test_env.cc b/be/test/runtime/test_env.cc index db32d9a30e..dc2b53c9f6 100644 --- a/be/test/runtime/test_env.cc +++ b/be/test/runtime/test_env.cc @@ -24,7 +24,6 @@ #include "olap/storage_engine.h" #include "runtime/bufferpool/buffer_pool.h" #include "runtime/fragment_mgr.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/result_queue_mgr.h" #include "util/disk_info.h" #include "util/priority_thread_pool.hpp" @@ -35,7 +34,6 @@ TestEnv::TestEnv() { // Some code will use ExecEnv::GetInstance(), so init the global ExecEnv singleton _exec_env = ExecEnv::GetInstance(); _exec_env->_thread_mgr = new ThreadResourceMgr(2); - _exec_env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool(); _exec_env->_disk_io_mgr = new DiskIoMgr(1, 1, 1, 10); _exec_env->disk_io_mgr()->init(-1); _exec_env->_scan_thread_pool = new PriorityThreadPool(1, 16, "ut_scan"); @@ -62,7 +60,6 @@ TestEnv::~TestEnv() { SAFE_DELETE(_exec_env->_buffer_pool); SAFE_DELETE(_exec_env->_scan_thread_pool); SAFE_DELETE(_exec_env->_disk_io_mgr); - SAFE_DELETE(_exec_env->_task_pool_mem_tracker_registry); SAFE_DELETE(_exec_env->_thread_mgr); if (_engine == StorageEngine::_s_instance) { diff --git a/be/test/testutil/run_all_tests.cpp b/be/test/testutil/run_all_tests.cpp index a1e53f7ed0..570f428720 100644 --- a/be/test/testutil/run_all_tests.cpp +++ b/be/test/testutil/run_all_tests.cpp @@ -28,16 +28,10 @@ #include "util/mem_info.h" int main(int argc, char** argv) { - std::shared_ptr process_mem_tracker = - std::make_shared(-1, "Process"); std::shared_ptr orphan_mem_tracker = - std::make_shared(-1, "Orphan", process_mem_tracker); - std::shared_ptr nursery_mem_tracker = - std::make_shared(-1, "Nursery", 
orphan_mem_tracker); - std::shared_ptr bthread_mem_tracker = - std::make_shared(-1, "Bthread", orphan_mem_tracker); - doris::ExecEnv::GetInstance()->set_global_mem_tracker(process_mem_tracker, orphan_mem_tracker, - nursery_mem_tracker, bthread_mem_tracker); + std::make_shared(doris::MemTrackerLimiter::Type::GLOBAL, + "Orphan"); + doris::ExecEnv::GetInstance()->set_orphan_mem_tracker(orphan_mem_tracker); doris::thread_context()->_thread_mem_tracker_mgr->init(); doris::TabletSchemaCache::create_global_schema_cache(); doris::StoragePageCache::create_global_cache(1 << 30, 10); diff --git a/be/test/util/arrow/arrow_work_flow_test.cpp b/be/test/util/arrow/arrow_work_flow_test.cpp index 0475d479a4..7acd5e58b7 100644 --- a/be/test/util/arrow/arrow_work_flow_test.cpp +++ b/be/test/util/arrow/arrow_work_flow_test.cpp @@ -29,7 +29,6 @@ #include "gen_cpp/Types_types.h" #include "olap/row.h" #include "runtime/exec_env.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/result_queue_mgr.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" @@ -66,7 +65,6 @@ protected: if (_exec_env) { delete _exec_env->_result_queue_mgr; delete _exec_env->_thread_mgr; - delete _exec_env->_task_pool_mem_tracker_registry; } } @@ -92,7 +90,6 @@ void ArrowWorkFlowTest::init() { void ArrowWorkFlowTest::init_runtime_state() { _exec_env->_result_queue_mgr = new ResultQueueMgr(); _exec_env->_thread_mgr = new ThreadResourceMgr(); - _exec_env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool(); _exec_env->_is_init = true; TQueryOptions query_options; query_options.batch_size = 1024; @@ -100,7 +97,7 @@ void ArrowWorkFlowTest::init_runtime_state() { query_id.lo = 10; query_id.hi = 100; _state = new RuntimeState(query_id, query_options, TQueryGlobals(), _exec_env); - _state->init_instance_mem_tracker(); + _state->init_mem_trackers(); _state->set_desc_tbl(_desc_tbl); _state->_load_dir = "./test_run/output/"; _state->init_mem_trackers(TUniqueId()); diff --git 
a/be/test/vec/exec/parquet/parquet_reader_test.cpp b/be/test/vec/exec/parquet/parquet_reader_test.cpp index b2288338b3..480460f394 100644 --- a/be/test/vec/exec/parquet/parquet_reader_test.cpp +++ b/be/test/vec/exec/parquet/parquet_reader_test.cpp @@ -109,7 +109,7 @@ TEST_F(ParquetReaderTest, normal) { p_reader->set_file_reader(reader); RuntimeState runtime_state((TQueryGlobals())); runtime_state.set_desc_tbl(desc_tbl); - runtime_state.init_instance_mem_tracker(); + runtime_state.init_mem_trackers(); std::unordered_map colname_to_value_range; p_reader->init_reader(&colname_to_value_range); diff --git a/be/test/vec/exec/vbroker_scan_node_test.cpp b/be/test/vec/exec/vbroker_scan_node_test.cpp index 195f468841..5da2f255f3 100644 --- a/be/test/vec/exec/vbroker_scan_node_test.cpp +++ b/be/test/vec/exec/vbroker_scan_node_test.cpp @@ -45,7 +45,7 @@ class VBrokerScanNodeTest : public testing::Test { public: VBrokerScanNodeTest() : _runtime_state(TQueryGlobals()) { init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); _runtime_state._query_options.enable_vectorized_engine = true; } void init(); diff --git a/be/test/vec/exec/vbroker_scanner_test.cpp b/be/test/vec/exec/vbroker_scanner_test.cpp index 5cb9afc4b2..225eeeb16b 100644 --- a/be/test/vec/exec/vbroker_scanner_test.cpp +++ b/be/test/vec/exec/vbroker_scanner_test.cpp @@ -40,7 +40,7 @@ public: VBrokerScannerTest() : _runtime_state(TQueryGlobals()) { init(); _profile = _runtime_state.runtime_profile(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); TUniqueId unique_id; TQueryOptions query_options; diff --git a/be/test/vec/exec/vjson_scanner_test.cpp b/be/test/vec/exec/vjson_scanner_test.cpp index f06b8233d6..55775a9555 100644 --- a/be/test/vec/exec/vjson_scanner_test.cpp +++ b/be/test/vec/exec/vjson_scanner_test.cpp @@ -47,7 +47,7 @@ class VJsonScannerTest : public testing::Test { public: VJsonScannerTest() : _runtime_state(TQueryGlobals()) { 
init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); TUniqueId unique_id; TQueryOptions query_options; diff --git a/be/test/vec/exec/vorc_scanner_test.cpp b/be/test/vec/exec/vorc_scanner_test.cpp index 7e9aff95ba..0362ebb080 100644 --- a/be/test/vec/exec/vorc_scanner_test.cpp +++ b/be/test/vec/exec/vorc_scanner_test.cpp @@ -46,7 +46,7 @@ class VOrcScannerTest : public testing::Test { public: VOrcScannerTest() : _runtime_state(TQueryGlobals()) { _profile = _runtime_state.runtime_profile(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); _runtime_state._query_options.enable_vectorized_engine = true; } ~VOrcScannerTest() {} diff --git a/be/test/vec/exec/vparquet_scanner_test.cpp b/be/test/vec/exec/vparquet_scanner_test.cpp index bb1bb2c7f3..c08a69a005 100644 --- a/be/test/vec/exec/vparquet_scanner_test.cpp +++ b/be/test/vec/exec/vparquet_scanner_test.cpp @@ -41,7 +41,7 @@ class VParquetScannerTest : public testing::Test { public: VParquetScannerTest() : _runtime_state(TQueryGlobals()) { init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); _runtime_state._query_options.enable_vectorized_engine = true; } ~VParquetScannerTest() {} diff --git a/be/test/vec/exec/vtablet_sink_test.cpp b/be/test/vec/exec/vtablet_sink_test.cpp index ae1f615035..4e75c6808e 100644 --- a/be/test/vec/exec/vtablet_sink_test.cpp +++ b/be/test/vec/exec/vtablet_sink_test.cpp @@ -28,7 +28,6 @@ #include "runtime/decimalv2_value.h" #include "runtime/descriptor_helper.h" #include "runtime/exec_env.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/result_queue_mgr.h" #include "runtime/runtime_state.h" #include "runtime/stream_load/load_stream_mgr.h" @@ -157,7 +156,6 @@ public: _env->_load_stream_mgr = new LoadStreamMgr(); _env->_internal_client_cache = new BrpcClientCache(); _env->_function_client_cache = new BrpcClientCache(); - _env->_task_pool_mem_tracker_registry = 
new MemTrackerTaskPool(); ThreadPoolBuilder("SendBatchThreadPool") .set_min_threads(1) .set_max_threads(5) @@ -173,7 +171,6 @@ public: SAFE_DELETE(_env->_load_stream_mgr); SAFE_DELETE(_env->_master_info); SAFE_DELETE(_env->_thread_mgr); - SAFE_DELETE(_env->_task_pool_mem_tracker_registry); if (_server) { _server->Stop(100); _server->Join(); diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp index 27c76c320f..593cfc932e 100644 --- a/be/test/vec/exprs/vexpr_test.cpp +++ b/be/test/vec/exprs/vexpr_test.cpp @@ -69,7 +69,7 @@ TEST(TEST_VEXPR, ABSTEST) { doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(), doris::TQueryGlobals(), nullptr); - runtime_stat.init_instance_mem_tracker(); + runtime_stat.init_mem_trackers(); runtime_stat.set_desc_tbl(desc_tbl); context->prepare(&runtime_stat, row_desc); context->open(&runtime_stat); @@ -112,7 +112,7 @@ TEST(TEST_VEXPR, ABSTEST2) { doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(), doris::TQueryGlobals(), nullptr); - runtime_stat.init_instance_mem_tracker(); + runtime_stat.init_mem_trackers(); DescriptorTbl desc_tbl; desc_tbl._slot_desc_map[0] = tuple_desc->slots()[0]; runtime_stat.set_desc_tbl(&desc_tbl); diff --git a/be/test/vec/runtime/vdata_stream_test.cpp b/be/test/vec/runtime/vdata_stream_test.cpp index 175fa64ee3..539dd51d74 100644 --- a/be/test/vec/runtime/vdata_stream_test.cpp +++ b/be/test/vec/runtime/vdata_stream_test.cpp @@ -114,7 +114,7 @@ TEST_F(VDataStreamTest, BasicTest) { doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(), doris::TQueryGlobals(), nullptr); - runtime_stat.init_instance_mem_tracker(); + runtime_stat.init_mem_trackers(); runtime_stat.set_desc_tbl(desc_tbl); runtime_stat.set_be_number(1); runtime_stat._exec_env = _object_pool.add(new ExecEnv);