diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp index 2aa195bebc..b50b03460e 100644 --- a/be/src/exec/hash_table.cpp +++ b/be/src/exec/hash_table.cpp @@ -175,7 +175,7 @@ Status HashTable::resize_buckets(int64_t num_buckets) { int64_t old_num_buckets = _num_buckets; int64_t delta_bytes = (num_buckets - old_num_buckets) * sizeof(Bucket); - Status st = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit( + Status st = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit( delta_bytes); if (!st) { LOG_EVERY_N(WARNING, 100) << "resize bucket failed: " << st.to_string(); diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 1dcd5c3754..b4bd204614 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -1535,7 +1535,7 @@ void OlapScanNode::transfer_thread(RuntimeState* state) { size_t thread_slot_num = 0; mem_consume = _scanner_mem_tracker->consumption(); // check limit for total memory and _scan_row_batches memory - if (mem_consume < (state->instance_mem_tracker()->limit() * 6) / 10 && + if (mem_consume < (state->query_mem_tracker()->limit() * 6) / 10 && _scan_row_batches_bytes < _max_scanner_queue_size_bytes / 2) { thread_slot_num = max_thread - assigned_thread_num; } else { diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc index 624f7c71ce..49c02fa5a8 100644 --- a/be/src/exec/partitioned_aggregation_node.cc +++ b/be/src/exec/partitioned_aggregation_node.cc @@ -911,14 +911,14 @@ Tuple* PartitionedAggregationNode::ConstructIntermediateTuple( << "Backend: " << BackendOptions::get_localhost() << ", " << "fragment: " << print_id(state_->fragment_instance_id()) << " " << "Used: " - << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->consumption() + << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->consumption() << ", Limit: " - << 
thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->limit() << ". " + << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->limit() << ". " << "You can change the limit by session variable exec_mem_limit."; string details = Substitute(str.str(), _id, tuple_data_size); *status = thread_context() - ->_thread_mem_tracker_mgr->limiter_mem_tracker_raw() - ->mem_limit_exceeded(state_, details, tuple_data_size); + ->_thread_mem_tracker_mgr->limiter_mem_tracker() + ->fragment_mem_limit_exceeded(state_, details, tuple_data_size); return nullptr; } memset(tuple_data, 0, fixed_size); diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc index 83fe65d1b6..f9598b510b 100644 --- a/be/src/exec/partitioned_hash_table.cc +++ b/be/src/exec/partitioned_hash_table.cc @@ -307,7 +307,7 @@ Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state, MAX_EXPR_VALUES_ARRAY_SIZE / expr_values_bytes_per_row_)); int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_); - if (UNLIKELY(!thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit( + if (UNLIKELY(!thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit( mem_usage))) { capacity_ = 0; string details = Substitute( diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp index 762feacb15..a7e03c11e2 100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -48,7 +48,7 @@ NodeChannel::NodeChannel(OlapTableSink* parent, IndexChannel* index_channel, int : _parent(parent), _index_channel(index_channel), _node_id(node_id) { _node_channel_tracker = std::make_unique(fmt::format( "NodeChannel:indexID={}:threadId={}", std::to_string(_index_channel->_index_id), - thread_context()->thread_id_str())); + thread_context()->get_thread_id())); } NodeChannel::~NodeChannel() noexcept { @@ -624,7 +624,7 @@ void NodeChannel::try_send_batch(RuntimeState* state) { 
_add_batch_closure->cntl.http_request().set_method(brpc::HTTP_METHOD_POST); _add_batch_closure->cntl.http_request().set_content_type("application/json"); { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _brpc_http_stub->tablet_writer_add_batch_by_http(&_add_batch_closure->cntl, NULL, &_add_batch_closure->result, _add_batch_closure); @@ -632,7 +632,7 @@ void NodeChannel::try_send_batch(RuntimeState* state) { } else { _add_batch_closure->cntl.http_request().Clear(); { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _stub->tablet_writer_add_batch(&_add_batch_closure->cntl, &request, &_add_batch_closure->result, _add_batch_closure); } diff --git a/be/src/exec/tablet_sink.h b/be/src/exec/tablet_sink.h index 1ba4b46a60..45552329bf 100644 --- a/be/src/exec/tablet_sink.h +++ b/be/src/exec/tablet_sink.h @@ -96,7 +96,7 @@ public: ~ReusableClosure() override { // shouldn't delete when Run() is calling or going to be called, wait for current Run() done. 
join(); - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); cntl.Reset(); } @@ -124,7 +124,7 @@ public: // plz follow this order: reset() -> set_in_flight() -> send brpc batch void reset() { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); cntl.Reset(); cid = cntl.call_id(); } diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp index a74fc740d5..7edc3ec7f0 100644 --- a/be/src/http/default_path_handlers.cpp +++ b/be/src/http/default_path_handlers.cpp @@ -84,9 +84,7 @@ void config_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* // Registered to handle "/memz", and prints out memory allocation statistics. void mem_usage_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) { (*output) << "
"
-              << "Mem Limit: "
-              << PrettyPrinter::print(ExecEnv::GetInstance()->process_mem_tracker()->limit(),
-                                      TUnit::BYTES)
+              << "Mem Limit: " << PrettyPrinter::print(MemInfo::mem_limit(), TUnit::BYTES)
               << std::endl
               << "Physical Mem From Perf: "
               << PrettyPrinter::print(PerfCounters::get_vm_rss(), TUnit::BYTES) << std::endl
@@ -121,14 +119,48 @@ void display_tablets_callback(const WebPageHandler::ArgumentMap& args, EasyJson*
 // Registered to handle "/mem_tracker", and prints out memory tracker information.
 void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) {
     (*output) << "

Memory usage by subsystem

\n"; + std::vector snapshots; + auto iter = args.find("type"); + if (iter != args.end()) { + if (iter->second == "global") { + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::GLOBAL); + } else if (iter->second == "query") { + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::QUERY); + } else if (iter->second == "load") { + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::LOAD); + } else if (iter->second == "compaction") { + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::COMPACTION); + } else if (iter->second == "schema_change") { + MemTrackerLimiter::make_type_snapshots(&snapshots, + MemTrackerLimiter::Type::SCHEMA_CHANGE); + } else if (iter->second == "clone") { + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::CLONE); + } else if (iter->second == "batch_load") { + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::BATCHLOAD); + } else if (iter->second == "consistency") { + MemTrackerLimiter::make_type_snapshots(&snapshots, + MemTrackerLimiter::Type::CONSISTENCY); + } + } else { + (*output) << "

*Note: (see documentation for details)

\n"; + (*output) << "

1.`/mem_tracker?type=global` to view the memory statistics of each " + "type

\n"; + (*output) << "

2.`/mem_tracker` counts virtual memory, which is equal to `Actual " + "memory used` in `/memz`

\n"; + (*output) << "

3.`process` is equal to the sum of all types of memory, " + "`/mem_tracker` can be logically divided into 4 layers: 1)`process` 2)`type` " + "3)`query/load/compaction task etc.` 4)`exec node etc.`

\n"; + MemTrackerLimiter::make_process_snapshots(&snapshots); + } + (*output) << "\n"; (*output) << "" - "" + "" "" - "" + "" "" "" @@ -136,35 +168,17 @@ void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstr "" "" - "" ""; (*output) << "\n"; - - size_t upper_level; - size_t cur_level = 1; - // the level equal or lower than upper_level will show in web page - auto iter = args.find("upper_level"); - if (iter != args.end()) { - upper_level = std::stol(iter->second); - } else { - upper_level = 3; - } - - std::vector snapshots; - ExecEnv::GetInstance()->process_mem_tracker()->make_snapshot(&snapshots, cur_level, - upper_level); - MemTracker::make_global_mem_tracker_snapshot(&snapshots); for (const auto& item : snapshots) { string limit_str = item.limit == -1 ? "none" : AccurateItoaKMGT(item.limit); string current_consumption_normalize = AccurateItoaKMGT(item.cur_consumption); string peak_consumption_normalize = AccurateItoaKMGT(item.peak_consumption); (*output) << strings::Substitute( "\n", - item.level, item.label, item.parent, limit_str, item.cur_consumption, - current_consumption_normalize, item.peak_consumption, peak_consumption_normalize, - item.child_count); + "td>\n", + item.type, item.label, item.parent_label, limit_str, item.cur_consumption, + current_consumption_normalize, item.peak_consumption, peak_consumption_normalize); } (*output) << "
LevelTypeLabelParentParent LabelLimitCurrent Consumption(Bytes)Peak Consumption(Bytes)Peak Consumption(Normalize)Child Count
$0$1$2$3$4$5$6$7$8
$7
\n"; } diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp index 63da436d23..0e1d737b15 100644 --- a/be/src/olap/base_compaction.cpp +++ b/be/src/olap/base_compaction.cpp @@ -68,7 +68,7 @@ Status BaseCompaction::execute_compact_impl() { return Status::OLAPInternalError(OLAP_ERR_BE_CLONE_OCCURRED); } - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::COMPACTION); + SCOPED_ATTACH_TASK(_mem_tracker); // 2. do base compaction, merge rowsets int64_t permits = get_compaction_permits(); diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 801cbf2990..e74dca760f 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -34,13 +34,7 @@ Compaction::Compaction(TabletSharedPtr tablet, const std::string& label) _input_rowsets_size(0), _input_row_num(0), _state(CompactionState::INITED) { -#ifndef BE_TEST - _mem_tracker = std::make_shared( - -1, label, StorageEngine::instance()->compaction_mem_tracker()); - _mem_tracker->enable_reset_zero(); -#else - _mem_tracker = std::make_shared(-1, label); -#endif + _mem_tracker = std::make_shared(MemTrackerLimiter::Type::COMPACTION, label); } Compaction::~Compaction() {} diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index 4461a240b5..4736454c09 100644 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -70,7 +70,7 @@ Status CumulativeCompaction::execute_compact_impl() { return Status::OLAPInternalError(OLAP_ERR_CUMULATIVE_CLONE_OCCURRED); } - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::COMPACTION); + SCOPED_ATTACH_TASK(_mem_tracker); // 3. 
do cumulative compaction, merge rowsets int64_t permits = get_compaction_permits(); diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 5ec23ea593..c9a4890fbb 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -25,6 +25,7 @@ #include "olap/schema.h" #include "olap/schema_change.h" #include "olap/storage_engine.h" +#include "runtime/load_channel_mgr.h" #include "runtime/row_batch.h" #include "runtime/tuple_row.h" #include "service/backend_options.h" @@ -283,12 +284,23 @@ void DeltaWriter::_reset_mem_table() { if (_tablet->enable_unique_key_merge_on_write() && _delete_bitmap == nullptr) { _delete_bitmap.reset(new DeleteBitmap(_tablet->tablet_id())); } +#ifndef BE_TEST + auto mem_table_insert_tracker = std::make_shared( + fmt::format("MemTableManualInsert:TabletId={}:MemTableNum={}#loadID={}", + std::to_string(tablet_id()), _mem_table_num, _load_id.to_string()), + nullptr, ExecEnv::GetInstance()->load_channel_mgr()->mem_tracker_set()); + auto mem_table_flush_tracker = std::make_shared( + fmt::format("MemTableHookFlush:TabletId={}:MemTableNum={}#loadID={}", + std::to_string(tablet_id()), _mem_table_num++, _load_id.to_string()), + nullptr, ExecEnv::GetInstance()->load_channel_mgr()->mem_tracker_set()); +#else auto mem_table_insert_tracker = std::make_shared( fmt::format("MemTableManualInsert:TabletId={}:MemTableNum={}#loadID={}", std::to_string(tablet_id()), _mem_table_num, _load_id.to_string())); auto mem_table_flush_tracker = std::make_shared( fmt::format("MemTableHookFlush:TabletId={}:MemTableNum={}#loadID={}", std::to_string(tablet_id()), _mem_table_num++, _load_id.to_string())); +#endif { std::lock_guard l(_mem_table_tracker_lock); _mem_table_tracker.push_back(mem_table_insert_tracker); diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp index a1b2a174ab..a6505d9bbe 100644 --- a/be/src/olap/lru_cache.cpp +++ b/be/src/olap/lru_cache.cpp @@ -436,7 +436,7 @@ ShardedLRUCache::ShardedLRUCache(const 
std::string& name, size_t total_capacity, _num_shards(num_shards), _shards(nullptr), _last_id(1) { - _mem_tracker = std::make_unique(-1, name); + _mem_tracker = std::make_unique(MemTrackerLimiter::Type::GLOBAL, name); CHECK(num_shards > 0) << "num_shards cannot be 0"; CHECK_EQ((num_shards & (num_shards - 1)), 0) << "num_shards should be power of two, but got " << num_shards; diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index f61e945bda..9faf757bf9 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -157,7 +157,7 @@ MemTable::~MemTable() { _flush_mem_tracker->set_consumption(0); DCHECK_EQ(_insert_mem_tracker->consumption(), 0) << std::endl - << MemTracker::log_usage(_insert_mem_tracker->make_snapshot(0)); + << MemTracker::log_usage(_insert_mem_tracker->make_snapshot()); DCHECK_EQ(_flush_mem_tracker->consumption(), 0); } diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 958e60137e..ead81f7367 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -125,7 +125,7 @@ Status StorageEngine::start_bg_threads() { RETURN_IF_ERROR(Thread::create( "StorageEngine", "path_scan_thread", [this, data_dir]() { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); this->_path_scan_thread_callback(data_dir); }, &path_scan_thread)); @@ -135,7 +135,7 @@ Status StorageEngine::start_bg_threads() { RETURN_IF_ERROR(Thread::create( "StorageEngine", "path_gc_thread", [this, data_dir]() { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); this->_path_gc_thread_callback(data_dir); }, &path_gc_thread)); diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 795ca8e145..e89d945f53 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -272,8 +272,7 @@ Status 
BetaRowsetWriter::_check_correctness(std::unique_ptrsegcompaction_mem_tracker(), - ThreadContext::TaskType::COMPACTION); + SCOPED_CONSUME_MEM_TRACKER(StorageEngine::instance()->segcompaction_mem_tracker()); // throttle segcompaction task if memory depleted. if (MemTrackerLimiter::sys_mem_exceed_limit_check(GB_EXCHANGE_BYTE)) { LOG(WARNING) << "skip segcompaction due to memory shortage"; diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 4ab9b80bdb..3081b37886 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -1596,10 +1596,10 @@ Status VSchemaChangeWithSorting::_inner_process(RowsetReaderSharedPtr rowset_rea } RETURN_IF_ERROR(_changer.change_block(ref_block.get(), new_block.get())); - if (!_mem_tracker->check_limit(_memory_limitation, new_block->allocated_bytes())) { + if (_mem_tracker->consumption() + new_block->allocated_bytes() > _memory_limitation) { RETURN_IF_ERROR(create_rowset()); - if (!_mem_tracker->check_limit(_memory_limitation, new_block->allocated_bytes())) { + if (_mem_tracker->consumption() + new_block->allocated_bytes() > _memory_limitation) { LOG(WARNING) << "Memory limitation is too small for Schema Change." 
<< " _memory_limitation=" << _memory_limitation << ", new_block->allocated_bytes()=" << new_block->allocated_bytes() diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 9da8c8e147..083814fef0 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -81,12 +81,6 @@ using strings::Substitute; namespace doris { DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(unused_rowsets_count, MetricUnit::ROWSETS); -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(segcompaction_mem_consumption, MetricUnit::BYTES, "", - mem_consumption, Labels({{"type", "segcompaction"}})); -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(compaction_mem_consumption, MetricUnit::BYTES, "", - mem_consumption, Labels({{"type", "compaction"}})); -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(schema_change_mem_consumption, MetricUnit::BYTES, "", - mem_consumption, Labels({{"type", "schema_change"}})); StorageEngine* StorageEngine::_s_instance = nullptr; @@ -112,19 +106,9 @@ StorageEngine::StorageEngine(const EngineOptions& options) _available_storage_medium_type_count(0), _effective_cluster_id(-1), _is_all_cluster_id_exist(true), - _segcompaction_mem_tracker( - std::make_shared(-1, "StorageEngine::SegCompaction")), - _compaction_mem_tracker( - std::make_shared(-1, "StorageEngine::AutoCompaction")), - _segment_meta_mem_tracker(std::make_unique("StorageEngine::SegmentMeta")), - _schema_change_mem_tracker( - std::make_shared(-1, "StorageEngine::SchemaChange")), - _clone_mem_tracker(std::make_shared(-1, "StorageEngine::Clone")), - _batch_load_mem_tracker( - std::make_shared(-1, "StorageEngine::BatchLoad")), - _consistency_mem_tracker( - std::make_shared(-1, "StorageEngine::Consistency")), - _mem_tracker(std::make_shared(-1, "StorageEngine::Self")), + _mem_tracker(std::make_unique("StorageEngine")), + _segcompaction_mem_tracker(std::make_unique("SegCompaction")), + _segment_meta_mem_tracker(std::make_unique("SegmentMeta")), _stop_background_threads_latch(1), _tablet_manager(new 
TabletManager(config::tablet_map_shard_size)), _txn_manager(new TxnManager(config::txn_map_shard_size, config::txn_shard_size)), @@ -138,19 +122,10 @@ StorageEngine::StorageEngine(const EngineOptions& options) // std::lock_guard lock(_gc_mutex); return _unused_rowsets.size(); }); - REGISTER_HOOK_METRIC(segcompaction_mem_consumption, - [this]() { return _segcompaction_mem_tracker->consumption(); }); - REGISTER_HOOK_METRIC(compaction_mem_consumption, - [this]() { return _compaction_mem_tracker->consumption(); }); - REGISTER_HOOK_METRIC(schema_change_mem_consumption, - [this]() { return _schema_change_mem_tracker->consumption(); }); } StorageEngine::~StorageEngine() { DEREGISTER_HOOK_METRIC(unused_rowsets_count); - DEREGISTER_HOOK_METRIC(segcompaction_mem_consumption); - DEREGISTER_HOOK_METRIC(compaction_mem_consumption); - DEREGISTER_HOOK_METRIC(schema_change_mem_consumption); _clear(); if (_base_compaction_thread_pool) { @@ -177,7 +152,7 @@ void StorageEngine::load_data_dirs(const std::vector& data_dirs) { std::vector threads; for (auto data_dir : data_dirs) { threads.emplace_back([this, data_dir] { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); auto res = data_dir->load(); if (!res.ok()) { LOG(WARNING) << "io error when init load tables. 
res=" << res @@ -223,7 +198,7 @@ Status StorageEngine::_init_store_map() { _tablet_manager.get(), _txn_manager.get()); tmp_stores.emplace_back(store); threads.emplace_back([this, store, &error_msg_lock, &error_msg]() { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); auto st = store->init(); if (!st.ok()) { { diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index 93f1d0c438..ab7fa4ac7d 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -179,19 +179,8 @@ public: Status get_compaction_status_json(std::string* result); - std::shared_ptr segcompaction_mem_tracker() { - return _segcompaction_mem_tracker; - } - std::shared_ptr compaction_mem_tracker() { return _compaction_mem_tracker; } MemTracker* segment_meta_mem_tracker() { return _segment_meta_mem_tracker.get(); } - std::shared_ptr schema_change_mem_tracker() { - return _schema_change_mem_tracker; - } - std::shared_ptr clone_mem_tracker() { return _clone_mem_tracker; } - std::shared_ptr batch_load_mem_tracker() { return _batch_load_mem_tracker; } - std::shared_ptr consistency_mem_tracker() { - return _consistency_mem_tracker; - } + MemTracker* segcompaction_mem_tracker() { return _segcompaction_mem_tracker.get(); } // check cumulative compaction config void check_cumulative_compaction_config(); @@ -334,24 +323,13 @@ private: // map, if we use RowsetId as the key, we need custom hash func std::unordered_map _unused_rowsets; + // StorageEngine oneself + std::unique_ptr _mem_tracker; // Count the memory consumption of segment compaction tasks. - std::shared_ptr _segcompaction_mem_tracker; - // Count the memory consumption of all Base and Cumulative tasks. - std::shared_ptr _compaction_mem_tracker; + std::unique_ptr _segcompaction_mem_tracker; // This mem tracker is only for tracking memory use by segment meta data such as footer or index page. 
// The memory consumed by querying is tracked in segment iterator. std::unique_ptr _segment_meta_mem_tracker; - // Count the memory consumption of all SchemaChange tasks. - std::shared_ptr _schema_change_mem_tracker; - // Count the memory consumption of all EngineCloneTask. - // Note: Memory that does not contain make/release snapshots. - std::shared_ptr _clone_mem_tracker; - // Count the memory consumption of all EngineBatchLoadTask. - std::shared_ptr _batch_load_mem_tracker; - // Count the memory consumption of all EngineChecksumTask. - std::shared_ptr _consistency_mem_tracker; - // StorageEngine oneself - std::shared_ptr _mem_tracker; CountDownLatch _stop_background_threads_latch; scoped_refptr _unused_rowset_monitor_thread; diff --git a/be/src/olap/task/engine_alter_tablet_task.cpp b/be/src/olap/task/engine_alter_tablet_task.cpp index 8164049296..7ec1a4d4d7 100644 --- a/be/src/olap/task/engine_alter_tablet_task.cpp +++ b/be/src/olap/task/engine_alter_tablet_task.cpp @@ -26,15 +26,15 @@ namespace doris { EngineAlterTabletTask::EngineAlterTabletTask(const TAlterTabletReqV2& request) : _alter_tablet_req(request) { _mem_tracker = std::make_shared( - config::memory_limitation_per_thread_for_schema_change_bytes, + MemTrackerLimiter::Type::SCHEMA_CHANGE, fmt::format("EngineAlterTabletTask#baseTabletId={}:newTabletId={}", std::to_string(_alter_tablet_req.base_tablet_id), std::to_string(_alter_tablet_req.new_tablet_id)), - StorageEngine::instance()->schema_change_mem_tracker()); + config::memory_limitation_per_thread_for_schema_change_bytes); } Status EngineAlterTabletTask::execute() { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_ATTACH_TASK(_mem_tracker); DorisMetrics::instance()->create_rollup_requests_total->increment(1); Status res = SchemaChangeHandler::process_alter_tablet_v2(_alter_tablet_req); diff --git a/be/src/olap/task/engine_batch_load_task.cpp b/be/src/olap/task/engine_batch_load_task.cpp index 9b56055339..59a9cf16bc 
100644 --- a/be/src/olap/task/engine_batch_load_task.cpp +++ b/be/src/olap/task/engine_batch_load_task.cpp @@ -49,16 +49,15 @@ namespace doris { EngineBatchLoadTask::EngineBatchLoadTask(TPushReq& push_req, std::vector* tablet_infos) : _push_req(push_req), _tablet_infos(tablet_infos) { _mem_tracker = std::make_shared( - -1, + MemTrackerLimiter::Type::BATCHLOAD, fmt::format("EngineBatchLoadTask#pushType={}:tabletId={}", _push_req.push_type, - std::to_string(_push_req.tablet_id)), - StorageEngine::instance()->batch_load_mem_tracker()); + std::to_string(_push_req.tablet_id))); } EngineBatchLoadTask::~EngineBatchLoadTask() {} Status EngineBatchLoadTask::execute() { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_ATTACH_TASK(_mem_tracker); Status status; if (_push_req.push_type == TPushType::LOAD || _push_req.push_type == TPushType::LOAD_V2) { RETURN_IF_ERROR(_init()); diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp index b6f25bb2a4..e040ffcece 100644 --- a/be/src/olap/task/engine_checksum_task.cpp +++ b/be/src/olap/task/engine_checksum_task.cpp @@ -27,12 +27,12 @@ EngineChecksumTask::EngineChecksumTask(TTabletId tablet_id, TSchemaHash schema_h TVersion version, uint32_t* checksum) : _tablet_id(tablet_id), _schema_hash(schema_hash), _version(version), _checksum(checksum) { _mem_tracker = std::make_shared( - -1, "EngineChecksumTask#tabletId=" + std::to_string(tablet_id), - StorageEngine::instance()->consistency_mem_tracker()); + MemTrackerLimiter::Type::CONSISTENCY, + "EngineChecksumTask#tabletId=" + std::to_string(tablet_id)); } Status EngineChecksumTask::execute() { - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_ATTACH_TASK(_mem_tracker); return _compute_checksum(); } // execute diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 9a42d8e0e9..b015adfe87 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ 
b/be/src/olap/task/engine_clone_task.cpp @@ -55,13 +55,13 @@ EngineCloneTask::EngineCloneTask(const TCloneReq& clone_req, const TMasterInfo& _signature(signature), _master_info(master_info) { _mem_tracker = std::make_shared( - -1, "EngineCloneTask#tabletId=" + std::to_string(_clone_req.tablet_id), - StorageEngine::instance()->clone_mem_tracker()); + MemTrackerLimiter::Type::CLONE, + "EngineCloneTask#tabletId=" + std::to_string(_clone_req.tablet_id)); } Status EngineCloneTask::execute() { // register the tablet to avoid it is deleted by gc thread during clone process - SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE); + SCOPED_ATTACH_TASK(_mem_tracker); StorageEngine::instance()->tablet_manager()->register_clone_tablet(_clone_req.tablet_id); Status st = _do_clone(); StorageEngine::instance()->tablet_manager()->unregister_clone_tablet(_clone_req.tablet_id); diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index bd515e1c06..69af332894 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -99,7 +99,6 @@ set(RUNTIME_FILES memory/chunk_allocator.cpp memory/mem_tracker_limiter.cpp memory/mem_tracker.cpp - memory/mem_tracker_task_pool.cpp memory/thread_mem_tracker_mgr.cpp fold_constant_executor.cpp cache/result_node.cpp diff --git a/be/src/runtime/buffer_control_block.cpp b/be/src/runtime/buffer_control_block.cpp index 79496b449b..03bd6d3466 100644 --- a/be/src/runtime/buffer_control_block.cpp +++ b/be/src/runtime/buffer_control_block.cpp @@ -30,7 +30,7 @@ void GetResultBatchCtx::on_failure(const Status& status) { status.to_protobuf(result->mutable_status()); { // call by result sink - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); done->Run(); } delete this; @@ -45,7 +45,7 @@ void GetResultBatchCtx::on_close(int64_t packet_seq, QueryStatistics* statistics 
result->set_packet_seq(packet_seq); result->set_eos(true); { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); done->Run(); } delete this; @@ -73,7 +73,7 @@ void GetResultBatchCtx::on_data(const std::unique_ptr& t_resul } st.to_protobuf(result->mutable_status()); { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); done->Run(); } delete this; diff --git a/be/src/runtime/buffered_block_mgr2.cc b/be/src/runtime/buffered_block_mgr2.cc index fa2d1b70b0..e610cf3803 100644 --- a/be/src/runtime/buffered_block_mgr2.cc +++ b/be/src/runtime/buffered_block_mgr2.cc @@ -251,7 +251,7 @@ int64_t BufferedBlockMgr2::remaining_unreserved_buffers() const { int64_t num_buffers = _free_io_buffers.size() + _unpinned_blocks.size() + _non_local_outstanding_writes; num_buffers += - thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->spare_capacity() / + thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->spare_capacity() / max_block_size(); num_buffers -= _unfullfilled_reserved_buffers; return num_buffers; @@ -358,9 +358,9 @@ Status BufferedBlockMgr2::get_new_block(Client* client, Block* unpin_block, Bloc if (len > 0 && len < _max_block_size) { DCHECK(unpin_block == nullptr); - Status st = thread_context() - ->_thread_mem_tracker_mgr->limiter_mem_tracker_raw() - ->check_limit(len); + Status st = + thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit( + len); WARN_IF_ERROR(st, "get_new_block failed"); if (st) { client->_tracker->consume(len); @@ -986,7 +986,7 @@ Status BufferedBlockMgr2::find_buffer(unique_lock& lock, BufferDescriptor // First, try to allocate a new buffer. 
if (_free_io_buffers.size() < _block_write_threshold && - thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit( + thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit( _max_block_size)) { _mem_tracker->consume(_max_block_size); uint8_t* new_buffer = new uint8_t[_max_block_size]; @@ -1155,9 +1155,9 @@ string BufferedBlockMgr2::debug_internal() const { << " Unfullfilled reserved buffers: " << _unfullfilled_reserved_buffers << endl << " BUffer Block Mgr Used memory: " << _mem_tracker->consumption() << " Instance remaining memory: " - << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->spare_capacity() + << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->spare_capacity() << " (#blocks=" - << (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->spare_capacity() / + << (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->spare_capacity() / _max_block_size) << ")" << endl << " Block write threshold: " << _block_write_threshold; diff --git a/be/src/runtime/data_stream_recvr.cc b/be/src/runtime/data_stream_recvr.cc index c9533959f8..59d46102b3 100644 --- a/be/src/runtime/data_stream_recvr.cc +++ b/be/src/runtime/data_stream_recvr.cc @@ -186,10 +186,7 @@ Status DataStreamRecvr::SenderQueue::get_batch(RowBatch** next_batch) { if (!_pending_closures.empty()) { auto closure_pair = _pending_closures.front(); - { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); - closure_pair.first->Run(); - } + closure_pair.first->Run(); _pending_closures.pop_front(); closure_pair.second.stop(); @@ -339,11 +336,8 @@ void DataStreamRecvr::SenderQueue::cancel() { { std::lock_guard l(_lock); - { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); - for (auto closure_pair : _pending_closures) { - closure_pair.first->Run(); - } + for (auto closure_pair : _pending_closures) { + 
closure_pair.first->Run(); } _pending_closures.clear(); } @@ -357,11 +351,8 @@ void DataStreamRecvr::SenderQueue::close() { std::lock_guard l(_lock); _is_cancelled = true; - { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); - for (auto closure_pair : _pending_closures) { - closure_pair.first->Run(); - } + for (auto closure_pair : _pending_closures) { + closure_pair.first->Run(); } _pending_closures.clear(); } diff --git a/be/src/runtime/data_stream_sender.cpp b/be/src/runtime/data_stream_sender.cpp index cb02491701..df1495b750 100644 --- a/be/src/runtime/data_stream_sender.cpp +++ b/be/src/runtime/data_stream_sender.cpp @@ -138,7 +138,7 @@ Status DataStreamSender::Channel::send_batch(PRowBatch* batch, bool eos) { _closure->ref(); } else { RETURN_IF_ERROR(_wait_last_brpc()); - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _closure->cntl.Reset(); } VLOG_ROW << "Channel::send_batch() instance_id=" << _fragment_instance_id @@ -160,7 +160,6 @@ Status DataStreamSender::Channel::send_batch(PRowBatch* batch, bool eos) { if (_parent->_transfer_large_data_by_brpc && _brpc_request.has_row_batch() && _brpc_request.row_batch().has_tuple_data() && _brpc_request.ByteSizeLong() > MIN_HTTP_BRPC_SIZE) { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); Status st = request_embed_attachment_contain_tuple>( &_brpc_request, _closure); @@ -174,11 +173,17 @@ Status DataStreamSender::Channel::send_batch(PRowBatch* batch, bool eos) { brpc_url + "/PInternalServiceImpl/transmit_data_by_http"; _closure->cntl.http_request().set_method(brpc::HTTP_METHOD_POST); _closure->cntl.http_request().set_content_type("application/json"); - _brpc_http_stub->transmit_data_by_http(&_closure->cntl, NULL, &_closure->result, _closure); + { + 
SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); + _brpc_http_stub->transmit_data_by_http(&_closure->cntl, NULL, &_closure->result, + _closure); + } } else { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); _closure->cntl.http_request().Clear(); - _brpc_stub->transmit_data(&_closure->cntl, &_brpc_request, &_closure->result, _closure); + { + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); + _brpc_stub->transmit_data(&_closure->cntl, &_brpc_request, &_closure->result, _closure); + } } if (batch != nullptr) { diff --git a/be/src/runtime/disk_io_mgr.cc b/be/src/runtime/disk_io_mgr.cc index 1f506d82b1..2f330f250c 100644 --- a/be/src/runtime/disk_io_mgr.cc +++ b/be/src/runtime/disk_io_mgr.cc @@ -347,7 +347,8 @@ DiskIoMgr::~DiskIoMgr() { } Status DiskIoMgr::init(const int64_t mem_limit) { - _mem_tracker = std::make_unique(mem_limit, "DiskIO"); + _mem_tracker = std::make_unique(MemTrackerLimiter::Type::GLOBAL, "DiskIO", + mem_limit); for (int i = 0; i < _disk_queues.size(); ++i) { _disk_queues[i] = new DiskQueue(i); diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index 81af4a18f3..4bd9fd73af 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -49,7 +49,6 @@ class LoadStreamMgr; class MemTrackerLimiter; class MemTracker; class StorageEngine; -class MemTrackerTaskPool; class PriorityThreadPool; class PriorityWorkStealingThreadPool; class ResultBufferMgr; @@ -117,27 +116,12 @@ public: return nullptr; } - std::shared_ptr process_mem_tracker() { return _process_mem_tracker; } - void set_global_mem_tracker(const std::shared_ptr& process_tracker, - const std::shared_ptr& orphan_tracker, - const std::shared_ptr& nursery_mem_tracker, - const std::shared_ptr& bthread_mem_tracker) { - _process_mem_tracker = process_tracker; + void set_orphan_mem_tracker(const std::shared_ptr& orphan_tracker) { _orphan_mem_tracker = 
orphan_tracker; _orphan_mem_tracker_raw = orphan_tracker.get(); - _nursery_mem_tracker = nursery_mem_tracker; - _bthread_mem_tracker = bthread_mem_tracker; - } - std::shared_ptr allocator_cache_mem_tracker() { - return _allocator_cache_mem_tracker; } std::shared_ptr orphan_mem_tracker() { return _orphan_mem_tracker; } MemTrackerLimiter* orphan_mem_tracker_raw() { return _orphan_mem_tracker_raw; } - std::shared_ptr nursery_mem_tracker() { return _nursery_mem_tracker; } - std::shared_ptr bthread_mem_tracker() { return _bthread_mem_tracker; } - std::shared_ptr query_pool_mem_tracker() { return _query_pool_mem_tracker; } - std::shared_ptr load_pool_mem_tracker() { return _load_pool_mem_tracker; } - MemTrackerTaskPool* task_pool_mem_tracker_registry() { return _task_pool_mem_tracker_registry; } ThreadResourceMgr* thread_mgr() { return _thread_mgr; } PriorityThreadPool* scan_thread_pool() { return _scan_thread_pool; } PriorityThreadPool* remote_scan_thread_pool() { return _remote_scan_thread_pool; } @@ -194,7 +178,7 @@ private: Status _init(const std::vector& store_paths); void _destroy(); - Status _init_mem_tracker(); + Status _init_mem_env(); /// Initialise 'buffer_pool_' with given capacity. void _init_buffer_pool(int64_t min_page_len, int64_t capacity, int64_t clean_pages_limit); @@ -217,11 +201,6 @@ private: ClientCache* _broker_client_cache = nullptr; ThreadResourceMgr* _thread_mgr = nullptr; - // The ancestor for all trackers. Every tracker is visible from the process down. - // Not limit total memory by process tracker, and it's just used to track virtual memory of process. - std::shared_ptr _process_mem_tracker; - // tcmalloc/jemalloc allocator cache tracker, Including thread cache, free heap, etc. - std::shared_ptr _allocator_cache_mem_tracker; // The default tracker consumed by mem hook. If the thread does not attach other trackers, // by default all consumption will be passed to the process tracker through the orphan tracker. 
// In real time, `consumption of all limiter trackers` + `orphan tracker consumption` = `process tracker consumption`. @@ -229,15 +208,6 @@ private: // and the consumption of the orphan mem tracker is close to 0, but greater than 0. std::shared_ptr _orphan_mem_tracker; MemTrackerLimiter* _orphan_mem_tracker_raw; - // Parent is orphan, Nursery of orphan memory after manually switching thread mem tracker - std::shared_ptr _nursery_mem_tracker; - // Parent is orphan, bthread default mem tracker - std::shared_ptr _bthread_mem_tracker; - // The ancestor for all querys tracker. - std::shared_ptr _query_pool_mem_tracker; - // The ancestor for all load tracker. - std::shared_ptr _load_pool_mem_tracker; - MemTrackerTaskPool* _task_pool_mem_tracker_registry; // The following two thread pools are used in different scenarios. // _scan_thread_pool is a priority thread pool. diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index d18e02b90b..bec666859c 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -39,7 +39,6 @@ #include "runtime/load_channel_mgr.h" #include "runtime/load_path_mgr.h" #include "runtime/memory/mem_tracker.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/result_buffer_mgr.h" #include "runtime/result_queue_mgr.h" #include "runtime/routine_load/routine_load_task_executor.h" @@ -72,10 +71,6 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(send_batch_thread_pool_thread_num, MetricUnit DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(send_batch_thread_pool_queue_size, MetricUnit::NOUNIT); DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(download_cache_thread_pool_thread_num, MetricUnit::NOUNIT); DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(download_cache_thread_pool_queue_size, MetricUnit::NOUNIT); -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(query_mem_consumption, MetricUnit::BYTES, "", mem_consumption, - Labels({{"type", "query"}})); -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(load_mem_consumption, MetricUnit::BYTES, "", mem_consumption, 
- Labels({{"type", "load"}})); Status ExecEnv::init(ExecEnv* env, const std::vector& store_paths) { return env->_init(store_paths); @@ -100,7 +95,6 @@ Status ExecEnv::_init(const std::vector& store_paths) { _backend_client_cache = new BackendServiceClientCache(config::max_client_cache_size_per_host); _frontend_client_cache = new FrontendServiceClientCache(config::max_client_cache_size_per_host); _broker_client_cache = new BrokerServiceClientCache(config::max_client_cache_size_per_host); - _task_pool_mem_tracker_registry = new MemTrackerTaskPool(); _thread_mgr = new ThreadResourceMgr(); if (config::doris_enable_scanner_thread_pool_per_disk && config::doris_scanner_thread_pool_thread_num >= store_paths.size() && @@ -169,42 +163,22 @@ Status ExecEnv::_init(const std::vector& store_paths) { _small_file_mgr->init(); _scanner_scheduler->init(this); - _init_mem_tracker(); + _init_mem_env(); - RETURN_IF_ERROR( - _load_channel_mgr->init(ExecEnv::GetInstance()->process_mem_tracker()->limit())); + RETURN_IF_ERROR(_load_channel_mgr->init(MemInfo::mem_limit())); _heartbeat_flags = new HeartbeatFlags(); _register_metrics(); _is_init = true; return Status::OK(); } -Status ExecEnv::_init_mem_tracker() { - // 1. init global memory limit. - int64_t global_memory_limit_bytes = 0; +Status ExecEnv::_init_mem_env() { bool is_percent = false; std::stringstream ss; - global_memory_limit_bytes = - ParseUtil::parse_mem_spec(config::mem_limit, -1, MemInfo::physical_mem(), &is_percent); - if (global_memory_limit_bytes <= 0) { - ss << "Failed to parse mem limit from '" + config::mem_limit + "'."; - return Status::InternalError(ss.str()); - } - - if (global_memory_limit_bytes > MemInfo::physical_mem()) { - LOG(WARNING) << "Memory limit " - << PrettyPrinter::print(global_memory_limit_bytes, TUnit::BYTES) - << " exceeds physical memory of " - << PrettyPrinter::print(MemInfo::physical_mem(), TUnit::BYTES) - << ". 
Using physical memory instead"; - global_memory_limit_bytes = MemInfo::physical_mem(); - } - _process_mem_tracker = - std::make_shared(global_memory_limit_bytes, "Process"); - _orphan_mem_tracker = std::make_shared(-1, "Orphan", _process_mem_tracker); + // 1. init mem tracker + _orphan_mem_tracker = + std::make_shared(MemTrackerLimiter::Type::GLOBAL, "Orphan"); _orphan_mem_tracker_raw = _orphan_mem_tracker.get(); - _nursery_mem_tracker = std::make_shared(-1, "Nursery", _orphan_mem_tracker); - _bthread_mem_tracker = std::make_shared(-1, "Bthread", _orphan_mem_tracker); thread_context()->_thread_mem_tracker_mgr->init(); thread_context()->_thread_mem_tracker_mgr->set_check_attach(false); #if defined(USE_MEM_TRACKER) && !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && \ @@ -213,18 +187,6 @@ Status ExecEnv::_init_mem_tracker() { init_hook(); } #endif - _allocator_cache_mem_tracker = std::make_shared("Tc/JemallocAllocatorCache"); - _query_pool_mem_tracker = - std::make_shared(-1, "QueryPool", _process_mem_tracker); - REGISTER_HOOK_METRIC(query_mem_consumption, - [this]() { return _query_pool_mem_tracker->consumption(); }); - _load_pool_mem_tracker = - std::make_shared(-1, "LoadPool", _process_mem_tracker); - REGISTER_HOOK_METRIC(load_mem_consumption, - [this]() { return _load_pool_mem_tracker->consumption(); }); - LOG(INFO) << "Using global memory limit: " - << PrettyPrinter::print(global_memory_limit_bytes, TUnit::BYTES) - << ", origin config value: " << config::mem_limit; // 2. 
init buffer pool if (!BitUtil::IsPowerOf2(config::min_buffer_size)) { @@ -232,9 +194,8 @@ Status ExecEnv::_init_mem_tracker() { return Status::InternalError(ss.str()); } - int64_t buffer_pool_limit = - ParseUtil::parse_mem_spec(config::buffer_pool_limit, global_memory_limit_bytes, - MemInfo::physical_mem(), &is_percent); + int64_t buffer_pool_limit = ParseUtil::parse_mem_spec( + config::buffer_pool_limit, MemInfo::mem_limit(), MemInfo::physical_mem(), &is_percent); if (buffer_pool_limit <= 0) { ss << "Invalid config buffer_pool_limit value, must be a percentage or " "positive bytes value or percentage: " @@ -242,7 +203,7 @@ Status ExecEnv::_init_mem_tracker() { return Status::InternalError(ss.str()); } buffer_pool_limit = BitUtil::RoundDown(buffer_pool_limit, config::min_buffer_size); - while (!is_percent && buffer_pool_limit > global_memory_limit_bytes / 2) { + while (!is_percent && buffer_pool_limit > MemInfo::mem_limit() / 2) { // If buffer_pool_limit is not a percentage, and the value exceeds 50% of the total memory limit, // it is forced to be reduced to less than 50% of the total memory limit. // This is to ensure compatibility. In principle, buffer_pool_limit should be set as a percentage. @@ -271,9 +232,9 @@ Status ExecEnv::_init_mem_tracker() { // 3. init storage page cache int64_t storage_cache_limit = - ParseUtil::parse_mem_spec(config::storage_page_cache_limit, global_memory_limit_bytes, + ParseUtil::parse_mem_spec(config::storage_page_cache_limit, MemInfo::mem_limit(), MemInfo::physical_mem(), &is_percent); - while (!is_percent && storage_cache_limit > global_memory_limit_bytes / 2) { + while (!is_percent && storage_cache_limit > MemInfo::mem_limit() / 2) { // Reason same as buffer_pool_limit storage_cache_limit = storage_cache_limit / 2; } @@ -301,7 +262,7 @@ Status ExecEnv::_init_mem_tracker() { SegmentLoader::create_global_instance(segment_cache_capacity); // 4. 
init other managers - RETURN_IF_ERROR(_disk_io_mgr->init(global_memory_limit_bytes)); + RETURN_IF_ERROR(_disk_io_mgr->init(MemInfo::mem_limit())); RETURN_IF_ERROR(_tmp_file_mgr->init()); // 5. init chunk allocator @@ -312,7 +273,7 @@ Status ExecEnv::_init_mem_tracker() { } int64_t chunk_reserved_bytes_limit = - ParseUtil::parse_mem_spec(config::chunk_reserved_bytes_limit, global_memory_limit_bytes, + ParseUtil::parse_mem_spec(config::chunk_reserved_bytes_limit, MemInfo::mem_limit(), MemInfo::physical_mem(), &is_percent); if (chunk_reserved_bytes_limit <= 0) { ss << "Invalid config chunk_reserved_bytes_limit value, must be a percentage or " @@ -408,12 +369,8 @@ void ExecEnv::_destroy() { SAFE_DELETE(_routine_load_task_executor); SAFE_DELETE(_external_scan_context_mgr); SAFE_DELETE(_heartbeat_flags); - SAFE_DELETE(_task_pool_mem_tracker_registry); SAFE_DELETE(_scanner_scheduler); - DEREGISTER_HOOK_METRIC(query_mem_consumption); - DEREGISTER_HOOK_METRIC(load_mem_consumption); - _is_init = false; } diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index 8b5a1c5c44..8085ddf13b 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -640,6 +640,30 @@ Status FragmentMgr::exec_plan_fragment(const TExecPlanFragmentParams& params, Fi fragments_ctx->timeout_second = params.query_options.query_timeout; _set_scan_concurrency(params, fragments_ctx.get()); + bool has_query_mem_tracker = + params.query_options.__isset.mem_limit && (params.query_options.mem_limit > 0); + int64_t bytes_limit = has_query_mem_tracker ? params.query_options.mem_limit : -1; + if (bytes_limit > MemInfo::mem_limit()) { + VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES) + << " exceeds process memory limit of " + << PrettyPrinter::print(MemInfo::mem_limit(), TUnit::BYTES) + << ". 
Using process memory limit instead"; + bytes_limit = MemInfo::mem_limit(); + } + if (params.query_options.query_type == TQueryType::SELECT) { + fragments_ctx->query_mem_tracker = std::make_shared( + MemTrackerLimiter::Type::QUERY, + fmt::format("Query#Id={}", print_id(fragments_ctx->query_id)), bytes_limit); + } else if (params.query_options.query_type == TQueryType::LOAD) { + fragments_ctx->query_mem_tracker = std::make_shared( + MemTrackerLimiter::Type::LOAD, + fmt::format("Load#Id={}", print_id(fragments_ctx->query_id)), bytes_limit); + } + if (params.query_options.__isset.is_report_success && + params.query_options.is_report_success) { + fragments_ctx->query_mem_tracker->enable_print_log_usage(); + } + { // Find _fragments_ctx_map again, in case some other request has already // create the query fragments context. @@ -647,6 +671,9 @@ Status FragmentMgr::exec_plan_fragment(const TExecPlanFragmentParams& params, Fi auto search = _fragments_ctx_map.find(params.params.query_id); if (search == _fragments_ctx_map.end()) { _fragments_ctx_map.insert(std::make_pair(fragments_ctx->query_id, fragments_ctx)); + LOG(INFO) << "Register query/load memory tracker, query/load id: " + << print_id(fragments_ctx->query_id) + << " limit: " << PrettyPrinter::print(bytes_limit, TUnit::BYTES); } else { // Already has a query fragmentscontext, use it fragments_ctx = search->second; diff --git a/be/src/runtime/load_channel_mgr.cpp b/be/src/runtime/load_channel_mgr.cpp index 8292b25656..6eef349fe1 100644 --- a/be/src/runtime/load_channel_mgr.cpp +++ b/be/src/runtime/load_channel_mgr.cpp @@ -71,6 +71,8 @@ Status LoadChannelMgr::init(int64_t process_mem_limit) { _load_hard_mem_limit = calc_process_max_load_memory(process_mem_limit); _load_soft_mem_limit = _load_hard_mem_limit * config::load_process_soft_mem_limit_percent / 100; _mem_tracker = std::make_unique("LoadChannelMgr"); + _mem_tracker_set = std::make_unique(MemTrackerLimiter::Type::LOAD, + "LoadChannelMgrTrackerSet"); 
REGISTER_HOOK_METRIC(load_channel_mem_consumption, [this]() { return _mem_tracker->consumption(); }); _last_success_channel = new_lru_cache("LastestSuccessChannelCache", 1024); @@ -94,8 +96,15 @@ Status LoadChannelMgr::open(const PTabletWriterOpenRequest& params) { bool is_high_priority = (params.has_is_high_priority() && params.is_high_priority()); // Use the same mem limit as LoadChannelMgr for a single load channel +#ifndef BE_TEST + auto channel_mem_tracker = std::make_unique( + fmt::format("LoadChannel#senderIp={}#loadID={}", params.sender_ip(), + load_id.to_string()), + nullptr, ExecEnv::GetInstance()->load_channel_mgr()->mem_tracker_set()); +#else auto channel_mem_tracker = std::make_unique(fmt::format( "LoadChannel#senderIp={}#loadID={}", params.sender_ip(), load_id.to_string())); +#endif channel.reset(new LoadChannel(load_id, std::move(channel_mem_tracker), channel_timeout_s, is_high_priority, params.sender_ip(), params.is_vectorized())); diff --git a/be/src/runtime/load_channel_mgr.h b/be/src/runtime/load_channel_mgr.h index 6b30503f0c..3f27eafd0e 100644 --- a/be/src/runtime/load_channel_mgr.h +++ b/be/src/runtime/load_channel_mgr.h @@ -67,6 +67,7 @@ public: } _mem_tracker->set_consumption(mem_usage); } + MemTrackerLimiter* mem_tracker_set() { return _mem_tracker_set.get(); } private: template @@ -91,6 +92,8 @@ protected: // check the total load channel mem consumption of this Backend std::unique_ptr _mem_tracker; + // Associate load channel tracker and memtable tracker, avoid default association to Orphan tracker. 
+ std::unique_ptr _mem_tracker_set; int64_t _load_hard_mem_limit = -1; int64_t _load_soft_mem_limit = -1; diff --git a/be/src/runtime/mem_pool.cpp b/be/src/runtime/mem_pool.cpp index d49389353d..6d2760b389 100644 --- a/be/src/runtime/mem_pool.cpp +++ b/be/src/runtime/mem_pool.cpp @@ -134,7 +134,7 @@ Status MemPool::find_chunk(size_t min_size, bool check_limits) { chunk_size = BitUtil::RoundUpToPowerOfTwo(chunk_size); if (check_limits && - !thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit( + !thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit( chunk_size)) { return Status::MemoryAllocFailed("MemPool find new chunk {} bytes faild, exceed limit", chunk_size); diff --git a/be/src/runtime/memory/chunk_allocator.cpp b/be/src/runtime/memory/chunk_allocator.cpp index 43acc79538..b812734b4c 100644 --- a/be/src/runtime/memory/chunk_allocator.cpp +++ b/be/src/runtime/memory/chunk_allocator.cpp @@ -135,7 +135,8 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit) _steal_arena_limit(reserve_limit * 0.1), _reserved_bytes(0), _arenas(CpuInfo::get_max_num_cores()) { - _mem_tracker = std::make_unique(-1, "ChunkAllocator"); + _mem_tracker = + std::make_unique(MemTrackerLimiter::Type::GLOBAL, "ChunkAllocator"); for (int i = 0; i < _arenas.size(); ++i) { _arenas[i].reset(new ChunkArena()); } diff --git a/be/src/runtime/memory/mem_tracker.cpp b/be/src/runtime/memory/mem_tracker.cpp index 0604d538dc..bf7e308ff3 100644 --- a/be/src/runtime/memory/mem_tracker.cpp +++ b/be/src/runtime/memory/mem_tracker.cpp @@ -41,7 +41,8 @@ struct TrackerGroup { // Multiple groups are used to reduce the impact of locks. 
static std::vector mem_tracker_pool(1000); -MemTracker::MemTracker(const std::string& label, RuntimeProfile* profile) { +MemTracker::MemTracker(const std::string& label, RuntimeProfile* profile, MemTrackerLimiter* parent) + : _label(label) { if (profile == nullptr) { _consumption = std::make_shared(TUnit::BYTES); } else { @@ -57,75 +58,57 @@ MemTracker::MemTracker(const std::string& label, RuntimeProfile* profile) { _consumption = profile->AddSharedHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES); } - DCHECK(thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw() != nullptr); - MemTrackerLimiter* parent = - thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw(); - _label = fmt::format("[Observer] {} | {}", label, parent->label()); - _bind_group_num = parent->group_num(); + if (parent) { + _parent_label = parent->label(); + _parent_group_num = parent->group_num(); + } else { + DCHECK(thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker() != nullptr); + _parent_label = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->label(); + _parent_group_num = + thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->group_num(); + } { - std::lock_guard l(mem_tracker_pool[_bind_group_num].group_lock); - _tracker_group_it = mem_tracker_pool[_bind_group_num].trackers.insert( - mem_tracker_pool[_bind_group_num].trackers.end(), this); + std::lock_guard l(mem_tracker_pool[_parent_group_num].group_lock); + _tracker_group_it = mem_tracker_pool[_parent_group_num].trackers.insert( + mem_tracker_pool[_parent_group_num].trackers.end(), this); } } MemTracker::~MemTracker() { - if (_bind_group_num != -1) { - std::lock_guard l(mem_tracker_pool[_bind_group_num].group_lock); - if (_tracker_group_it != mem_tracker_pool[_bind_group_num].trackers.end()) { - mem_tracker_pool[_bind_group_num].trackers.erase(_tracker_group_it); - _tracker_group_it = mem_tracker_pool[_bind_group_num].trackers.end(); + if (_parent_group_num != -1) { + 
std::lock_guard l(mem_tracker_pool[_parent_group_num].group_lock); + if (_tracker_group_it != mem_tracker_pool[_parent_group_num].trackers.end()) { + mem_tracker_pool[_parent_group_num].trackers.erase(_tracker_group_it); + _tracker_group_it = mem_tracker_pool[_parent_group_num].trackers.end(); } } } -MemTracker::Snapshot MemTracker::make_snapshot(size_t level) const { +MemTracker::Snapshot MemTracker::make_snapshot() const { Snapshot snapshot; - snapshot.label = split(_label, " | ")[0]; - snapshot.parent = split(_label, " | ")[1]; - snapshot.level = level; + snapshot.label = _label; + snapshot.parent_label = _parent_label; snapshot.limit = -1; snapshot.cur_consumption = _consumption->current_value(); snapshot.peak_consumption = _consumption->value(); - snapshot.child_count = 0; return snapshot; } -void MemTracker::make_group_snapshot(std::vector* snapshots, size_t level, - int64_t group_num, std::string related_label) { +void MemTracker::make_group_snapshot(std::vector* snapshots, + int64_t group_num, std::string parent_label) { std::lock_guard l(mem_tracker_pool[group_num].group_lock); for (auto tracker : mem_tracker_pool[group_num].trackers) { - if (split(tracker->label(), " | ")[1] == related_label) { - snapshots->push_back(tracker->make_snapshot(level)); + if (tracker->parent_label() == parent_label) { + snapshots->push_back(tracker->make_snapshot()); } } } std::string MemTracker::log_usage(MemTracker::Snapshot snapshot) { return fmt::format("MemTracker Label={}, Parent Label={}, Used={}({} B), Peak={}({} B)", - snapshot.label, snapshot.parent, print_bytes(snapshot.cur_consumption), + snapshot.label, snapshot.type, print_bytes(snapshot.cur_consumption), snapshot.cur_consumption, print_bytes(snapshot.peak_consumption), snapshot.peak_consumption); } -static std::unordered_map> global_mem_trackers; -static std::mutex global_trackers_lock; - -std::shared_ptr MemTracker::get_global_mem_tracker(const std::string& label) { - std::lock_guard l(global_trackers_lock); - 
if (global_mem_trackers.find(label) != global_mem_trackers.end()) { - return global_mem_trackers[label]; - } else { - global_mem_trackers.emplace( - label, std::make_shared(fmt::format("[Global] {}", label))); - return global_mem_trackers[label]; - } -} - -void MemTracker::make_global_mem_tracker_snapshot(std::vector* snapshots) { - std::lock_guard l(global_trackers_lock); - for (auto& v : global_mem_trackers) { - snapshots->push_back(v.second->make_snapshot(1)); - } -} } // namespace doris \ No newline at end of file diff --git a/be/src/runtime/memory/mem_tracker.h b/be/src/runtime/memory/mem_tracker.h index 01a0d58cdb..5ffcaa30a1 100644 --- a/be/src/runtime/memory/mem_tracker.h +++ b/be/src/runtime/memory/mem_tracker.h @@ -24,6 +24,8 @@ namespace doris { +class MemTrackerLimiter; + // Used to track memory usage. // // MemTracker can be consumed manually by consume()/release(), or put into SCOPED_CONSUME_MEM_TRACKER, @@ -33,30 +35,22 @@ namespace doris { class MemTracker { public: struct Snapshot { + std::string type = ""; std::string label; - // For MemTracker, it is only weakly related to parent through label, ensuring MemTracker Independence. - // For MemTrackerLimiter, it is strongly related to parent and saves pointer objects to each other. - std::string parent = ""; - size_t level = 0; + std::string parent_label = ""; int64_t limit = 0; int64_t cur_consumption = 0; int64_t peak_consumption = 0; - size_t child_count = 0; }; // Creates and adds the tracker to the mem_tracker_pool. - MemTracker(const std::string& label, RuntimeProfile* profile = nullptr); + MemTracker(const std::string& label, RuntimeProfile* profile = nullptr, + MemTrackerLimiter* parent = nullptr); // For MemTrackerLimiter - MemTracker() { _bind_group_num = -1; } + MemTracker() { _parent_group_num = -1; } ~MemTracker(); - // Get a global tracker with a specified label, and the tracker will be created when the label is first get. 
- // use SCOPED_CONSUME_MEM_TRACKER count the memory in the scope to a global tracker with the specified label name. - // which is usually used for debugging, to finding memory hotspots. - static std::shared_ptr get_global_mem_tracker(const std::string& label); - static void make_global_mem_tracker_snapshot(std::vector* snapshots); - static std::string print_bytes(int64_t bytes) { return bytes >= 0 ? PrettyPrinter::print(bytes, TUnit::BYTES) : "-" + PrettyPrinter::print(std::abs(bytes), TUnit::BYTES); @@ -64,28 +58,23 @@ public: public: const std::string& label() const { return _label; } + const std::string& parent_label() const { return _parent_label; } // Returns the memory consumed in bytes. int64_t consumption() const { return _consumption->current_value(); } int64_t peak_consumption() const { return _consumption->value(); } - void consume(int64_t bytes); + void consume(int64_t bytes) { + if (bytes == 0) return; + _consumption->add(bytes); + } void release(int64_t bytes) { consume(-bytes); } - // Transfer 'bytes' of consumption from this tracker to 'dst'. - void transfer_to(MemTracker* dst, int64_t bytes); void set_consumption(int64_t bytes) { _consumption->set(bytes); } public: - bool limit_exceeded(int64_t limit) const { return limit >= 0 && limit < consumption(); } - // Return true, no exceeded limit - bool check_limit(int64_t limit, int64_t bytes) const { - return limit >= 0 && limit > consumption() + bytes; - } - - Snapshot make_snapshot(size_t level) const; - // Specify group_num from mem_tracker_pool to generate snapshot, requiring tracker.label to be related - // with parameter related_label - static void make_group_snapshot(std::vector* snapshots, size_t level, - int64_t group_num, std::string related_label); + Snapshot make_snapshot() const; + // Specify group_num from mem_tracker_pool to generate snapshot. 
+ static void make_group_snapshot(std::vector* snapshots, int64_t group_num, + std::string parent_label); static std::string log_usage(MemTracker::Snapshot snapshot); std::string debug_string() { @@ -105,23 +94,11 @@ protected: std::shared_ptr _consumption; // in bytes // Tracker is located in group num in mem_tracker_pool - int64_t _bind_group_num; + int64_t _parent_group_num; + std::string _parent_label; // Iterator into mem_tracker_pool for this object. Stored to have O(1) remove. std::list::iterator _tracker_group_it; }; -inline void MemTracker::consume(int64_t bytes) { - if (bytes == 0) { - return; - } else { - _consumption->add(bytes); - } -} - -inline void MemTracker::transfer_to(MemTracker* dst, int64_t bytes) { - release(bytes); - dst->consume(bytes); -} - } // namespace doris \ No newline at end of file diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp index 20639326f8..1fca6009ee 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.cpp +++ b/be/src/runtime/memory/mem_tracker_limiter.cpp @@ -22,7 +22,6 @@ #include #include "gutil/once.h" -#include "gutil/walltime.h" #include "runtime/runtime_state.h" #include "runtime/thread_context.h" #include "util/pretty_printer.h" @@ -30,8 +29,19 @@ namespace doris { -MemTrackerLimiter::MemTrackerLimiter(int64_t byte_limit, const std::string& label, - const std::shared_ptr& parent, +struct TrackerLimiterGroup { + std::list trackers; + std::mutex group_lock; +}; + +// Save all MemTrackerLimiters in use. +// Each group corresponds to several MemTrackerLimiters and has a lock. +// Multiple groups are used to reduce the impact of locks. 
+static std::vector mem_tracker_limiter_pool(1000); + +std::atomic MemTrackerLimiter::_enable_print_log_process_usage {true}; + +MemTrackerLimiter::MemTrackerLimiter(Type type, const std::string& label, int64_t byte_limit, RuntimeProfile* profile) { DCHECK_GE(byte_limit, -1); if (profile == nullptr) { @@ -39,268 +49,215 @@ MemTrackerLimiter::MemTrackerLimiter(int64_t byte_limit, const std::string& labe } else { _consumption = profile->AddSharedHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES); } + _type = type; _label = label; _limit = byte_limit; - _group_num = GetCurrentTimeMicros() % 1000; - if (parent || label == "Process") { - _parent = parent; - } else if (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->label() == - "Orphan") { - _parent = ExecEnv::GetInstance()->process_mem_tracker(); + if (_type == Type::GLOBAL) { + _group_num = 0; } else { - _parent = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker(); + _group_num = random() % 999 + 1; } - DCHECK(_parent || label == "Process"); - - // Walks the MemTrackerLimiter hierarchy and populates _all_ancestors and _limited_ancestors - MemTrackerLimiter* tracker = this; - while (tracker != nullptr) { - _all_ancestors.push_back(tracker); - // Process tracker does not participate in the process memory limit, process tracker consumption is virtual memory, - // and there is a diff between the real physical memory value of the process. It is replaced by check_sys_mem_info. 
- if (tracker->has_limit() && tracker->label() != "Process") - _limited_ancestors.push_back(tracker); - tracker = tracker->_parent.get(); - } - DCHECK_GT(_all_ancestors.size(), 0); - DCHECK_EQ(_all_ancestors[0], this); - if (_parent) { - std::lock_guard l(_parent->_child_tracker_limiter_lock); - _child_tracker_it = _parent->_child_tracker_limiters.insert( - _parent->_child_tracker_limiters.end(), this); - _had_child_count++; + { + std::lock_guard l(mem_tracker_limiter_pool[_group_num].group_lock); + _tracker_limiter_group_it = mem_tracker_limiter_pool[_group_num].trackers.insert( + mem_tracker_limiter_pool[_group_num].trackers.end(), this); } } MemTrackerLimiter::~MemTrackerLimiter() { - // TCMalloc hook will be triggered during destructor memtracker, may cause crash. - if (_label == "Process") doris::thread_context_ptr.init = false; - DCHECK(remain_child_count() == 0 || _label == "Process"); + // mem hook record tracker cannot guarantee that the final consumption is 0, + // nor can it guarantee that the memory alloc and free are recorded in a one-to-one correspondence. // In order to ensure `consumption of all limiter trackers` + `orphan tracker consumption` = `process tracker consumption` // in real time. Merge its consumption into orphan when parent is process, to avoid repetition. 
- if (_parent && _parent->label() == "Process") { - ExecEnv::GetInstance()->orphan_mem_tracker_raw()->cache_consume_local( - _consumption->current_value()); - } - if (_reset_zero) { - reset_zero(); - _all_ancestors.clear(); - _all_ancestors.push_back(ExecEnv::GetInstance()->orphan_mem_tracker_raw()); - } - consume_local(_untracked_mem); - if (_parent) { - std::lock_guard l(_parent->_child_tracker_limiter_lock); - if (_child_tracker_it != _parent->_child_tracker_limiters.end()) { - _parent->_child_tracker_limiters.erase(_child_tracker_it); - _child_tracker_it = _parent->_child_tracker_limiters.end(); + ExecEnv::GetInstance()->orphan_mem_tracker()->consume(_consumption->current_value()); + _consumption->set(0); + { + std::lock_guard l(mem_tracker_limiter_pool[_group_num].group_lock); + if (_tracker_limiter_group_it != mem_tracker_limiter_pool[_group_num].trackers.end()) { + mem_tracker_limiter_pool[_group_num].trackers.erase(_tracker_limiter_group_it); + _tracker_limiter_group_it = mem_tracker_limiter_pool[_group_num].trackers.end(); } } } -MemTracker::Snapshot MemTrackerLimiter::make_snapshot(size_t level) const { +MemTracker::Snapshot MemTrackerLimiter::make_snapshot() const { Snapshot snapshot; + snapshot.type = TypeString[_type]; snapshot.label = _label; - snapshot.parent = _parent != nullptr ? 
_parent->label() : "Root"; - snapshot.level = level; snapshot.limit = _limit; snapshot.cur_consumption = _consumption->current_value(); snapshot.peak_consumption = _consumption->value(); - snapshot.child_count = remain_child_count(); return snapshot; } -void MemTrackerLimiter::make_snapshot(std::vector* snapshots, - size_t cur_level, size_t upper_level) const { - Snapshot snapshot = MemTrackerLimiter::make_snapshot(cur_level); +void MemTrackerLimiter::refresh_global_counter() { + std::unordered_map type_mem_sum = { + {Type::GLOBAL, 0}, {Type::QUERY, 0}, {Type::LOAD, 0}, + {Type::COMPACTION, 0}, {Type::SCHEMA_CHANGE, 0}, {Type::CLONE, 0}, + {Type::BATCHLOAD, 0}, {Type::CONSISTENCY, 0}}; + for (unsigned i = 0; i < mem_tracker_limiter_pool.size(); ++i) { + std::lock_guard l(mem_tracker_limiter_pool[i].group_lock); + for (auto tracker : mem_tracker_limiter_pool[i].trackers) { + type_mem_sum[tracker->type()] += tracker->consumption(); + } + } + for (auto it : type_mem_sum) { + MemTrackerLimiter::TypeMemSum[it.first]->set(it.second); + } +} + +void MemTrackerLimiter::make_process_snapshots(std::vector* snapshots) { + MemTrackerLimiter::refresh_global_counter(); + int64_t process_mem_sum = 0; + Snapshot snapshot; + for (auto it : MemTrackerLimiter::TypeMemSum) { + snapshot.type = TypeString[it.first]; + snapshot.label = ""; + snapshot.limit = -1; + snapshot.cur_consumption = it.second->current_value(); + snapshot.peak_consumption = it.second->value(); + (*snapshots).emplace_back(snapshot); + process_mem_sum += it.second->current_value(); + } + + snapshot.type = "tc/jemalloc_cache"; + snapshot.label = ""; + snapshot.limit = -1; + snapshot.cur_consumption = MemInfo::allocator_cache_mem(); + snapshot.peak_consumption = -1; (*snapshots).emplace_back(snapshot); - if (cur_level < upper_level) { - { - std::lock_guard l(_child_tracker_limiter_lock); - for (const auto& child : _child_tracker_limiters) { - child->make_snapshot(snapshots, cur_level + 1, upper_level); + 
process_mem_sum += MemInfo::allocator_cache_mem(); + + snapshot.type = "process"; + snapshot.label = ""; + snapshot.limit = -1; + snapshot.cur_consumption = process_mem_sum; + snapshot.peak_consumption = -1; + (*snapshots).emplace_back(snapshot); +} + +void MemTrackerLimiter::make_type_snapshots(std::vector* snapshots, + MemTrackerLimiter::Type type) { + if (type == Type::GLOBAL) { + std::lock_guard l(mem_tracker_limiter_pool[0].group_lock); + for (auto tracker : mem_tracker_limiter_pool[0].trackers) { + (*snapshots).emplace_back(tracker->make_snapshot()); + MemTracker::make_group_snapshot(snapshots, tracker->group_num(), tracker->label()); + } + } else { + for (unsigned i = 1; i < mem_tracker_limiter_pool.size(); ++i) { + std::lock_guard l(mem_tracker_limiter_pool[i].group_lock); + for (auto tracker : mem_tracker_limiter_pool[i].trackers) { + if (tracker->type() == type) { + (*snapshots).emplace_back(tracker->make_snapshot()); + MemTracker::make_group_snapshot(snapshots, tracker->group_num(), + tracker->label()); + } } } - MemTracker::make_group_snapshot(snapshots, cur_level + 1, _group_num, _label); } } -int64_t MemTrackerLimiter::spare_capacity() const { - int64_t result = std::numeric_limits::max(); - for (const auto& tracker : _limited_ancestors) { - int64_t mem_left = tracker->limit() - tracker->consumption(); - result = std::min(result, mem_left); - } - return result; -} - -int64_t MemTrackerLimiter::get_lowest_limit() const { - if (_limited_ancestors.empty()) return -1; - int64_t min_limit = std::numeric_limits::max(); - for (const auto& tracker : _limited_ancestors) { - DCHECK(tracker->has_limit()); - min_limit = std::min(min_limit, tracker->limit()); - } - return min_limit; -} - -// Calling this on the query tracker results in output like: -// -// Query(4a4c81fedaed337d:4acadfda00000000) Limit=10.00 GB Total=508.28 MB Peak=508.45 MB -// Fragment 4a4c81fedaed337d:4acadfda00000000: Total=8.00 KB Peak=8.00 KB -// EXCHANGE_NODE (id=4): Total=0 Peak=0 -// 
DataStreamRecvr: Total=0 Peak=0 -// Block Manager: Limit=6.68 GB Total=394.00 MB Peak=394.00 MB -// Fragment 4a4c81fedaed337d:4acadfda00000006: Total=233.72 MB Peak=242.24 MB -// AGGREGATION_NODE (id=1): Total=139.21 MB Peak=139.84 MB -// HDFS_SCAN_NODE (id=0): Total=93.94 MB Peak=102.24 MB -// DataStreamSender (dst_id=2): Total=45.99 KB Peak=85.99 KB -// Fragment 4a4c81fedaed337d:4acadfda00000003: Total=274.55 MB Peak=274.62 MB -// AGGREGATION_NODE (id=3): Total=274.50 MB Peak=274.50 MB -// EXCHANGE_NODE (id=2): Total=0 Peak=0 -// DataStreamRecvr: Total=45.91 KB Peak=684.07 KB -// DataStreamSender (dst_id=4): Total=680.00 B Peak=680.00 B -// -// If 'reservation_metrics_' are set, we ge a more granular breakdown: -// TrackerName: Limit=5.00 MB Reservation=5.00 MB OtherMemory=1.04 MB -// Total=6.04 MB Peak=6.45 MB -// -std::string MemTrackerLimiter::log_usage(int max_recursive_depth, int64_t* logged_consumption) { - int64_t curr_consumption = consumption(); - int64_t peak_consumption = _consumption->value(); - if (logged_consumption != nullptr) *logged_consumption = curr_consumption; - - std::string detail = - "MemTrackerLimiter Label={}, Limit={}({} B), Used={}({} B), Peak={}({} B), Exceeded={}"; - detail = fmt::format(detail, _label, print_bytes(_limit), _limit, print_bytes(curr_consumption), - curr_consumption, print_bytes(peak_consumption), peak_consumption, - limit_exceeded() ? "true" : "false"); - - // This call does not need the children, so return early. 
- if (max_recursive_depth == 0) return detail; - - // Recurse and get information about the children - int64_t child_consumption; - std::string child_trackers_usage; - { - std::lock_guard l(_child_tracker_limiter_lock); - child_trackers_usage = - log_usage(max_recursive_depth - 1, _child_tracker_limiters, &child_consumption); - } - std::vector snapshots; - MemTracker::make_group_snapshot(&snapshots, 0, _group_num, _label); - for (const auto& snapshot : snapshots) { - child_trackers_usage += "\n " + MemTracker::log_usage(snapshot); - } - if (!child_trackers_usage.empty()) detail += child_trackers_usage; - return detail; -} - -std::string MemTrackerLimiter::log_usage(int max_recursive_depth, - const std::list& trackers, - int64_t* logged_consumption) { - *logged_consumption = 0; - std::vector usage_strings; - for (const auto& tracker : trackers) { - int64_t tracker_consumption; - std::string usage_string = tracker->log_usage(max_recursive_depth, &tracker_consumption); - if (!usage_string.empty()) usage_strings.push_back(usage_string); - *logged_consumption += tracker_consumption; - } - return usage_strings.size() == 0 ? "" : "\n " + join(usage_strings, "\n "); +std::string MemTrackerLimiter::log_usage(MemTracker::Snapshot snapshot) { + return fmt::format( + "MemTrackerLimiter Label={}, Type={}, Limit={}({} B), Used={}({} B), Peak={}({} B)", + snapshot.label, snapshot.type, print_bytes(snapshot.limit), snapshot.limit, + print_bytes(snapshot.cur_consumption), snapshot.cur_consumption, + print_bytes(snapshot.peak_consumption), snapshot.peak_consumption); } void MemTrackerLimiter::print_log_usage(const std::string& msg) { - // only print the tracker log_usage in be log. 
std::string detail = msg; - detail += "\n " + fmt::format( - "process memory used {}, limit {}, hard limit {}, tc/jemalloc " - "allocator cache {}", - PerfCounters::get_vm_rss_str(), MemInfo::mem_limit_str(), - print_bytes(MemInfo::hard_mem_limit()), - MemInfo::allocator_cache_mem_str()); - if (_print_log_usage) { - if (_label == "Process") { - // Dumping the process MemTracker is expensive. Limiting the recursive depth to two - // levels limits the level of detail to a one-line summary for each query MemTracker. - detail += "\n " + log_usage(2); - } else { - detail += "\n " + log_usage(); + detail += "\n " + MemTrackerLimiter::process_mem_log_str(); + if (_enable_print_log_usage) { + detail += log_usage(); + std::string child_trackers_usage; + std::vector snapshots; + MemTracker::make_group_snapshot(&snapshots, _group_num, _label); + for (const auto& snapshot : snapshots) { + child_trackers_usage += "\n " + MemTracker::log_usage(snapshot); } + if (!child_trackers_usage.empty()) detail += child_trackers_usage; + // TODO: memory leak by calling `boost::stacktrace` in tcmalloc hook, // test whether overwriting malloc/free is the same problem in jemalloc/tcmalloc. // detail += "\n" + boost::stacktrace::to_string(boost::stacktrace::stacktrace()); LOG(WARNING) << detail; - _print_log_usage = false; + _enable_print_log_usage = false; } } -std::string MemTrackerLimiter::mem_limit_exceeded(const std::string& msg, - int64_t failed_allocation_size) { - STOP_CHECK_THREAD_MEM_TRACKER_LIMIT(); - std::string detail = fmt::format("Memory limit exceeded:, ", _label); - MemTrackerLimiter* exceeded_tracker = nullptr; - MemTrackerLimiter* max_consumption_tracker = nullptr; - int64_t free_size = INT64_MAX; - // Find the tracker that exceed limit and has the least free. - for (const auto& tracker : _limited_ancestors) { - int64_t max_consumption = tracker->peak_consumption() > tracker->consumption() - ? 
tracker->peak_consumption() - : tracker->consumption(); - if (tracker->limit() < max_consumption + failed_allocation_size) { - exceeded_tracker = tracker; - break; - } - if (tracker->limit() - max_consumption < free_size) { - free_size = tracker->limit() - max_consumption; - max_consumption_tracker = tracker; - } +void MemTrackerLimiter::print_log_process_usage(const std::string& msg) { + MemTrackerLimiter::_enable_print_log_process_usage = false; + std::string detail = msg; + detail += "\n " + MemTrackerLimiter::process_mem_log_str(); + std::vector snapshots; + MemTrackerLimiter::make_process_snapshots(&snapshots); + MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::GLOBAL); + for (const auto& snapshot : snapshots) { + detail += "\n " + MemTrackerLimiter::log_usage(snapshot); } - - MemTrackerLimiter* print_log_usage_tracker = nullptr; - if (exceeded_tracker != nullptr) { - detail += limit_exceeded_errmsg_prefix_str(failed_allocation_size, exceeded_tracker); - print_log_usage_tracker = exceeded_tracker; - } else if (sys_mem_exceed_limit_check(failed_allocation_size)) { - detail += fmt::format("{}>, executing msg:<{}>", - limit_exceeded_errmsg_sys_str(failed_allocation_size), msg); - } else if (max_consumption_tracker != nullptr) { - // must after check_sys_mem_info false - detail += fmt::format( - "failed alloc size {}, max consumption tracker:<{}>, limit {}, peak used {}, " - "current used {}>, executing msg:<{}>", - print_bytes(failed_allocation_size), max_consumption_tracker->label(), - print_bytes(max_consumption_tracker->limit()), - print_bytes(max_consumption_tracker->peak_consumption()), - print_bytes(max_consumption_tracker->consumption()), msg); - print_log_usage_tracker = max_consumption_tracker; - } else { - // The limit of the current tracker and parents is less than 0, the consume will not fail, - // and the current process memory has no excess limit. 
- detail += fmt::format("unknown exceed reason, executing msg:<{}>", msg); - print_log_usage_tracker = ExecEnv::GetInstance()->process_mem_tracker().get(); - } - auto failed_msg = MemTrackerLimiter::limit_exceeded_errmsg_suffix_str(detail); - if (print_log_usage_tracker != nullptr) print_log_usage_tracker->print_log_usage(failed_msg); - return failed_msg; + LOG(WARNING) << detail; } std::string MemTrackerLimiter::mem_limit_exceeded(const std::string& msg, - MemTrackerLimiter* failed_tracker, const std::string& limit_exceeded_errmsg_prefix) { + DCHECK(_limit != -1); STOP_CHECK_THREAD_MEM_TRACKER_LIMIT(); - std::string detail = - fmt::format("Memory limit exceeded:, {}>, executing msg:<{}>", - _label, limit_exceeded_errmsg_prefix, msg); - auto failed_msg = MemTrackerLimiter::limit_exceeded_errmsg_suffix_str(detail); - failed_tracker->print_log_usage(failed_msg); - return failed_msg; + std::string detail = fmt::format( + "Memory limit exceeded:, {}>, executing msg:<{}>. backend {} " + "process memory used {}, limit {}. If query tracker exceed, `set " + "exec_mem_limit=8G` to change limit, details mem usage see be.INFO.", + _label, limit_exceeded_errmsg_prefix, msg, BackendOptions::get_localhost(), + PerfCounters::get_vm_rss_str(), MemInfo::mem_limit_str()); + print_log_usage(detail); + return detail; } -Status MemTrackerLimiter::mem_limit_exceeded(RuntimeState* state, const std::string& msg, - int64_t failed_alloc_size) { - auto failed_msg = mem_limit_exceeded(msg, failed_alloc_size); +Status MemTrackerLimiter::fragment_mem_limit_exceeded(RuntimeState* state, const std::string& msg, + int64_t failed_alloc_size) { + auto failed_msg = + mem_limit_exceeded(msg, tracker_limit_exceeded_errmsg_str(failed_alloc_size, this)); state->log_error(failed_msg); return Status::MemoryLimitExceeded(failed_msg); } +// TODO(zxy) More observable methods +// /// Logs the usage of 'limit' number of queries based on maximum total memory +// /// consumption. 
+// std::string MemTracker::LogTopNQueries(int limit) { +// if (limit == 0) return ""; +// priority_queue, std::vector>, +// std::greater>> +// min_pq; +// GetTopNQueries(min_pq, limit); +// std::vector usage_strings(min_pq.size()); +// while (!min_pq.empty()) { +// usage_strings.push_back(min_pq.top().second); +// min_pq.pop(); +// } +// std::reverse(usage_strings.begin(), usage_strings.end()); +// return join(usage_strings, "\n"); +// } + +// /// Helper function for LogTopNQueries that iterates through the MemTracker hierarchy +// /// and populates 'min_pq' with 'limit' number of elements (that contain state related +// /// to query MemTrackers) based on maximum total memory consumption. +// void MemTracker::GetTopNQueries( +// priority_queue, std::vector>, +// greater>>& min_pq, +// int limit) { +// list> children; +// { +// lock_guard l(child_trackers_lock_); +// children = child_trackers_; +// } +// for (const auto& child_weak : children) { +// shared_ptr child = child_weak.lock(); +// if (child) { +// child->GetTopNQueries(min_pq, limit); +// } +// } +// } + } // namespace doris diff --git a/be/src/runtime/memory/mem_tracker_limiter.h b/be/src/runtime/memory/mem_tracker_limiter.h index 6bc9449c20..2182e84f00 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.h +++ b/be/src/runtime/memory/mem_tracker_limiter.h @@ -31,45 +31,59 @@ namespace doris { class RuntimeState; // Track and limit the memory usage of process and query. -// Contains an limit, arranged into a tree structure, the consumption also tracked by its ancestors. +// Contains an limit, arranged into a tree structure. // // Automatically track every once malloc/free of the system memory allocator (Currently, based on TCMlloc hook). // Put Query MemTrackerLimiter into SCOPED_ATTACH_TASK when the thread starts,all memory used by this thread -// will be recorded on this Query, otherwise it will be recorded in Process Tracker by default. 
-// -// We use a five-level hierarchy of mem trackers: process, query pool, query, instance, node. -// The first four layers are MemTrackerLimiter with limit, and the fifth layer is MemTracker without limit. -// Specific parts of the fragment (exec nodes, sinks, etc) will add a fifth level when they are initialized. +// will be recorded on this Query, otherwise it will be recorded in Orphan Tracker by default. class MemTrackerLimiter final : public MemTracker { public: - // Creates and adds the tracker limiter to the tree - MemTrackerLimiter( - int64_t byte_limit = -1, const std::string& label = std::string(), - const std::shared_ptr& parent = std::shared_ptr(), - RuntimeProfile* profile = nullptr); + enum Type { + GLOBAL = 0, // Life cycle is the same as the process, e.g. Cache and default Orphan + QUERY = 1, // Count the memory consumption of all Query tasks. + LOAD = 2, // Count the memory consumption of all Load tasks. + COMPACTION = 3, // Count the memory consumption of all Base and Cumulative tasks. + SCHEMA_CHANGE = 4, // Count the memory consumption of all SchemaChange tasks. + CLONE = 5, // Count the memory consumption of all EngineCloneTask. Note: Memory that does not contain make/release snapshots. + BATCHLOAD = 6, // Count the memory consumption of all EngineBatchLoadTask. + CONSISTENCY = 7 // Count the memory consumption of all EngineChecksumTask. + }; - // If the final consumption is not as expected, this usually means that the same memory is calling - // consume and release on different trackers. If the two trackers have a parent-child relationship, - // the parent tracker consumption is correct, and the child tracker is wrong; if the two trackers have - // no parent-child relationship, the two tracker consumptions are wrong. 
- ~MemTrackerLimiter(); + inline static std::unordered_map> + TypeMemSum = {{Type::GLOBAL, + std::make_shared(TUnit::BYTES)}, + {Type::QUERY, + std::make_shared(TUnit::BYTES)}, + {Type::LOAD, + std::make_shared(TUnit::BYTES)}, + {Type::COMPACTION, + std::make_shared(TUnit::BYTES)}, + {Type::SCHEMA_CHANGE, + std::make_shared(TUnit::BYTES)}, + {Type::CLONE, + std::make_shared(TUnit::BYTES)}, + {Type::BATCHLOAD, + std::make_shared(TUnit::BYTES)}, + {Type::CONSISTENCY, + std::make_shared(TUnit::BYTES)}}; - std::shared_ptr parent() const { return _parent; } - - size_t remain_child_count() const { return _child_tracker_limiters.size(); } - size_t had_child_count() const { return _had_child_count; } - - Snapshot make_snapshot(size_t level) const; - // Returns a list of all the valid tracker snapshots. - void make_snapshot(std::vector* snapshots, size_t cur_level, - size_t upper_level) const; + inline static const std::string TypeString[] = {"global", "query", "load", + "compaction", "schema_change", "clone", + "batch_load", "consistency"}; public: + // byte_limit equal to -1 means no consumption limit, only participate in process memory statistics. + MemTrackerLimiter(Type type, const std::string& label = std::string(), int64_t byte_limit = -1, + RuntimeProfile* profile = nullptr); + + ~MemTrackerLimiter(); + static bool sys_mem_exceed_limit_check(int64_t bytes) { // Limit process memory usage using the actual physical memory of the process in `/proc/self/status`. // This is independent of the consumption value of the mem tracker, which counts the virtual memory // of the process malloc. // for fast, expect MemInfo::initialized() to be true. + // // tcmalloc/jemalloc allocator cache does not participate in the mem check as part of the process physical memory. // because `new/malloc` will trigger mem hook when using tcmalloc/jemalloc allocator cache, // but it may not actually alloc physical memory, which is not expected in mem hook fail. 
@@ -77,127 +91,78 @@ public: // TODO: In order to ensure no OOM, currently reserve 200M, and then use the free mem in /proc/meminfo to ensure no OOM. if (MemInfo::proc_mem_no_allocator_cache() + bytes >= MemInfo::mem_limit() || PerfCounters::get_vm_rss() + bytes >= MemInfo::hard_mem_limit()) { + print_log_process_usage("sys_mem_exceed_limit_check"); return true; } return false; } void set_consumption() { LOG(FATAL) << "MemTrackerLimiter set_consumption not supported"; } + Type type() const { return _type; } int64_t group_num() const { return _group_num; } bool has_limit() const { return _limit >= 0; } int64_t limit() const { return _limit; } - void update_limit(int64_t limit) { - DCHECK(has_limit()); - _limit = limit; - } bool limit_exceeded() const { return _limit >= 0 && _limit < consumption(); } - // Returns true if a valid limit of this tracker limiter or one of its ancestors is exceeded. - bool any_limit_exceeded() const { - for (const auto& tracker : _limited_ancestors) { - if (tracker->limit_exceeded()) { - return true; - } - } - return false; - } - Status check_limit(int64_t bytes); // Returns the maximum consumption that can be made without exceeding the limit on - // this tracker limiter or any of its parents. Returns int64_t::max() if there are no - // limits and a negative value if any limit is already exceeded. - int64_t spare_capacity() const; - - // Returns the lowest limit for this tracker limiter and its ancestors. Returns -1 if there is no limit. - int64_t get_lowest_limit() const; + // this tracker limiter. + int64_t spare_capacity() const { return _limit - consumption(); } public: - // up to (but not including) end_tracker. - // This happens when we want to update tracking on a particular mem tracker but the consumption - // against the limit recorded in one of its ancestors already happened. - // It is used for revise mem tracker consumption. 
- // If the location of memory alloc and free is different, the consumption value of mem tracker will be inaccurate. - // But the consumption value of the process mem tracker is not affecte - void cache_consume_local(int64_t bytes); + // If need to consume the tracker frequently, use it + void cache_consume(int64_t bytes); - // Will not change the value of process_mem_tracker, even though mem_tracker == process_mem_tracker. + // Transfer 'bytes' of consumption from this tracker to 'dst'. void transfer_to(int64_t size, MemTrackerLimiter* dst) { - cache_consume_local(-size); - dst->cache_consume_local(size); + cache_consume(-size); + dst->cache_consume(size); } - void enable_print_log_usage() { _print_log_usage = true; } - void enable_reset_zero() { _reset_zero = true; } + static void refresh_global_counter(); + Snapshot make_snapshot() const; + // Returns a list of all the valid tracker snapshots. + static void make_process_snapshots(std::vector* snapshots); + static void make_type_snapshots(std::vector* snapshots, Type type); - void reset_zero() { - ExecEnv::GetInstance()->orphan_mem_tracker_raw()->cache_consume_local( - _consumption->current_value()); - cache_consume_local(-_consumption->current_value()); - } - - // Logs the usage of this tracker limiter and optionally its children (recursively). - // If 'logged_consumption' is non-nullptr, sets the consumption value logged. - // 'max_recursive_depth' specifies the maximum number of levels of children - // to include in the dump. If it is zero, then no children are dumped. - // Limiting the recursive depth reduces the cost of dumping, particularly - // for the process tracker limiter. 
- std::string log_usage(int max_recursive_depth = INT_MAX, int64_t* logged_consumption = nullptr); + static std::string log_usage(MemTracker::Snapshot snapshot); + std::string log_usage() { return log_usage(make_snapshot()); } void print_log_usage(const std::string& msg); + void enable_print_log_usage() { _enable_print_log_usage = true; } + static void enable_print_log_process_usage() { _enable_print_log_process_usage = true; } + static void print_log_process_usage(const std::string& msg); - // Log the memory usage when memory limit is exceeded and return a status object with - // msg of the allocation which caused the limit to be exceeded. - // If 'failed_allocation_size' is greater than zero, logs the allocation size. If - // 'failed_allocation_size' is zero, nothing about the allocation size is logged. - // If 'state' is non-nullptr, logs the error to 'state'. - std::string mem_limit_exceeded(const std::string& msg, int64_t failed_allocation_size = 0); - std::string mem_limit_exceeded(const std::string& msg, MemTrackerLimiter* failed_tracker, + // Log the memory usage when memory limit is exceeded. + std::string mem_limit_exceeded(const std::string& msg, const std::string& limit_exceeded_errmsg_prefix); - Status mem_limit_exceeded(RuntimeState* state, const std::string& msg, - int64_t failed_allocation_size = 0); + Status fragment_mem_limit_exceeded(RuntimeState* state, const std::string& msg, + int64_t failed_allocation_size = 0); std::string debug_string() { std::stringstream msg; msg << "limit: " << _limit << "; " << "consumption: " << _consumption->current_value() << "; " << "label: " << _label << "; " - << "all ancestor size: " << _all_ancestors.size() - 1 << "; " - << "limited ancestor size: " << _limited_ancestors.size() - 1 << "; "; + << "type: " << TypeString[_type] << "; "; return msg.str(); } private: - // The following func, for automatic memory tracking and limiting based on system memory allocation. 
friend class ThreadMemTrackerMgr; - // Increases consumption of this tracker and its ancestors by 'bytes'. - void consume(int64_t bytes); - - // Decreases consumption of this tracker and its ancestors by 'bytes'. - void release(int64_t bytes) { consume(-bytes); } - - // Increases consumption of this tracker and its ancestors by 'bytes' only if - // they can all consume 'bytes' without exceeding limit. If limit would be exceed, - // no MemTrackerLimiters are updated. Returns true if the consumption was successfully updated. + // Increases consumption of this tracker by 'bytes' only if will not exceeding limit. + // Returns true if the consumption was successfully updated. WARN_UNUSED_RESULT bool try_consume(int64_t bytes, std::string& failed_msg); - void consume_local(int64_t bytes); - // When the accumulated untracked memory value exceeds the upper limit, // the current value is returned and set to 0. // Thread safety. int64_t add_untracked_mem(int64_t bytes); - // Log consumption of all the trackers provided. Returns the sum of consumption in - // 'logged_consumption'. 'max_recursive_depth' specifies the maximum number of levels - // of children to include in the dump. If it is zero, then no children are dumped. - static std::string log_usage(int max_recursive_depth, - const std::list& trackers, - int64_t* logged_consumption); - - static std::string limit_exceeded_errmsg_prefix_str(int64_t bytes, - MemTrackerLimiter* exceed_tracker) { + static std::string tracker_limit_exceeded_errmsg_str(int64_t bytes, + MemTrackerLimiter* exceed_tracker) { return fmt::format( "failed alloc size {}, exceeded tracker:<{}>, limit {}, peak " "used {}, current used {}", @@ -206,68 +171,43 @@ private: print_bytes(exceed_tracker->_consumption->current_value())); } - static std::string limit_exceeded_errmsg_suffix_str(const std::string& msg) { + static std::string process_limit_exceeded_errmsg_str(int64_t bytes) { return fmt::format( - "{}. backend {} process memory used {}, limit {}. 
If query tracker exceed, `set " - "exec_mem_limit=8G` to change limit, details mem usage see be.INFO.", - msg, BackendOptions::get_localhost(), PerfCounters::get_vm_rss_str(), - MemInfo::mem_limit_str()); - } - - static std::string limit_exceeded_errmsg_sys_str(int64_t bytes) { - auto err_msg = fmt::format( "process memory used {}, tc/jemalloc allocator cache {}, exceed limit {}, failed " "alloc size {}", PerfCounters::get_vm_rss_str(), MemInfo::allocator_cache_mem_str(), MemInfo::mem_limit_str(), print_bytes(bytes)); - ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err_msg); - return err_msg; + } + + static std::string process_mem_log_str() { + return fmt::format( + "process memory used {}, limit {}, hard limit {}, tc/jemalloc " + "allocator cache {}", + PerfCounters::get_vm_rss_str(), MemInfo::mem_limit_str(), + print_bytes(MemInfo::hard_mem_limit()), MemInfo::allocator_cache_mem_str()); } private: - // Limit on memory consumption, in bytes. If limit_ == -1, there is no consumption limit. Used in log_usage。 + Type _type; + + // Limit on memory consumption, in bytes. int64_t _limit; - // Group number in MemTracker::mem_tracker_pool, generated by the timestamp. + // Group number in MemTracker::mem_tracker_limiter_pool and MemTracker::mem_tracker_pool, generated by the timestamp. int64_t _group_num; - std::shared_ptr _parent; // The parent of this tracker. - - // this tracker limiter plus all of its ancestors - std::vector _all_ancestors; - // _all_ancestors with valid limits, except process tracker - std::vector _limited_ancestors; - // Consume size smaller than mem_tracker_consume_min_size_bytes will continue to accumulate // to avoid frequent calls to consume/release of MemTracker. std::atomic _untracked_mem = 0; - // Child trackers of this tracker limiter. Used for error reporting and - // listing only (i.e. updating the consumption of a parent tracker limiter does not - // update that of its children). 
- mutable std::mutex _child_tracker_limiter_lock; - std::list _child_tracker_limiters; - // Iterator into parent_->_child_tracker_limiters for this object. Stored to have O(1) remove. - std::list::iterator _child_tracker_it; + // Avoid frequent printing. + bool _enable_print_log_usage = false; + static std::atomic _enable_print_log_process_usage; - // The number of child trackers that have been added. - std::atomic_size_t _had_child_count = 0; - - bool _print_log_usage = false; - // mem hook record tracker cannot guarantee that the final consumption is 0, - // nor can it guarantee that the memory alloc and free are recorded in a one-to-one correspondence. - // In some cases, in order to avoid the cumulative error of the upper global tracker, - // the consumption of the current tracker is reset to zero. - bool _reset_zero = false; + // Iterator into mem_tracker_limiter_pool for this object. Stored to have O(1) remove. + std::list::iterator _tracker_limiter_group_it; }; -inline void MemTrackerLimiter::consume(int64_t bytes) { - if (bytes == 0) return; - for (auto& tracker : _all_ancestors) { - tracker->_consumption->add(bytes); - } -} - inline int64_t MemTrackerLimiter::add_untracked_mem(int64_t bytes) { _untracked_mem += bytes; if (std::abs(_untracked_mem) >= config::mem_tracker_consume_min_size_bytes) { @@ -276,18 +216,10 @@ inline int64_t MemTrackerLimiter::add_untracked_mem(int64_t bytes) { return 0; } -inline void MemTrackerLimiter::consume_local(int64_t bytes) { - if (bytes == 0) return; - for (auto& tracker : _all_ancestors) { - if (tracker->label() == "Process") return; - tracker->_consumption->add(bytes); - } -} - -inline void MemTrackerLimiter::cache_consume_local(int64_t bytes) { +inline void MemTrackerLimiter::cache_consume(int64_t bytes) { if (bytes == 0) return; int64_t consume_bytes = add_untracked_mem(bytes); - consume_local(consume_bytes); + consume(consume_bytes); } inline bool MemTrackerLimiter::try_consume(int64_t bytes, std::string& failed_msg) { 
@@ -297,30 +229,20 @@ inline bool MemTrackerLimiter::try_consume(int64_t bytes, std::string& failed_ms return true; } if (sys_mem_exceed_limit_check(bytes)) { - failed_msg = limit_exceeded_errmsg_sys_str(bytes); + failed_msg = process_limit_exceeded_errmsg_str(bytes); return false; } - int i; - // Walk the tracker tree top-down. - for (i = _all_ancestors.size() - 1; i >= 0; --i) { - MemTrackerLimiter* tracker = _all_ancestors[i]; - // Process tracker does not participate in the process memory limit, process tracker consumption is virtual memory, - // and there is a diff between the real physical memory value of the process. It is replaced by check_sys_mem_info. - if (tracker->limit() < 0 || tracker->label() == "Process") { - tracker->_consumption->add(bytes); // No limit at this tracker. - } else { - if (!tracker->_consumption->try_add(bytes, tracker->limit())) { - // Failed for this mem tracker. Roll back the ones that succeeded. - for (int j = _all_ancestors.size() - 1; j > i; --j) { - _all_ancestors[j]->_consumption->add(-bytes); - } - failed_msg = limit_exceeded_errmsg_prefix_str(bytes, tracker); - return false; - } + + if (_limit < 0) { + _consumption->add(bytes); // No limit at this tracker. + } else { + if (!_consumption->try_add(bytes, _limit)) { + // Failed for this mem tracker. Roll back the ones that succeeded. + _consumption->add(-bytes); + failed_msg = tracker_limit_exceeded_errmsg_str(bytes, this); + return false; } } - // Everyone succeeded, return. - DCHECK_EQ(i, -1); failed_msg = std::string(); return true; } @@ -328,15 +250,10 @@ inline bool MemTrackerLimiter::try_consume(int64_t bytes, std::string& failed_ms inline Status MemTrackerLimiter::check_limit(int64_t bytes) { if (bytes <= 0) return Status::OK(); if (sys_mem_exceed_limit_check(bytes)) { - return Status::MemoryLimitExceeded(limit_exceeded_errmsg_sys_str(bytes)); + return Status::MemoryLimitExceeded(process_limit_exceeded_errmsg_str(bytes)); } - int i; - // Walk the tracker tree top-down. 
- for (i = _limited_ancestors.size() - 1; i >= 0; --i) { - MemTrackerLimiter* tracker = _limited_ancestors[i]; - if (tracker->_consumption->current_value() + bytes > tracker->limit()) { - return Status::MemoryLimitExceeded(limit_exceeded_errmsg_prefix_str(bytes, tracker)); - } + if (_limit > 0 && _consumption->current_value() + bytes > _limit) { + return Status::MemoryLimitExceeded(tracker_limit_exceeded_errmsg_str(bytes, this)); } return Status::OK(); } diff --git a/be/src/runtime/memory/mem_tracker_task_pool.cpp b/be/src/runtime/memory/mem_tracker_task_pool.cpp deleted file mode 100644 index 28539703b4..0000000000 --- a/be/src/runtime/memory/mem_tracker_task_pool.cpp +++ /dev/null @@ -1,162 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "runtime/memory/mem_tracker_task_pool.h" - -#include "common/config.h" -#include "runtime/exec_env.h" -#include "util/pretty_printer.h" - -namespace doris { - -std::shared_ptr MemTrackerTaskPool::register_task_mem_tracker_impl( - const std::string& task_id, int64_t mem_limit, const std::string& label, - const std::shared_ptr& parent) { - DCHECK(!task_id.empty()); - std::lock_guard l(_task_tracker_lock); - // First time this task_id registered, make a new object, otherwise do nothing. - // Combine new tracker and emplace into one operation to avoid the use of locks - // Name for task MemTrackers. '$0' is replaced with the task id. - std::shared_ptr tracker; - bool new_emplace = _task_mem_trackers.lazy_emplace_l( - task_id, [&](const std::shared_ptr& v) { tracker = v; }, - [&](const auto& ctor) { - tracker = std::make_shared(mem_limit, label, parent); - ctor(task_id, tracker); - }); - if (new_emplace) { - LOG(INFO) << "Register query/load memory tracker, query/load id: " << task_id - << " limit: " << PrettyPrinter::print(mem_limit, TUnit::BYTES); - } - return tracker; -} - -std::shared_ptr MemTrackerTaskPool::register_query_mem_tracker( - const std::string& query_id, int64_t mem_limit) { - return register_task_mem_tracker_impl(query_id, mem_limit, fmt::format("Query#Id={}", query_id), - ExecEnv::GetInstance()->query_pool_mem_tracker()); -} - -std::shared_ptr MemTrackerTaskPool::register_query_scanner_mem_tracker( - const std::string& query_id) { - return register_task_mem_tracker_impl("Scanner#" + query_id, -1, - fmt::format("Scanner#Query#Id={}", query_id), - get_task_mem_tracker(query_id)); -} - -std::shared_ptr MemTrackerTaskPool::register_load_mem_tracker( - const std::string& load_id, int64_t mem_limit) { - // In load, the query id of the fragment is executed, which is the same as the load id of the load channel. 
- return register_task_mem_tracker_impl(load_id, mem_limit, fmt::format("Load#Id={}", load_id), - ExecEnv::GetInstance()->load_pool_mem_tracker()); -} - -std::shared_ptr MemTrackerTaskPool::register_load_scanner_mem_tracker( - const std::string& load_id) { - return register_task_mem_tracker_impl("Scanner#" + load_id, -1, - fmt::format("Scanner#Load#Id={}", load_id), - get_task_mem_tracker(load_id)); -} - -std::shared_ptr MemTrackerTaskPool::get_task_mem_tracker( - const std::string& task_id) { - DCHECK(!task_id.empty()); - std::shared_ptr tracker = nullptr; - // Avoid using locks to resolve erase conflicts - _task_mem_trackers.if_contains( - task_id, [&tracker](const std::shared_ptr& v) { tracker = v; }); - return tracker; -} - -void MemTrackerTaskPool::logout_task_mem_tracker() { - std::lock_guard l(_task_tracker_lock); - std::vector expired_task_ids; - for (auto it = _task_mem_trackers.begin(); it != _task_mem_trackers.end(); it++) { - if (!it->second) { - // Unknown exception case with high concurrency, after _task_mem_trackers.erase, - // the key still exists in _task_mem_trackers. https://github.com/apache/incubator-doris/issues/10006 - expired_task_ids.emplace_back(it->first); - } else if (it->second.use_count() == 1 && it->second->had_child_count() != 0) { - // No RuntimeState uses this task MemTrackerLimiter, it is only referenced by this map, - // and tracker was not created soon, delete it. - // - // If consumption is not equal to 0 before query mem tracker is destructed, - // there are two possibilities in theory. - // 1. A memory leak occurs. - // 2. memory consumed on query mem tracker, released on other trackers, and no manual transfer - // between the two trackers. - // At present, it is impossible to effectively locate which memory consume and release on different trackers, - // so query memory leaks cannot be found. 
- LOG(INFO) << fmt::format( - "Deregister query/load memory tracker, queryId={}, Limit={}, CurrUsed={}, " - "PeakUsed={}", - it->first, MemTracker::print_bytes(it->second->limit()), - MemTracker::print_bytes(it->second->consumption()), - MemTracker::print_bytes(it->second->peak_consumption())); - expired_task_ids.emplace_back(it->first); - } else if (config::memory_debug) { - it->second->print_log_usage("query routine"); - it->second->enable_print_log_usage(); - } - } - for (auto tid : expired_task_ids) { - // Verify the condition again to make sure the tracker is not being used again. - _task_mem_trackers.erase_if(tid, [&](const std::shared_ptr& v) { - return !v || v.use_count() == 1; - }); - } -} - -// TODO(zxy) More observable methods -// /// Logs the usage of 'limit' number of queries based on maximum total memory -// /// consumption. -// std::string MemTracker::LogTopNQueries(int limit) { -// if (limit == 0) return ""; -// priority_queue, std::vector>, -// std::greater>> -// min_pq; -// GetTopNQueries(min_pq, limit); -// std::vector usage_strings(min_pq.size()); -// while (!min_pq.empty()) { -// usage_strings.push_back(min_pq.top().second); -// min_pq.pop(); -// } -// std::reverse(usage_strings.begin(), usage_strings.end()); -// return join(usage_strings, "\n"); -// } - -// /// Helper function for LogTopNQueries that iterates through the MemTracker hierarchy -// /// and populates 'min_pq' with 'limit' number of elements (that contain state related -// /// to query MemTrackers) based on maximum total memory consumption. 
-// void MemTracker::GetTopNQueries( -// priority_queue, std::vector>, -// greater>>& min_pq, -// int limit) { -// list> children; -// { -// lock_guard l(child_trackers_lock_); -// children = child_trackers_; -// } -// for (const auto& child_weak : children) { -// shared_ptr child = child_weak.lock(); -// if (child) { -// child->GetTopNQueries(min_pq, limit); -// } -// } -// } - -} // namespace doris diff --git a/be/src/runtime/memory/mem_tracker_task_pool.h b/be/src/runtime/memory/mem_tracker_task_pool.h deleted file mode 100644 index 9e5813ba03..0000000000 --- a/be/src/runtime/memory/mem_tracker_task_pool.h +++ /dev/null @@ -1,68 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include "runtime/memory/mem_tracker_limiter.h" - -namespace doris { - -// TODO: phmap `parallel_flat_hash_map` is not thread-safe. If it is not fixed in the future, -// can consider using other maps instead. -using TaskTrackersMap = phmap::parallel_flat_hash_map< - std::string, std::shared_ptr, - phmap::priv::hash_default_hash, phmap::priv::hash_default_eq, - std::allocator>>, 12, - std::mutex>; - -// Global task pool for query MemTrackers. Owned by ExecEnv. 
-class MemTrackerTaskPool { -public: - // Construct a MemTrackerLimiter object for 'task_id' with 'mem_limit' as the memory limit. - // The MemTrackerLimiter is a child of the pool MemTrackerLimiter, Calling this with the same - // 'task_id' will return the same MemTrackerLimiter object. This is used to track the local - // memory usage of all tasks executing. The first time this is called for a task, - // a new MemTrackerLimiter object is created with the pool tracker as its parent. - // Newly created trackers will always have a limit of -1. - std::shared_ptr register_task_mem_tracker_impl( - const std::string& task_id, int64_t mem_limit, const std::string& label, - const std::shared_ptr& parent); - std::shared_ptr register_query_mem_tracker(const std::string& query_id, - int64_t mem_limit); - std::shared_ptr register_query_scanner_mem_tracker( - const std::string& query_id); - std::shared_ptr register_load_mem_tracker(const std::string& load_id, - int64_t mem_limit); - std::shared_ptr register_load_scanner_mem_tracker( - const std::string& load_id); - - std::shared_ptr get_task_mem_tracker(const std::string& task_id); - - // Remove the mem tracker that has ended the query. - void logout_task_mem_tracker(); - -private: - // All per-task MemTrackerLimiter objects. - // The life cycle of task MemTrackerLimiter in the process is the same as task runtime state, - // MemTrackers will be removed from this map after query finish or cancel. 
- TaskTrackersMap _task_mem_trackers; - std::mutex _task_tracker_lock; -}; - -} // namespace doris \ No newline at end of file diff --git a/be/src/runtime/memory/system_allocator.cpp b/be/src/runtime/memory/system_allocator.cpp index 78a9e7911e..f8dd402ad1 100644 --- a/be/src/runtime/memory/system_allocator.cpp +++ b/be/src/runtime/memory/system_allocator.cpp @@ -45,7 +45,7 @@ uint8_t* SystemAllocator::allocate_via_malloc(size_t length) { char buf[64]; auto err = fmt::format("fail to allocate mem via posix_memalign, res={}, errmsg={}.", res, strerror_r(res, buf, 64)); - ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + MemTrackerLimiter::print_log_process_usage(err); LOG(ERROR) << err; return nullptr; } diff --git a/be/src/runtime/memory/thread_mem_tracker_mgr.cpp b/be/src/runtime/memory/thread_mem_tracker_mgr.cpp index b19f9181b0..4273860468 100644 --- a/be/src/runtime/memory/thread_mem_tracker_mgr.cpp +++ b/be/src/runtime/memory/thread_mem_tracker_mgr.cpp @@ -19,35 +19,32 @@ #include "runtime/exec_env.h" #include "runtime/fragment_mgr.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "service/backend_options.h" namespace doris { void ThreadMemTrackerMgr::attach_limiter_tracker( - const std::string& task_id, const TUniqueId& fragment_instance_id, - const std::shared_ptr& mem_tracker) { + const std::shared_ptr& mem_tracker, + const TUniqueId& fragment_instance_id) { DCHECK(mem_tracker); flush_untracked_mem(); - _task_id_stack.push_back(task_id); - _fragment_instance_id_stack.push_back(fragment_instance_id); - _limiter_tracker_stack.push_back(mem_tracker); + _fragment_instance_id = fragment_instance_id; + _limiter_tracker = mem_tracker; _limiter_tracker_raw = mem_tracker.get(); } -void ThreadMemTrackerMgr::detach_limiter_tracker() { - DCHECK(!_limiter_tracker_stack.empty()); +void ThreadMemTrackerMgr::detach_limiter_tracker( + const std::shared_ptr& old_mem_tracker) { flush_untracked_mem(); - _task_id_stack.pop_back(); - 
_fragment_instance_id_stack.pop_back(); - _limiter_tracker_stack.pop_back(); - _limiter_tracker_raw = _limiter_tracker_stack.back().get(); + _fragment_instance_id = TUniqueId(); + _limiter_tracker = old_mem_tracker; + _limiter_tracker_raw = old_mem_tracker.get(); } void ThreadMemTrackerMgr::exceeded_cancel_task(const std::string& cancel_details) { - if (_fragment_instance_id_stack.back() != TUniqueId()) { + if (_fragment_instance_id != TUniqueId()) { ExecEnv::GetInstance()->fragment_mgr()->cancel( - _fragment_instance_id_stack.back(), PPlanFragmentCancelReason::MEMORY_LIMIT_EXCEED, + _fragment_instance_id, PPlanFragmentCancelReason::MEMORY_LIMIT_EXCEED, cancel_details); } } @@ -57,8 +54,7 @@ void ThreadMemTrackerMgr::exceeded(const std::string& failed_msg) { _cb_func(); } auto cancel_msg = _limiter_tracker_raw->mem_limit_exceeded( - fmt::format("exec node:<{}>", last_consumer_tracker()), - _limiter_tracker_raw->parent().get(), failed_msg); + fmt::format("execute:<{}>", last_consumer_tracker()), failed_msg); if (is_attach_query()) { exceeded_cancel_task(cancel_msg); } diff --git a/be/src/runtime/memory/thread_mem_tracker_mgr.h b/be/src/runtime/memory/thread_mem_tracker_mgr.h index 6c8cae39f9..1ab3e7bce4 100644 --- a/be/src/runtime/memory/thread_mem_tracker_mgr.h +++ b/be/src/runtime/memory/thread_mem_tracker_mgr.h @@ -27,56 +27,34 @@ namespace doris { -extern bthread_key_t btls_key; -static const bthread_key_t EMPTY_BTLS_KEY = {0, 0}; - using ExceedCallBack = void (*)(); -// TCMalloc new/delete Hook is counted in the memory_tracker of the current thread. -// -// In the original design, the MemTracker consume method is called before the memory is allocated. -// If the consume succeeds, the memory is actually allocated, otherwise an exception is thrown. -// But the statistics of memory through TCMalloc new/delete Hook are after the memory is actually allocated, -// which is different from the previous behavior. Therefore, when alloc for some large memory. 
+// Memory Hook is counted in the memory tracker of the current thread. class ThreadMemTrackerMgr { public: ThreadMemTrackerMgr() {} ~ThreadMemTrackerMgr() { // if _init == false, exec env is not initialized when init(). and never consumed mem tracker once. - if (_init) { - flush_untracked_mem(); - if (bthread_self() == 0) { - DCHECK(_consumer_tracker_stack.empty()); - DCHECK(_limiter_tracker_stack.size() == 1) - << ", limiter_tracker_stack.size(): " << _limiter_tracker_stack.size(); - } - } + if (_init) flush_untracked_mem(); } - // only for tcmalloc hook + // only for memory hook static void consume_no_attach(int64_t size) { if (ExecEnv::GetInstance()->initialized()) { - ExecEnv::GetInstance()->orphan_mem_tracker_raw()->consume(size); + ExecEnv::GetInstance()->orphan_mem_tracker()->consume(size); } } - // After thread initialization, calling `init` again must call `clear_untracked_mems` first - // to avoid memory tracking loss. void init(); - void init_impl(); - void clear(); - // After attach, the current thread TCMalloc Hook starts to consume/release task mem_tracker - void attach_limiter_tracker(const std::string& task_id, const TUniqueId& fragment_instance_id, - const std::shared_ptr& mem_tracker); - void detach_limiter_tracker(); - // Usually there are only two layers, the first is the default trackerOrphan; - // the second is the query tracker or bthread tracker. - int64_t get_attach_layers() { return _limiter_tracker_stack.size(); } + // After attach, the current thread Memory Hook starts to consume/release task mem_tracker + void attach_limiter_tracker(const std::shared_ptr& mem_tracker, + const TUniqueId& fragment_instance_id); + void detach_limiter_tracker(const std::shared_ptr& old_mem_tracker = + ExecEnv::GetInstance()->orphan_mem_tracker()); // Must be fast enough! Thread update_tracker may be called very frequently. - // So for performance, add tracker as early as possible, and then call update_tracker. 
bool push_consumer_tracker(MemTracker* mem_tracker); void pop_consumer_tracker(); std::string last_consumer_tracker() { @@ -96,7 +74,7 @@ public: void set_exceed_call_back(ExceedCallBack cb_func) { _cb_func = cb_func; } - // Note that, If call the memory allocation operation in TCMalloc new/delete Hook, + // Note that, If call the memory allocation operation in Memory Hook, // such as calling LOG/iostream/sstream/stringstream/etc. related methods, // must increase the control to avoid entering infinite recursion, otherwise it may cause crash or stuck, void consume(int64_t size); @@ -104,11 +82,11 @@ public: template void flush_untracked_mem(); - bool is_attach_query() { return _fragment_instance_id_stack.back() != TUniqueId(); } + bool is_attach_query() { return _fragment_instance_id != TUniqueId(); } std::shared_ptr limiter_mem_tracker() { - if (!_init) init(); - return _limiter_tracker_stack.back(); + if (!_init) init(); // ExecEnv not initialized when thread is created. + return _limiter_tracker; } MemTrackerLimiter* limiter_mem_tracker_raw() { if (!_init) init(); @@ -122,14 +100,13 @@ public: std::string print_debug_string() { fmt::memory_buffer consumer_tracker_buf; for (const auto& v : _consumer_tracker_stack) { - fmt::format_to(consumer_tracker_buf, "{}, ", - MemTracker::log_usage(v->make_snapshot(0))); + fmt::format_to(consumer_tracker_buf, "{}, ", MemTracker::log_usage(v->make_snapshot())); } return fmt::format( - "ThreadMemTrackerMgr debug, _untracked_mem:{}, _task_id:{}, " + "ThreadMemTrackerMgr debug, _untracked_mem:{}, " "_limiter_tracker:<{}>, _consumer_tracker_stack:<{}>", - std::to_string(_untracked_mem), _task_id_stack.back(), - _limiter_tracker_raw->log_usage(1), fmt::to_string(consumer_tracker_buf)); + std::to_string(_untracked_mem), _limiter_tracker_raw->log_usage(), + fmt::to_string(consumer_tracker_buf)); } private: @@ -141,8 +118,7 @@ private: private: // is false: ExecEnv::GetInstance()->initialized() = false when thread local is initialized 
bool _init = false; - // Cache untracked mem, only update to _untracked_mems when switching mem tracker. - // Frequent calls to unordered_map _untracked_mems[] in consume will degrade performance. + // Cache untracked mem. int64_t _untracked_mem = 0; int64_t old_untracked_mem = 0; @@ -151,8 +127,7 @@ private: std::string failed_msg = std::string(); - // _limiter_tracker_stack[0] = orphan_mem_tracker - std::vector> _limiter_tracker_stack; + std::shared_ptr _limiter_tracker; MemTrackerLimiter* _limiter_tracker_raw = nullptr; std::vector _consumer_tracker_stack; @@ -161,35 +136,18 @@ private: // If there is a memory new/delete operation in the consume method, it may enter infinite recursion. bool _stop_consume = false; bool _check_attach = true; - std::vector _task_id_stack; - std::vector _fragment_instance_id_stack; + TUniqueId _fragment_instance_id = TUniqueId(); ExceedCallBack _cb_func = nullptr; }; inline void ThreadMemTrackerMgr::init() { - DCHECK(_limiter_tracker_stack.size() == 0); - DCHECK(_limiter_tracker_raw == nullptr); - init_impl(); -} - -inline void ThreadMemTrackerMgr::init_impl() { - _limiter_tracker_stack.push_back(ExecEnv::GetInstance()->orphan_mem_tracker()); + DCHECK(_limiter_tracker == nullptr); + _limiter_tracker = ExecEnv::GetInstance()->orphan_mem_tracker(); _limiter_tracker_raw = ExecEnv::GetInstance()->orphan_mem_tracker_raw(); - _task_id_stack.push_back(""); - _fragment_instance_id_stack.push_back(TUniqueId()); _check_limit = true; _init = true; } -inline void ThreadMemTrackerMgr::clear() { - flush_untracked_mem(); - std::vector>().swap(_limiter_tracker_stack); - std::vector().swap(_consumer_tracker_stack); - std::vector().swap(_task_id_stack); - std::vector().swap(_fragment_instance_id_stack); - init_impl(); -} - inline bool ThreadMemTrackerMgr::push_consumer_tracker(MemTracker* tracker) { DCHECK(tracker) << print_debug_string(); if (std::count(_consumer_tracker_stack.begin(), _consumer_tracker_stack.end(), tracker)) { @@ -226,9 +184,9 @@ 
inline void ThreadMemTrackerMgr::consume(int64_t size) { template inline void ThreadMemTrackerMgr::flush_untracked_mem() { // Temporary memory may be allocated during the consumption of the mem tracker, which will lead to entering - // the TCMalloc Hook again, so suspend consumption to avoid falling into an infinite loop. + // the Memory Hook again, so suspend consumption to avoid falling into an infinite loop. _stop_consume = true; - if (!_init) init(); + if (!_init) init(); // ExecEnv not initialized when thread is created. DCHECK(_limiter_tracker_raw); old_untracked_mem = _untracked_mem; if (_count_scope_mem) _scope_mem += _untracked_mem; diff --git a/be/src/runtime/plan_fragment_executor.cpp b/be/src/runtime/plan_fragment_executor.cpp index 6e60c27cb9..5f83fe454a 100644 --- a/be/src/runtime/plan_fragment_executor.cpp +++ b/be/src/runtime/plan_fragment_executor.cpp @@ -95,10 +95,11 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request, fragments_ctx == nullptr ? 
request.query_globals : fragments_ctx->query_globals; _runtime_state.reset(new RuntimeState(params, request.query_options, query_globals, _exec_env)); _runtime_state->set_query_fragments_ctx(fragments_ctx); + _runtime_state->set_query_mem_tracker(fragments_ctx->query_mem_tracker); _runtime_state->set_tracer(std::move(tracer)); - RETURN_IF_ERROR(_runtime_state->init_mem_trackers(_query_id)); SCOPED_ATTACH_TASK(_runtime_state.get()); + _runtime_state->init_scanner_mem_trackers(); _runtime_state->runtime_filter_mgr()->init(); _runtime_state->set_be_number(request.backend_num); if (request.__isset.backend_id) { @@ -230,7 +231,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request, } Status PlanFragmentExecutor::open() { - int64_t mem_limit = _runtime_state->instance_mem_tracker()->limit(); + int64_t mem_limit = _runtime_state->query_mem_tracker()->limit(); LOG_INFO("PlanFragmentExecutor::open") .tag("query_id", _query_id) .tag("instance_id", _runtime_state->fragment_instance_id()) @@ -459,7 +460,7 @@ void PlanFragmentExecutor::_collect_node_statistics() { DCHECK(_runtime_state->backend_id() != -1); NodeStatistics* node_statistics = _query_statistics->add_nodes_statistics(_runtime_state->backend_id()); - node_statistics->add_peak_memory(_runtime_state->instance_mem_tracker()->peak_consumption()); + node_statistics->add_peak_memory(_runtime_state->query_mem_tracker()->peak_consumption()); } void PlanFragmentExecutor::report_profile() { diff --git a/be/src/runtime/query_fragments_ctx.h b/be/src/runtime/query_fragments_ctx.h index 8f9ceb38d6..1fc58f2f28 100644 --- a/be/src/runtime/query_fragments_ctx.h +++ b/be/src/runtime/query_fragments_ctx.h @@ -26,6 +26,8 @@ #include "gen_cpp/Types_types.h" // for TUniqueId #include "runtime/datetime_value.h" #include "runtime/exec_env.h" +#include "runtime/memory/mem_tracker_limiter.h" +#include "util/pretty_printer.h" #include "util/threadpool.h" namespace doris { @@ -41,6 +43,21 @@ public: _start_time = 
DateTimeValue::local_time(); } + ~QueryFragmentsCtx() { + // query mem tracker consumption is equal to 0, it means that after QueryFragmentsCtx is created, + // it is found that query already exists in _fragments_ctx_map, and query mem tracker is not used. + // query mem tracker consumption is not equal to 0 after use, because there is memory consumed + // on query mem tracker, released on other trackers. + if (query_mem_tracker->consumption() != 0) { + LOG(INFO) << fmt::format( + "Deregister query/load memory tracker, queryId={}, Limit={}, CurrUsed={}, " + "PeakUsed={}", + print_id(query_id), MemTracker::print_bytes(query_mem_tracker->limit()), + MemTracker::print_bytes(query_mem_tracker->consumption()), + MemTracker::print_bytes(query_mem_tracker->peak_consumption())); + } + } + bool countdown() { return fragment_num.fetch_sub(1) == 1; } bool is_timeout(const DateTimeValue& now) const { @@ -99,6 +116,8 @@ public: std::atomic fragment_num; int timeout_second; ObjectPool obj_pool; + // MemTracker that is shared by all fragment instances running on this host. 
+ std::shared_ptr query_mem_tracker; private: ExecEnv* _exec_env; diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp index 8087d6efc5..7bc894e1da 100644 --- a/be/src/runtime/runtime_filter_mgr.cpp +++ b/be/src/runtime/runtime_filter_mgr.cpp @@ -44,7 +44,7 @@ RuntimeFilterMgr::RuntimeFilterMgr(const UniqueId& query_id, RuntimeState* state RuntimeFilterMgr::~RuntimeFilterMgr() {} Status RuntimeFilterMgr::init() { - DCHECK(_state->instance_mem_tracker() != nullptr); + DCHECK(_state->query_mem_tracker() != nullptr); _tracker = std::make_unique("RuntimeFilterMgr"); return Status::OK(); } diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index 3766431f50..07b04f5f09 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -34,7 +34,6 @@ #include "runtime/exec_env.h" #include "runtime/load_path_mgr.h" #include "runtime/memory/mem_tracker.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/runtime_filter_mgr.h" #include "util/file_utils.h" #include "util/load_error_hub.h" @@ -160,7 +159,6 @@ RuntimeState::~RuntimeState() { _error_hub->close(); } - // Manually release the child mem tracker before _instance_mem_tracker is destructed. _obj_pool->clear(); _runtime_filter_mgr.reset(); } @@ -216,53 +214,10 @@ Status RuntimeState::init(const TUniqueId& fragment_instance_id, const TQueryOpt } Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) { - bool has_query_mem_tracker = _query_options.__isset.mem_limit && (_query_options.mem_limit > 0); - int64_t bytes_limit = has_query_mem_tracker ? _query_options.mem_limit : -1; - if (bytes_limit > ExecEnv::GetInstance()->process_mem_tracker()->limit()) { - VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES) - << " exceeds process memory limit of " - << PrettyPrinter::print(ExecEnv::GetInstance()->process_mem_tracker()->limit(), - TUnit::BYTES) - << ". 
Using process memory limit instead"; - bytes_limit = ExecEnv::GetInstance()->process_mem_tracker()->limit(); - } - auto mem_tracker_counter = ADD_COUNTER(&_profile, "MemoryLimit", TUnit::BYTES); - mem_tracker_counter->set(bytes_limit); - - if (query_type() == TQueryType::SELECT) { - _query_mem_tracker = - _exec_env->task_pool_mem_tracker_registry()->register_query_mem_tracker( - print_id(query_id), bytes_limit); - _scanner_mem_tracker = - _exec_env->task_pool_mem_tracker_registry()->register_query_scanner_mem_tracker( - print_id(query_id)); - } else if (query_type() == TQueryType::LOAD) { - _query_mem_tracker = _exec_env->task_pool_mem_tracker_registry()->register_load_mem_tracker( - print_id(query_id), bytes_limit); - _scanner_mem_tracker = - _exec_env->task_pool_mem_tracker_registry()->register_load_scanner_mem_tracker( - print_id(query_id)); - } else { - DCHECK(false); - _query_mem_tracker = ExecEnv::GetInstance()->query_pool_mem_tracker(); - } - _query_mem_tracker->enable_reset_zero(); - - _instance_mem_tracker = std::make_shared( - -1, "RuntimeState:instance:" + print_id(_fragment_instance_id), _query_mem_tracker, - &_profile); - - if (_query_options.is_report_success) { - _query_mem_tracker->enable_print_log_usage(); - _instance_mem_tracker->enable_print_log_usage(); - } - - return Status::OK(); -} - -Status RuntimeState::init_instance_mem_tracker() { - _query_mem_tracker = nullptr; - _instance_mem_tracker = std::make_shared(-1, "RuntimeState:instance"); + _query_mem_tracker = std::make_shared( + MemTrackerLimiter::Type::QUERY, fmt::format("TestQuery#Id={}", print_id(query_id))); + _scanner_mem_tracker = + std::make_shared(fmt::format("TestScanner#QueryId={}", print_id(query_id))); return Status::OK(); } @@ -326,9 +281,7 @@ Status RuntimeState::set_mem_limit_exceeded(const std::string& msg) { Status RuntimeState::check_query_state(const std::string& msg) { // TODO: it would be nice if this also checked for cancellation, but doing so breaks // cases where we 
use Status::Cancelled("Cancelled") to indicate that the limit was reached. - if (thread_context() - ->_thread_mem_tracker_mgr->limiter_mem_tracker_raw() - ->any_limit_exceeded()) { + if (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->limit_exceeded()) { RETURN_LIMIT_EXCEEDED(this, msg); } return query_status(); diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 83f651725c..69b40f6c6f 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -76,14 +76,13 @@ public: Status init(const TUniqueId& fragment_instance_id, const TQueryOptions& query_options, const TQueryGlobals& query_globals, ExecEnv* exec_env); - // Set up four-level hierarchy of mem trackers: process, query, fragment instance. - // The instance tracker is tied to our profile. - // Specific parts of the fragment (i.e. exec nodes, sinks, data stream senders, etc) - // will add a fourth level when they are initialized. - Status init_mem_trackers(const TUniqueId& query_id); - - // for ut only - Status init_instance_mem_tracker(); + // after SCOPED_ATTACH_TASK; + void init_scanner_mem_trackers() { + _scanner_mem_tracker = std::make_shared( + fmt::format("Scanner#QueryId={}", print_id(_query_id))); + } + // for ut and non-query. + Status init_mem_trackers(const TUniqueId& query_id = TUniqueId()); // Gets/Creates the query wide block mgr. 
Status create_block_mgr(); @@ -116,8 +115,7 @@ public: const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; } ExecEnv* exec_env() { return _exec_env; } std::shared_ptr query_mem_tracker() { return _query_mem_tracker; } - std::shared_ptr instance_mem_tracker() { return _instance_mem_tracker; } - std::shared_ptr scanner_mem_tracker() { return _scanner_mem_tracker; } + std::shared_ptr scanner_mem_tracker() { return _scanner_mem_tracker; } ThreadResourceMgr::ResourcePool* resource_pool() { return _resource_pool; } void set_fragment_root_id(PlanNodeId id) { @@ -382,6 +380,10 @@ public: QueryFragmentsCtx* get_query_fragments_ctx() { return _query_ctx; } + void set_query_mem_tracker(const std::shared_ptr& tracker) { + _query_mem_tracker = tracker; + } + OpentelemetryTracer get_tracer() { return _tracer; } void set_tracer(OpentelemetryTracer&& tracer) { _tracer = std::move(tracer); } @@ -398,15 +400,9 @@ private: static const int DEFAULT_BATCH_SIZE = 2048; - // MemTracker that is shared by all fragment instances running on this host. - // The query mem tracker must be released after the _instance_mem_tracker. std::shared_ptr _query_mem_tracker; - // Memory usage of this fragment instance - std::shared_ptr _instance_mem_tracker; - // Count the memory consumption of Scanner, independent and unique for each query, - // this means that scnner memory does not count into query mem tracker, - // label is `Scanner#{queryId}`. - std::shared_ptr _scanner_mem_tracker; + // Count the memory consumption of Scanner + std::shared_ptr _scanner_mem_tracker; // put runtime state before _obj_pool, so that it will be deconstructed after // _obj_pool. Because some of object in _obj_pool will use profile when deconstructing. 
diff --git a/be/src/runtime/sorted_run_merger.cc b/be/src/runtime/sorted_run_merger.cc index 28d347462f..32bdad9a6d 100644 --- a/be/src/runtime/sorted_run_merger.cc +++ b/be/src/runtime/sorted_run_merger.cc @@ -183,7 +183,7 @@ private: std::condition_variable _batch_prepared_cv; void process_sorted_run_task(const std::shared_ptr& mem_tracker) { - SCOPED_ATTACH_TASK(mem_tracker, ThreadContext::TaskType::QUERY); + SCOPED_ATTACH_TASK(mem_tracker); std::unique_lock lock(_mutex); while (true) { _batch_prepared_cv.wait(lock, [this]() { return !_backup_ready.load(); }); diff --git a/be/src/runtime/stream_load/stream_load_pipe.h b/be/src/runtime/stream_load/stream_load_pipe.h index 106426653e..fac16b81ff 100644 --- a/be/src/runtime/stream_load/stream_load_pipe.h +++ b/be/src/runtime/stream_load/stream_load_pipe.h @@ -47,7 +47,7 @@ public: _use_proto(use_proto) {} virtual ~StreamLoadPipe() { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->nursery_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); while (!_buf_queue.empty()) _buf_queue.pop_front(); } @@ -119,7 +119,7 @@ public: } Status read(uint8_t* data, int64_t data_size, int64_t* bytes_read, bool* eof) override { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->nursery_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); *bytes_read = 0; while (*bytes_read < data_size) { std::unique_lock l(_lock); diff --git a/be/src/runtime/thread_context.cpp b/be/src/runtime/thread_context.cpp index 442fe755a6..cd593f05ad 100644 --- a/be/src/runtime/thread_context.cpp +++ b/be/src/runtime/thread_context.cpp @@ -39,22 +39,14 @@ ScopeMemCount::~ScopeMemCount() { } AttachTask::AttachTask(const std::shared_ptr& mem_tracker, - const ThreadContext::TaskType& type, const std::string& task_id, - const TUniqueId& fragment_instance_id) { - DCHECK(mem_tracker); - thread_context()->attach_task(type, 
task_id, fragment_instance_id, mem_tracker); + const std::string& task_id, const TUniqueId& fragment_instance_id) { + thread_context()->attach_task(task_id, fragment_instance_id, mem_tracker); } AttachTask::AttachTask(RuntimeState* runtime_state) { -#ifndef BE_TEST - DCHECK(print_id(runtime_state->query_id()) != ""); - DCHECK(runtime_state->fragment_instance_id() != TUniqueId()); -#endif // BE_TEST - DCHECK(runtime_state->instance_mem_tracker()); - thread_context()->attach_task(ThreadContext::query_to_task_type(runtime_state->query_type()), - print_id(runtime_state->query_id()), + thread_context()->attach_task(print_id(runtime_state->query_id()), runtime_state->fragment_instance_id(), - runtime_state->instance_mem_tracker()); + runtime_state->query_mem_tracker()); } AttachTask::~AttachTask() { @@ -65,14 +57,13 @@ AttachTask::~AttachTask() { } SwitchThreadMemTrackerLimiter::SwitchThreadMemTrackerLimiter( - const std::shared_ptr& mem_tracker_limiter) { - DCHECK(mem_tracker_limiter); - thread_context()->_thread_mem_tracker_mgr->attach_limiter_tracker("", TUniqueId(), - mem_tracker_limiter); + const std::shared_ptr& mem_tracker) { + _old_mem_tracker = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker(); + thread_context()->_thread_mem_tracker_mgr->attach_limiter_tracker(mem_tracker, TUniqueId()); } SwitchThreadMemTrackerLimiter::~SwitchThreadMemTrackerLimiter() { - thread_context()->_thread_mem_tracker_mgr->detach_limiter_tracker(); + thread_context()->_thread_mem_tracker_mgr->detach_limiter_tracker(_old_mem_tracker); } AddThreadMemTrackerConsumer::AddThreadMemTrackerConsumer(MemTracker* mem_tracker) { diff --git a/be/src/runtime/thread_context.h b/be/src/runtime/thread_context.h index ca09df0ecf..d07cbe7056 100644 --- a/be/src/runtime/thread_context.h +++ b/be/src/runtime/thread_context.h @@ -68,8 +68,8 @@ // Usually used after SCOPED_ATTACH_TASK, during query execution. #define RETURN_LIMIT_EXCEEDED(state, msg, ...) 
\ return doris::thread_context() \ - ->_thread_mem_tracker_mgr->limiter_mem_tracker_raw() \ - ->mem_limit_exceeded( \ + ->_thread_mem_tracker_mgr->limiter_mem_tracker() \ + ->fragment_mem_limit_exceeded( \ state, \ fmt::format("exec node:<{}>, {}", \ doris::thread_context() \ @@ -140,82 +140,36 @@ inline thread_local bthread_t bthread_id; // // There may be other optional info to be added later. class ThreadContext { -public: - enum TaskType { - UNKNOWN = 0, - QUERY = 1, - LOAD = 2, - COMPACTION = 3, - STORAGE = 4, - BRPC = 5 - // to be added ... - }; - inline static const std::string TaskTypeStr[] = {"UNKNOWN", "QUERY", "LOAD", - "COMPACTION", "STORAGE", "BRPC"}; - public: ThreadContext() { _thread_mem_tracker_mgr.reset(new ThreadMemTrackerMgr()); - init(); - } - - ~ThreadContext() { - // Restore to the memory state before init=true to ensure accurate overall memory statistics. - // Thereby ensuring that the memory alloc size is not tracked during the initialization of the - // ThreadContext before `init = true in ThreadContextPtr()`, - // Equal to the size of the memory release that is not tracked during the destruction of the - // ThreadContext after `init = false in ~ThreadContextPtr()`, - if (ExecEnv::GetInstance()->initialized()) _thread_mem_tracker_mgr->clear(); - thread_context_ptr.init = false; - } - - void init() { - _type = TaskType::UNKNOWN; if (ExecEnv::GetInstance()->initialized()) _thread_mem_tracker_mgr->init(); - _thread_id = get_thread_id(); } - void attach_task(const TaskType& type, const std::string& task_id, - const TUniqueId& fragment_instance_id, + ~ThreadContext() { thread_context_ptr.init = false; } + + void attach_task(const std::string& task_id, const TUniqueId& fragment_instance_id, const std::shared_ptr& mem_tracker) { #ifndef BE_TEST // will only attach_task at the beginning of the thread function, there should be no duplicate attach_task. 
- DCHECK((_type == TaskType::UNKNOWN || _type == TaskType::BRPC) && - type != TaskType::UNKNOWN && _task_id == "" && mem_tracker != nullptr) - << ",new tracker label: " << mem_tracker->label() << ",old tracker label: " - << _thread_mem_tracker_mgr->limiter_mem_tracker_raw()->label(); + DCHECK(mem_tracker); + // Orphan is thread default tracker. + DCHECK(_thread_mem_tracker_mgr->limiter_mem_tracker()->label() == "Orphan") + << ", attach mem tracker label: " << mem_tracker->label(); #endif - _type = type; _task_id = task_id; _fragment_instance_id = fragment_instance_id; - _thread_mem_tracker_mgr->attach_limiter_tracker(task_id, fragment_instance_id, mem_tracker); + _thread_mem_tracker_mgr->attach_limiter_tracker(mem_tracker, fragment_instance_id); } void detach_task() { - _type = TaskType::UNKNOWN; _task_id = ""; _fragment_instance_id = TUniqueId(); _thread_mem_tracker_mgr->detach_limiter_tracker(); } - const TaskType& type() const { return _type; } - const void set_type(const TaskType& type) { _type = type; } - const std::string& task_id() const { return _task_id; } - const std::string& thread_id_str() const { return _thread_id; } const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; } - static TaskType query_to_task_type(const TQueryType::type& query_type) { - switch (query_type) { - case TQueryType::SELECT: - return TaskType::QUERY; - case TQueryType::LOAD: - return TaskType::LOAD; - default: - DCHECK(false); - return TaskType::UNKNOWN; - } - } - std::string get_thread_id() { std::stringstream ss; ss << std::this_thread::get_id(); @@ -231,23 +185,23 @@ public: std::unique_ptr _thread_mem_tracker_mgr; private: - std::string _thread_id; - TaskType _type; - std::string _task_id; + std::string _task_id = ""; TUniqueId _fragment_instance_id; }; -static void attach_bthread() { +// Cache the pointer of bthread local in pthead local, +// Avoid calling bthread_getspecific frequently to get bthread local, which has performance problems. 
+static void pthread_attach_bthread() { bthread_id = bthread_self(); bthread_context = static_cast(bthread_getspecific(btls_key)); if (bthread_context == nullptr) { // A new bthread starts, two scenarios: // 1. First call to bthread_getspecific (and before any bthread_setspecific) returns NULL // 2. There are not enough reusable btls in btls pool. -#ifndef BE_TEST - DCHECK(ExecEnv::GetInstance()->initialized()); -#endif - // Create thread-local data on demand. + // else, two scenarios: + // 1. A new bthread starts, but get a reuses btls. + // 2. A pthread switch occurs. Because the pthread switch cannot be accurately identified at the moment. + // So tracker call reset 0 like reuses btls. bthread_context = new ThreadContext; // set the data so that next time bthread_getspecific in the thread returns the data. CHECK_EQ(0, bthread_setspecific(btls_key, bthread_context)); @@ -257,9 +211,9 @@ static void attach_bthread() { static ThreadContext* thread_context() { if (bthread_self() != 0) { if (bthread_self() != bthread_id) { - // A new bthread starts or pthread switch occurs. + // A new bthread starts or pthread switch occurs, during this period, stop the use of thread_context. 
thread_context_ptr.init = false; - attach_bthread(); + pthread_attach_bthread(); thread_context_ptr.init = true; } return bthread_context; @@ -281,7 +235,6 @@ private: class AttachTask { public: explicit AttachTask(const std::shared_ptr& mem_tracker, - const ThreadContext::TaskType& type = ThreadContext::TaskType::UNKNOWN, const std::string& task_id = "", const TUniqueId& fragment_instance_id = TUniqueId()); @@ -292,10 +245,12 @@ public: class SwitchThreadMemTrackerLimiter { public: - explicit SwitchThreadMemTrackerLimiter( - const std::shared_ptr& mem_tracker_limiter); + explicit SwitchThreadMemTrackerLimiter(const std::shared_ptr& mem_tracker); ~SwitchThreadMemTrackerLimiter(); + +private: + std::shared_ptr _old_mem_tracker; }; class AddThreadMemTrackerConsumer { @@ -345,6 +300,10 @@ private: size, doris::thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()) // Mem Hook to consume thread mem tracker +// TODO: In the original design, the MemTracker consume method is called before the memory is allocated. +// If the consume succeeds, the memory is actually allocated, otherwise an exception is thrown. +// But the statistics of memory through TCMalloc new/delete Hook are after the memory is actually allocated, +// which is different from the previous behavior. 
#define MEM_MALLOC_HOOK(size) \ do { \ if (doris::thread_context_ptr.init) { \ diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index 3b53e6dc31..714e4757bc 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -53,7 +53,6 @@ #include "runtime/exec_env.h" #include "runtime/heartbeat_flags.h" #include "runtime/load_channel_mgr.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "service/backend_options.h" #include "service/backend_service.h" #include "service/brpc_service.h" @@ -500,29 +499,16 @@ int main(int argc, char** argv) { __lsan_do_leak_check(); #endif doris::PerfCounters::refresh_proc_status(); + doris::MemTrackerLimiter::refresh_global_counter(); + doris::ExecEnv::GetInstance()->load_channel_mgr()->refresh_mem_tracker(); #if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) && \ !defined(USE_JEMALLOC) doris::MemInfo::refresh_allocator_mem(); #endif - int64_t allocator_cache_mem_diff = - doris::MemInfo::allocator_cache_mem() - - doris::ExecEnv::GetInstance()->allocator_cache_mem_tracker()->consumption(); - doris::ExecEnv::GetInstance()->allocator_cache_mem_tracker()->consume( - allocator_cache_mem_diff); - CONSUME_THREAD_MEM_TRACKER(allocator_cache_mem_diff); - doris::ExecEnv::GetInstance()->load_channel_mgr()->refresh_mem_tracker(); - - // 1s clear the expired task mem tracker, a query mem tracker is about 57 bytes. - // this will cause coredump for ASAN build when running regression test, - // disable temporarily. - doris::ExecEnv::GetInstance()->task_pool_mem_tracker_registry()->logout_task_mem_tracker(); - // The process tracker print log usage interval is 1s to avoid a large number of tasks being - // canceled when the process exceeds the mem limit, resulting in too many duplicate logs. 
- doris::ExecEnv::GetInstance()->process_mem_tracker()->enable_print_log_usage(); if (doris::config::memory_debug) { - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage("main routine"); - doris::ExecEnv::GetInstance()->process_mem_tracker()->enable_print_log_usage(); + doris::MemTrackerLimiter::print_log_process_usage("memory_debug"); } + doris::MemTrackerLimiter::enable_print_log_process_usage(); sleep(1); } diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index 3e672ab910..26301b8f4e 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -34,7 +34,6 @@ #include "runtime/fold_constant_executor.h" #include "runtime/fragment_mgr.h" #include "runtime/load_channel_mgr.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/result_buffer_mgr.h" #include "runtime/routine_load/routine_load_task_executor.h" #include "runtime/runtime_state.h" @@ -69,10 +68,12 @@ static void thread_context_deleter(void* d) { template class NewHttpClosure : public ::google::protobuf::Closure { public: + NewHttpClosure(google::protobuf::Closure* done) : _done(done) {} NewHttpClosure(T* request, google::protobuf::Closure* done) : _request(request), _done(done) {} ~NewHttpClosure() {} void Run() { + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); if (_request != nullptr) { delete _request; _request = nullptr; @@ -108,22 +109,23 @@ void PInternalServiceImpl::transmit_data(google::protobuf::RpcController* cntl_b PTransmitDataResult* response, google::protobuf::Closure* done) { // TODO(zxy) delete in 1.2 version + google::protobuf::Closure* new_done = new NewHttpClosure(done); brpc::Controller* cntl = static_cast(cntl_base); attachment_transfer_request_row_batch(request, cntl); - _transmit_data(cntl_base, request, response, done, Status::OK()); + _transmit_data(cntl_base, request, response, new_done, Status::OK()); } void 
PInternalServiceImpl::transmit_data_by_http(google::protobuf::RpcController* cntl_base, const PEmptyRequest* request, PTransmitDataResult* response, google::protobuf::Closure* done) { - PTransmitDataParams* request_raw = new PTransmitDataParams(); - google::protobuf::Closure* done_raw = - new NewHttpClosure(request_raw, done); + PTransmitDataParams* new_request = new PTransmitDataParams(); + google::protobuf::Closure* new_done = + new NewHttpClosure(new_request, done); brpc::Controller* cntl = static_cast(cntl_base); - Status st = attachment_extract_request_contain_tuple(request_raw, cntl); - _transmit_data(cntl_base, request_raw, response, done_raw, st); + Status st = attachment_extract_request_contain_tuple(new_request, cntl); + _transmit_data(cntl_base, new_request, response, new_done, st); } void PInternalServiceImpl::_transmit_data(google::protobuf::RpcController* cntl_base, @@ -133,17 +135,10 @@ void PInternalServiceImpl::_transmit_data(google::protobuf::RpcController* cntl_ const Status& extract_st) { std::string query_id; TUniqueId finst_id; - std::shared_ptr transmit_tracker = nullptr; if (request->has_query_id()) { query_id = print_id(request->query_id()); finst_id.__set_hi(request->finst_id().hi()); finst_id.__set_lo(request->finst_id().lo()); - transmit_tracker = - _exec_env->task_pool_mem_tracker_registry()->get_task_mem_tracker(query_id); - } - if (!transmit_tracker) { - query_id = "unkown_transmit_data"; - transmit_tracker = std::make_shared(-1, "unkown_transmit_data"); } VLOG_ROW << "transmit data: fragment_instance_id=" << print_id(request->finst_id()) << " query_id=" << query_id << " node=" << request->node_id(); @@ -152,7 +147,6 @@ void PInternalServiceImpl::_transmit_data(google::protobuf::RpcController* cntl_ Status st; st.to_protobuf(response->mutable_status()); if (extract_st.ok()) { - SCOPED_ATTACH_TASK(transmit_tracker, ThreadContext::TaskType::QUERY, query_id, finst_id); st = _exec_env->stream_mgr()->transmit_data(request, &done); if 
(!st.ok()) { LOG(WARNING) << "transmit_data failed, message=" << st.get_error_msg() @@ -225,23 +219,24 @@ void PInternalServiceImpl::tablet_writer_add_block(google::protobuf::RpcControll PTabletWriterAddBlockResult* response, google::protobuf::Closure* done) { // TODO(zxy) delete in 1.2 version + google::protobuf::Closure* new_done = new NewHttpClosure(done); brpc::Controller* cntl = static_cast(cntl_base); attachment_transfer_request_block(request, cntl); - _tablet_writer_add_block(cntl_base, request, response, done); + _tablet_writer_add_block(cntl_base, request, response, new_done); } void PInternalServiceImpl::tablet_writer_add_block_by_http( google::protobuf::RpcController* cntl_base, const ::doris::PEmptyRequest* request, PTabletWriterAddBlockResult* response, google::protobuf::Closure* done) { - PTabletWriterAddBlockRequest* request_raw = new PTabletWriterAddBlockRequest(); - google::protobuf::Closure* done_raw = - new NewHttpClosure(request_raw, done); + PTabletWriterAddBlockRequest* new_request = new PTabletWriterAddBlockRequest(); + google::protobuf::Closure* new_done = + new NewHttpClosure(new_request, done); brpc::Controller* cntl = static_cast(cntl_base); - Status st = attachment_extract_request_contain_block(request_raw, + Status st = attachment_extract_request_contain_block(new_request, cntl); if (st.ok()) { - _tablet_writer_add_block(cntl_base, request_raw, response, done_raw); + _tablet_writer_add_block(cntl_base, new_request, response, new_done); } else { st.to_protobuf(response->mutable_status()); } @@ -280,20 +275,21 @@ void PInternalServiceImpl::tablet_writer_add_batch(google::protobuf::RpcControll const PTabletWriterAddBatchRequest* request, PTabletWriterAddBatchResult* response, google::protobuf::Closure* done) { - _tablet_writer_add_batch(cntl_base, request, response, done); + google::protobuf::Closure* new_done = new NewHttpClosure(done); + _tablet_writer_add_batch(cntl_base, request, response, new_done); } void 
PInternalServiceImpl::tablet_writer_add_batch_by_http( google::protobuf::RpcController* cntl_base, const ::doris::PEmptyRequest* request, PTabletWriterAddBatchResult* response, google::protobuf::Closure* done) { - PTabletWriterAddBatchRequest* request_raw = new PTabletWriterAddBatchRequest(); - google::protobuf::Closure* done_raw = - new NewHttpClosure(request_raw, done); + PTabletWriterAddBatchRequest* new_request = new PTabletWriterAddBatchRequest(); + google::protobuf::Closure* new_done = + new NewHttpClosure(new_request, done); brpc::Controller* cntl = static_cast(cntl_base); - Status st = attachment_extract_request_contain_tuple(request_raw, + Status st = attachment_extract_request_contain_tuple(new_request, cntl); if (st.ok()) { - _tablet_writer_add_batch(cntl_base, request_raw, response, done_raw); + _tablet_writer_add_batch(cntl_base, new_request, response, new_done); } else { st.to_protobuf(response->mutable_status()); } @@ -689,22 +685,23 @@ void PInternalServiceImpl::transmit_block(google::protobuf::RpcController* cntl_ PTransmitDataResult* response, google::protobuf::Closure* done) { // TODO(zxy) delete in 1.2 version + google::protobuf::Closure* new_done = new NewHttpClosure(done); brpc::Controller* cntl = static_cast(cntl_base); attachment_transfer_request_block(request, cntl); - _transmit_block(cntl_base, request, response, done, Status::OK()); + _transmit_block(cntl_base, request, response, new_done, Status::OK()); } void PInternalServiceImpl::transmit_block_by_http(google::protobuf::RpcController* cntl_base, const PEmptyRequest* request, PTransmitDataResult* response, google::protobuf::Closure* done) { - PTransmitDataParams* request_raw = new PTransmitDataParams(); - google::protobuf::Closure* done_raw = - new NewHttpClosure(request_raw, done); + PTransmitDataParams* new_request = new PTransmitDataParams(); + google::protobuf::Closure* new_done = + new NewHttpClosure(new_request, done); brpc::Controller* cntl = static_cast(cntl_base); - Status st = 
attachment_extract_request_contain_block(request_raw, cntl); - _transmit_block(cntl_base, request_raw, response, done_raw, st); + Status st = attachment_extract_request_contain_block(new_request, cntl); + _transmit_block(cntl_base, new_request, response, new_done, st); } void PInternalServiceImpl::_transmit_block(google::protobuf::RpcController* cntl_base, @@ -714,18 +711,10 @@ void PInternalServiceImpl::_transmit_block(google::protobuf::RpcController* cntl const Status& extract_st) { std::string query_id; TUniqueId finst_id; - std::shared_ptr transmit_tracker = nullptr; if (request->has_query_id()) { query_id = print_id(request->query_id()); finst_id.__set_hi(request->finst_id().hi()); finst_id.__set_lo(request->finst_id().lo()); - // phmap `parallel_flat_hash_map` is not thread safe, so get query mem tracker may be null pointer. - transmit_tracker = - _exec_env->task_pool_mem_tracker_registry()->get_task_mem_tracker(query_id); - } - if (!transmit_tracker) { - query_id = "unkown_transmit_block"; - transmit_tracker = std::make_shared(-1, "unkown_transmit_block"); } VLOG_ROW << "transmit block: fragment_instance_id=" << print_id(request->finst_id()) << " query_id=" << query_id << " node=" << request->node_id(); @@ -734,7 +723,6 @@ void PInternalServiceImpl::_transmit_block(google::protobuf::RpcController* cntl Status st; st.to_protobuf(response->mutable_status()); if (extract_st.ok()) { - SCOPED_ATTACH_TASK(transmit_tracker, ThreadContext::TaskType::QUERY, query_id, finst_id); st = _exec_env->vstream_mgr()->transmit_block(request, &done); if (!st.ok()) { LOG(WARNING) << "transmit_block failed, message=" << st.get_error_msg() diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp index ab17542100..9070b09527 100644 --- a/be/src/util/mem_info.cpp +++ b/be/src/util/mem_info.cpp @@ -104,11 +104,23 @@ void MemInfo::init() { bool is_percent = true; _s_mem_limit = ParseUtil::parse_mem_spec(config::mem_limit, -1, _s_physical_mem, &is_percent); + if (_s_mem_limit 
<= 0) { + LOG(WARNING) << "Failed to parse mem limit from '" + config::mem_limit + "'."; + } + if (_s_mem_limit > _s_physical_mem) { + LOG(WARNING) << "Memory limit " << PrettyPrinter::print(_s_mem_limit, TUnit::BYTES) + << " exceeds physical memory of " + << PrettyPrinter::print(_s_physical_mem, TUnit::BYTES) + << ". Using physical memory instead"; + _s_mem_limit = _s_physical_mem; + } _s_mem_limit_str = PrettyPrinter::print(_s_mem_limit, TUnit::BYTES); _s_hard_mem_limit = _s_physical_mem - std::max(209715200L, _s_physical_mem / 10); // 200M - LOG(INFO) << "Physical Memory: " << PrettyPrinter::print(_s_physical_mem, TUnit::BYTES); + LOG(INFO) << "Physical Memory: " << PrettyPrinter::print(_s_physical_mem, TUnit::BYTES) + << ", Mem Limit: " << _s_mem_limit_str + << ", origin config value: " << config::mem_limit; _s_initialized = true; } #else diff --git a/be/src/util/ref_count_closure.h b/be/src/util/ref_count_closure.h index b91d6225ed..c278dae9a3 100644 --- a/be/src/util/ref_count_closure.h +++ b/be/src/util/ref_count_closure.h @@ -37,6 +37,7 @@ public: bool unref() { return _refs.fetch_sub(1) == 1; } void Run() override { + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); if (unref()) { delete this; } diff --git a/be/src/vec/common/allocator.h b/be/src/vec/common/allocator.h index 8f9eba9fae..36a11fc6f9 100644 --- a/be/src/vec/common/allocator.h +++ b/be/src/vec/common/allocator.h @@ -132,7 +132,7 @@ public: if (MAP_FAILED == buf) { RELEASE_THREAD_MEM_TRACKER(size); auto err = fmt::format("Allocator: Cannot mmap {}.", size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY); } @@ -142,7 +142,7 @@ public: doris::Chunk chunk; if (!doris::ChunkAllocator::instance()->allocate_align(size, &chunk)) { auto err = fmt::format("Allocator: Cannot allocate chunk 
{}.", size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY); } @@ -157,7 +157,7 @@ public: if (nullptr == buf) { auto err = fmt::format("Allocator: Cannot malloc {}.", size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno( err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY); } @@ -167,7 +167,7 @@ public: if (0 != res) { auto err = fmt::format("Cannot allocate memory (posix_memalign) {}.", size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno( err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY, res); } @@ -183,7 +183,7 @@ public: if (size >= MMAP_THRESHOLD) { if (0 != munmap(buf, size)) { auto err = fmt::format("Allocator: Cannot munmap {}.", size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_MUNMAP); } else { RELEASE_THREAD_MEM_TRACKER(size); @@ -212,7 +212,7 @@ public: if (nullptr == new_buf) { auto err = fmt::format("Allocator: Cannot realloc from {} to {}.", old_size, new_size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY); } @@ -232,7 +232,7 @@ public: RELEASE_THREAD_MEM_TRACKER(new_size - old_size); auto err = fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.", old_size, new_size); - doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err); + 
doris::MemTrackerLimiter::print_log_process_usage(err); doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_MREMAP); } diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp b/be/src/vec/exec/scan/scanner_scheduler.cpp index dbb9cadb24..09578a2ba1 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.cpp +++ b/be/src/vec/exec/scan/scanner_scheduler.cpp @@ -185,10 +185,8 @@ void ScannerScheduler::_scanner_scan(ScannerScheduler* scheduler, ScannerContext VScanner* scanner) { INIT_AND_SCOPE_REENTRANT_SPAN_IF(ctx->state()->enable_profile(), ctx->state()->get_tracer(), ctx->scan_span(), "VScanner::scan"); - SCOPED_ATTACH_TASK(scanner->runtime_state()->scanner_mem_tracker(), - ThreadContext::query_to_task_type(scanner->runtime_state()->query_type()), - print_id(scanner->runtime_state()->query_id()), - scanner->runtime_state()->fragment_instance_id()); + SCOPED_ATTACH_TASK(scanner->runtime_state()); + SCOPED_CONSUME_MEM_TRACKER(scanner->runtime_state()->scanner_mem_tracker().get()); Thread::set_self_name("_scanner_scan"); scanner->update_wait_worker_timer(); // Do not use ScopedTimer. There is no guarantee that, the counter diff --git a/be/src/vec/exec/volap_scan_node.cpp b/be/src/vec/exec/volap_scan_node.cpp index 19ab63ae37..7e0abc1e77 100644 --- a/be/src/vec/exec/volap_scan_node.cpp +++ b/be/src/vec/exec/volap_scan_node.cpp @@ -394,10 +394,8 @@ void VOlapScanNode::transfer_thread(RuntimeState* state) { } void VOlapScanNode::scanner_thread(VOlapScanner* scanner) { - SCOPED_ATTACH_TASK(_runtime_state->scanner_mem_tracker(), - ThreadContext::query_to_task_type(_runtime_state->query_type()), - print_id(_runtime_state->query_id()), - _runtime_state->fragment_instance_id()); + SCOPED_ATTACH_TASK(_runtime_state); + SCOPED_CONSUME_MEM_TRACKER(_runtime_state->scanner_mem_tracker().get()); Thread::set_self_name("volap_scanner"); int64_t wait_time = scanner->update_wait_worker_timer(); // Do not use ScopedTimer. 
There is no guarantee that, the counter diff --git a/be/src/vec/runtime/vdata_stream_mgr.cpp b/be/src/vec/runtime/vdata_stream_mgr.cpp index 511fbbe19d..4743d0933f 100644 --- a/be/src/vec/runtime/vdata_stream_mgr.cpp +++ b/be/src/vec/runtime/vdata_stream_mgr.cpp @@ -53,7 +53,7 @@ std::shared_ptr VDataStreamMgr::create_recvr( VLOG_FILE << "creating receiver for fragment=" << fragment_instance_id << ", node=" << dest_node_id; std::shared_ptr recvr(new VDataStreamRecvr( - this, row_desc, fragment_instance_id, dest_node_id, num_senders, is_merging, + this, state, row_desc, fragment_instance_id, dest_node_id, num_senders, is_merging, buffer_size, profile, sub_plan_query_statistics_recvr)); uint32_t hash_value = get_hash_value(fragment_instance_id, dest_node_id); std::lock_guard l(_lock); diff --git a/be/src/vec/runtime/vdata_stream_recvr.cpp b/be/src/vec/runtime/vdata_stream_recvr.cpp index a649816a1b..7a9a55ebc1 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.cpp +++ b/be/src/vec/runtime/vdata_stream_recvr.cpp @@ -77,10 +77,7 @@ Status VDataStreamRecvr::SenderQueue::get_batch(Block** next_block) { if (!_pending_closures.empty()) { auto closure_pair = _pending_closures.front(); - { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); - closure_pair.first->Run(); - } + closure_pair.first->Run(); _pending_closures.pop_front(); closure_pair.second.stop(); @@ -224,11 +221,8 @@ void VDataStreamRecvr::SenderQueue::cancel() { { std::lock_guard l(_lock); - { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); - for (auto closure_pair : _pending_closures) { - closure_pair.first->Run(); - } + for (auto closure_pair : _pending_closures) { + closure_pair.first->Run(); } _pending_closures.clear(); } @@ -242,11 +236,8 @@ void VDataStreamRecvr::SenderQueue::close() { std::lock_guard l(_lock); _is_cancelled = true; - { - 
SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); - for (auto closure_pair : _pending_closures) { - closure_pair.first->Run(); - } + for (auto closure_pair : _pending_closures) { + closure_pair.first->Run(); } _pending_closures.clear(); } @@ -260,11 +251,12 @@ void VDataStreamRecvr::SenderQueue::close() { } VDataStreamRecvr::VDataStreamRecvr( - VDataStreamMgr* stream_mgr, const RowDescriptor& row_desc, + VDataStreamMgr* stream_mgr, RuntimeState* state, const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, int num_senders, bool is_merging, int total_buffer_limit, RuntimeProfile* profile, std::shared_ptr sub_plan_query_statistics_recvr) : _mgr(stream_mgr), + _state(state), _fragment_instance_id(fragment_instance_id), _dest_node_id(dest_node_id), _total_buffer_limit(total_buffer_limit), @@ -325,6 +317,8 @@ Status VDataStreamRecvr::create_merger(const std::vector& orderin void VDataStreamRecvr::add_block(const PBlock& pblock, int sender_id, int be_number, int64_t packet_seq, ::google::protobuf::Closure** done) { + SCOPED_ATTACH_TASK(_state->query_mem_tracker(), print_id(_state->query_id()), + _fragment_instance_id); SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); int use_sender_id = _is_merging ? 
sender_id : 0; _sender_queues[use_sender_id]->add_block(pblock, be_number, packet_seq, done); diff --git a/be/src/vec/runtime/vdata_stream_recvr.h b/be/src/vec/runtime/vdata_stream_recvr.h index 7372285125..254d85185c 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.h +++ b/be/src/vec/runtime/vdata_stream_recvr.h @@ -28,6 +28,7 @@ #include "common/status.h" #include "gen_cpp/Types_types.h" #include "runtime/descriptors.h" +#include "runtime/query_fragments_ctx.h" #include "runtime/query_statistics.h" #include "util/runtime_profile.h" @@ -50,7 +51,7 @@ class VExprContext; class VDataStreamRecvr { public: - VDataStreamRecvr(VDataStreamMgr* stream_mgr, const RowDescriptor& row_desc, + VDataStreamRecvr(VDataStreamMgr* stream_mgr, RuntimeState* state, const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, int num_senders, bool is_merging, int total_buffer_limit, RuntimeProfile* profile, @@ -97,6 +98,8 @@ private: // DataStreamMgr instance used to create this recvr. (Not owned) VDataStreamMgr* _mgr; + RuntimeState* _state; + // Fragment and node id of the destination exchange node this receiver is used by. 
TUniqueId _fragment_instance_id; PlanNodeId _dest_node_id; diff --git a/be/src/vec/sink/vdata_stream_sender.cpp b/be/src/vec/sink/vdata_stream_sender.cpp index 0ca087ea59..100341a197 100644 --- a/be/src/vec/sink/vdata_stream_sender.cpp +++ b/be/src/vec/sink/vdata_stream_sender.cpp @@ -139,7 +139,7 @@ Status VDataStreamSender::Channel::send_block(PBlock* block, bool eos) { _closure->ref(); } else { RETURN_IF_ERROR(_wait_last_brpc()); - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _closure->cntl.Reset(); } VLOG_ROW << "Channel::send_batch() instance_id=" << _fragment_instance_id @@ -162,7 +162,7 @@ Status VDataStreamSender::Channel::send_block(PBlock* block, bool eos) { if (_parent->_transfer_large_data_by_brpc && _brpc_request.has_block() && _brpc_request.block().has_column_values() && _brpc_request.ByteSizeLong() > MIN_HTTP_BRPC_SIZE) { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); Status st = request_embed_attachment_contain_block>( &_brpc_request, _closure); @@ -179,7 +179,7 @@ Status VDataStreamSender::Channel::send_block(PBlock* block, bool eos) { _brpc_http_stub->transmit_block_by_http(&_closure->cntl, nullptr, &_closure->result, _closure); } else { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _closure->cntl.http_request().Clear(); _brpc_stub->transmit_block(&_closure->cntl, &_brpc_request, &_closure->result, _closure); } diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp index 126e65cf85..04ce7c54dc 100644 --- a/be/src/vec/sink/vtablet_sink.cpp +++ b/be/src/vec/sink/vtablet_sink.cpp @@ -355,7 +355,7 @@ void 
VNodeChannel::try_send_block(RuntimeState* state) { _add_block_closure->cntl.http_request().set_content_type("application/json"); { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _brpc_http_stub->tablet_writer_add_block_by_http(&_add_block_closure->cntl, NULL, &_add_block_closure->result, _add_block_closure); @@ -363,7 +363,7 @@ void VNodeChannel::try_send_block(RuntimeState* state) { } else { _add_block_closure->cntl.http_request().Clear(); { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker()); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); _stub->tablet_writer_add_block(&_add_block_closure->cntl, &request, &_add_block_closure->result, _add_block_closure); } diff --git a/be/test/exec/broker_scan_node_test.cpp b/be/test/exec/broker_scan_node_test.cpp index 8b8e376b34..5ca63810f2 100644 --- a/be/test/exec/broker_scan_node_test.cpp +++ b/be/test/exec/broker_scan_node_test.cpp @@ -40,7 +40,7 @@ class BrokerScanNodeTest : public testing::Test { public: BrokerScanNodeTest() : _runtime_state(TQueryGlobals()) { init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); } void init(); static void SetUpTestCase() { diff --git a/be/test/exec/broker_scanner_test.cpp b/be/test/exec/broker_scanner_test.cpp index d750370aff..65387e575b 100644 --- a/be/test/exec/broker_scanner_test.cpp +++ b/be/test/exec/broker_scanner_test.cpp @@ -40,7 +40,7 @@ public: BrokerScannerTest() : _runtime_state(TQueryGlobals()) { init(); _profile = _runtime_state.runtime_profile(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); } void init(); diff --git a/be/test/exec/es_http_scan_node_test.cpp b/be/test/exec/es_http_scan_node_test.cpp index 8dc0e71b79..7d0a63596b 100644 --- a/be/test/exec/es_http_scan_node_test.cpp +++ 
b/be/test/exec/es_http_scan_node_test.cpp @@ -40,7 +40,7 @@ namespace doris { class EsHttpScanNodeTest : public testing::Test { public: EsHttpScanNodeTest() : _runtime_state(TQueryGlobals()) { - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); TDescriptorTable t_desc_table; // table descriptors diff --git a/be/test/exec/es_predicate_test.cpp b/be/test/exec/es_predicate_test.cpp index 8cf009b991..5a91580b36 100644 --- a/be/test/exec/es_predicate_test.cpp +++ b/be/test/exec/es_predicate_test.cpp @@ -43,7 +43,7 @@ class RuntimeState; class EsPredicateTest : public testing::Test { public: EsPredicateTest() : _runtime_state(TQueryGlobals()) { - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); TDescriptorTable t_desc_table; // table descriptors diff --git a/be/test/exec/hash_table_test.cpp b/be/test/exec/hash_table_test.cpp index 2a5e8f62d6..3a5ab8e3f0 100644 --- a/be/test/exec/hash_table_test.cpp +++ b/be/test/exec/hash_table_test.cpp @@ -49,7 +49,7 @@ public: HashTableTest() { _mem_pool.reset(new MemPool()); _state = _pool.add(new RuntimeState(TQueryGlobals())); - _state->init_instance_mem_tracker(); + _state->init_mem_trackers(); _state->_exec_env = ExecEnv::GetInstance(); } @@ -309,7 +309,7 @@ TEST_F(HashTableTest, GrowTableTest) { int64_t num_buckets = 4; HashTable hash_table(_build_expr, _probe_expr, 1, false, is_null_safe, initial_seed, num_buckets); - EXPECT_FALSE(hash_table.mem_tracker()->limit_exceeded(mem_limit)); + EXPECT_FALSE(hash_table.mem_tracker()->consumption() > mem_limit); for (int i = 0; i < LOOP_LESS_OR_MORE(1, 20); ++i) { for (int j = 0; j < num_to_add; ++build_row_val, ++j) { @@ -323,7 +323,7 @@ TEST_F(HashTableTest, GrowTableTest) { LOG(INFO) << "consume:" << hash_table.mem_tracker()->consumption() << ",expected_size:" << expected_size; - EXPECT_EQ(LOOP_LESS_OR_MORE(0, 1), hash_table.mem_tracker()->limit_exceeded(mem_limit)); + EXPECT_EQ(LOOP_LESS_OR_MORE(0, 1), 
hash_table.mem_tracker()->consumption() > mem_limit); // Validate that we can find the entries for (int i = 0; i < expected_size * 5; i += 100000) { diff --git a/be/test/exec/json_scanner_test.cpp b/be/test/exec/json_scanner_test.cpp index 5daa4ef19a..5fbc3b3c3d 100644 --- a/be/test/exec/json_scanner_test.cpp +++ b/be/test/exec/json_scanner_test.cpp @@ -42,7 +42,7 @@ class JsonScannerTest : public testing::Test { public: JsonScannerTest() : _runtime_state(TQueryGlobals()) { init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); _runtime_state._exec_env = ExecEnv::GetInstance(); } void init(); diff --git a/be/test/exec/json_scanner_with_jsonpath_test.cpp b/be/test/exec/json_scanner_with_jsonpath_test.cpp index 578cd10443..e54113c0bc 100644 --- a/be/test/exec/json_scanner_with_jsonpath_test.cpp +++ b/be/test/exec/json_scanner_with_jsonpath_test.cpp @@ -41,7 +41,7 @@ class JsonScannerWithJsonPathTest : public testing::Test { public: JsonScannerWithJsonPathTest() : _runtime_state(TQueryGlobals()) { init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); _runtime_state._exec_env = ExecEnv::GetInstance(); } void init(); diff --git a/be/test/exec/orc_scanner_test.cpp b/be/test/exec/orc_scanner_test.cpp index bcc4d79e53..7356458f47 100644 --- a/be/test/exec/orc_scanner_test.cpp +++ b/be/test/exec/orc_scanner_test.cpp @@ -44,7 +44,7 @@ class OrcScannerTest : public testing::Test { public: OrcScannerTest() : _runtime_state(TQueryGlobals()) { _profile = _runtime_state.runtime_profile(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); } static void SetUpTestCase() { diff --git a/be/test/exec/parquet_scanner_test.cpp b/be/test/exec/parquet_scanner_test.cpp index 35d0f6e359..d330a44b3f 100644 --- a/be/test/exec/parquet_scanner_test.cpp +++ b/be/test/exec/parquet_scanner_test.cpp @@ -40,7 +40,7 @@ class ParquetScannerTest : public testing::Test { public: 
ParquetScannerTest() : _runtime_state(TQueryGlobals()) { init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); } void init(); static void SetUpTestCase() { diff --git a/be/test/exec/tablet_sink_test.cpp b/be/test/exec/tablet_sink_test.cpp index 4e2d36bc88..566d9675be 100644 --- a/be/test/exec/tablet_sink_test.cpp +++ b/be/test/exec/tablet_sink_test.cpp @@ -25,7 +25,6 @@ #include "runtime/decimalv2_value.h" #include "runtime/descriptor_helper.h" #include "runtime/exec_env.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/result_queue_mgr.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" @@ -56,7 +55,6 @@ public: _env->_load_stream_mgr = new LoadStreamMgr(); _env->_internal_client_cache = new BrpcClientCache(); _env->_function_client_cache = new BrpcClientCache(); - _env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool(); ThreadPoolBuilder("SendBatchThreadPool") .set_min_threads(1) .set_max_threads(5) @@ -72,7 +70,6 @@ public: SAFE_DELETE(_env->_load_stream_mgr); SAFE_DELETE(_env->_master_info); SAFE_DELETE(_env->_thread_mgr); - SAFE_DELETE(_env->_task_pool_mem_tracker_registry); if (_server) { _server->Stop(100); _server->Join(); diff --git a/be/test/exprs/runtime_filter_test.cpp b/be/test/exprs/runtime_filter_test.cpp index d4b66f295c..ee319ac6cb 100644 --- a/be/test/exprs/runtime_filter_test.cpp +++ b/be/test/exprs/runtime_filter_test.cpp @@ -42,7 +42,7 @@ public: exec_env = nullptr; _runtime_stat.reset( new RuntimeState(_fragment_id, _query_options, _query_globals, exec_env)); - _runtime_stat->init_instance_mem_tracker(); + _runtime_stat->init_mem_trackers(); } virtual void TearDown() { _obj_pool.clear(); } diff --git a/be/test/olap/lru_cache_test.cpp b/be/test/olap/lru_cache_test.cpp index c9fb0cbd93..bafe4e7288 100644 --- a/be/test/olap/lru_cache_test.cpp +++ b/be/test/olap/lru_cache_test.cpp @@ -21,6 +21,7 @@ #include +#include "runtime/memory/mem_tracker_limiter.h" 
#include "testutil/test_util.h" using namespace doris; @@ -221,7 +222,7 @@ static void insert_LRUCache(LRUCache& cache, const CacheKey& key, int value, CachePriority priority) { uint32_t hash = key.hash(key.data(), key.size(), 0); static std::unique_ptr lru_cache_tracker = - std::make_unique(-1, "TestLruCache"); + std::make_unique(MemTrackerLimiter::Type::GLOBAL, "TestLruCache"); cache.release(cache.insert(key, hash, EncodeValue(value), value, &deleter, lru_cache_tracker.get(), priority)); } diff --git a/be/test/runtime/mem_limit_test.cpp b/be/test/runtime/mem_limit_test.cpp index 811102b216..74b06033b0 100644 --- a/be/test/runtime/mem_limit_test.cpp +++ b/be/test/runtime/mem_limit_test.cpp @@ -24,7 +24,7 @@ namespace doris { TEST(MemTrackerTest, SingleTrackerNoLimit) { - auto t = std::make_unique(); + auto t = std::make_shared(MemTrackerLimiter::Type::GLOBAL); EXPECT_FALSE(t->has_limit()); t->consume(10); EXPECT_EQ(t->consumption(), 10); @@ -37,7 +37,8 @@ TEST(MemTrackerTest, SingleTrackerNoLimit) { } TEST(MemTestTest, SingleTrackerWithLimit) { - auto t = std::make_unique(11, "limit tracker"); + auto t = std::make_unique(MemTrackerLimiter::Type::GLOBAL, "limit tracker", + 11); EXPECT_TRUE(t->has_limit()); t->consume(10); EXPECT_EQ(t->consumption(), 10); @@ -51,95 +52,4 @@ TEST(MemTestTest, SingleTrackerWithLimit) { t->release(5); } -TEST(MemTestTest, TrackerHierarchy) { - auto p = std::make_shared(100); - auto c1 = std::make_unique(80, "c1", p); - auto c2 = std::make_unique(50, "c2", p); - - // everything below limits - c1->consume(60); - EXPECT_EQ(c1->consumption(), 60); - EXPECT_FALSE(c1->limit_exceeded()); - EXPECT_FALSE(c1->any_limit_exceeded()); - EXPECT_EQ(c2->consumption(), 0); - EXPECT_FALSE(c2->limit_exceeded()); - EXPECT_FALSE(c2->any_limit_exceeded()); - EXPECT_EQ(p->consumption(), 60); - EXPECT_FALSE(p->limit_exceeded()); - EXPECT_FALSE(p->any_limit_exceeded()); - - // p goes over limit - c2->consume(50); - EXPECT_EQ(c1->consumption(), 60); - 
EXPECT_FALSE(c1->limit_exceeded()); - EXPECT_TRUE(c1->any_limit_exceeded()); - EXPECT_EQ(c2->consumption(), 50); - EXPECT_FALSE(c2->limit_exceeded()); - EXPECT_TRUE(c2->any_limit_exceeded()); - EXPECT_EQ(p->consumption(), 110); - EXPECT_TRUE(p->limit_exceeded()); - - // c2 goes over limit, p drops below limit - c1->release(20); - c2->consume(10); - EXPECT_EQ(c1->consumption(), 40); - EXPECT_FALSE(c1->limit_exceeded()); - EXPECT_FALSE(c1->any_limit_exceeded()); - EXPECT_EQ(c2->consumption(), 60); - EXPECT_TRUE(c2->limit_exceeded()); - EXPECT_TRUE(c2->any_limit_exceeded()); - EXPECT_EQ(p->consumption(), 100); - EXPECT_FALSE(p->limit_exceeded()); - c1->release(40); - c2->release(60); -} - -TEST(MemTestTest, TrackerHierarchyTryConsume) { - auto p = std::make_shared(100); - auto c1 = std::make_unique(80, "c1", p); - auto c2 = std::make_unique(50, "c2", p); - - // everything below limits - std::string err_msg = ""; - bool consumption = c1->try_consume(60, err_msg); - EXPECT_EQ(consumption, true); - EXPECT_EQ(c1->consumption(), 60); - EXPECT_FALSE(c1->limit_exceeded()); - EXPECT_FALSE(c1->any_limit_exceeded()); - EXPECT_EQ(c2->consumption(), 0); - EXPECT_FALSE(c2->limit_exceeded()); - EXPECT_FALSE(c2->any_limit_exceeded()); - EXPECT_EQ(p->consumption(), 60); - EXPECT_FALSE(p->limit_exceeded()); - EXPECT_FALSE(p->any_limit_exceeded()); - - // p goes over limit - consumption = c2->try_consume(50, err_msg); - EXPECT_EQ(consumption, false); - EXPECT_EQ(c1->consumption(), 60); - EXPECT_FALSE(c1->limit_exceeded()); - EXPECT_FALSE(c1->any_limit_exceeded()); - EXPECT_EQ(c2->consumption(), 0); - EXPECT_FALSE(c2->limit_exceeded()); - EXPECT_FALSE(c2->any_limit_exceeded()); - EXPECT_EQ(p->consumption(), 60); - EXPECT_FALSE(p->limit_exceeded()); - EXPECT_FALSE(p->any_limit_exceeded()); - - // c2 goes over limit, p drops below limit - c1->release(20); - c2->consume(10); - EXPECT_EQ(c1->consumption(), 40); - EXPECT_FALSE(c1->limit_exceeded()); - EXPECT_FALSE(c1->any_limit_exceeded()); 
- EXPECT_EQ(c2->consumption(), 10); - EXPECT_FALSE(c2->limit_exceeded()); - EXPECT_FALSE(c2->any_limit_exceeded()); - EXPECT_EQ(p->consumption(), 50); - EXPECT_FALSE(p->limit_exceeded()); - - c1->release(40); - c2->release(10); -} - } // end namespace doris diff --git a/be/test/runtime/test_env.cc b/be/test/runtime/test_env.cc index db32d9a30e..dc2b53c9f6 100644 --- a/be/test/runtime/test_env.cc +++ b/be/test/runtime/test_env.cc @@ -24,7 +24,6 @@ #include "olap/storage_engine.h" #include "runtime/bufferpool/buffer_pool.h" #include "runtime/fragment_mgr.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/result_queue_mgr.h" #include "util/disk_info.h" #include "util/priority_thread_pool.hpp" @@ -35,7 +34,6 @@ TestEnv::TestEnv() { // Some code will use ExecEnv::GetInstance(), so init the global ExecEnv singleton _exec_env = ExecEnv::GetInstance(); _exec_env->_thread_mgr = new ThreadResourceMgr(2); - _exec_env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool(); _exec_env->_disk_io_mgr = new DiskIoMgr(1, 1, 1, 10); _exec_env->disk_io_mgr()->init(-1); _exec_env->_scan_thread_pool = new PriorityThreadPool(1, 16, "ut_scan"); @@ -62,7 +60,6 @@ TestEnv::~TestEnv() { SAFE_DELETE(_exec_env->_buffer_pool); SAFE_DELETE(_exec_env->_scan_thread_pool); SAFE_DELETE(_exec_env->_disk_io_mgr); - SAFE_DELETE(_exec_env->_task_pool_mem_tracker_registry); SAFE_DELETE(_exec_env->_thread_mgr); if (_engine == StorageEngine::_s_instance) { diff --git a/be/test/testutil/run_all_tests.cpp b/be/test/testutil/run_all_tests.cpp index a1e53f7ed0..570f428720 100644 --- a/be/test/testutil/run_all_tests.cpp +++ b/be/test/testutil/run_all_tests.cpp @@ -28,16 +28,10 @@ #include "util/mem_info.h" int main(int argc, char** argv) { - std::shared_ptr process_mem_tracker = - std::make_shared(-1, "Process"); std::shared_ptr orphan_mem_tracker = - std::make_shared(-1, "Orphan", process_mem_tracker); - std::shared_ptr nursery_mem_tracker = - std::make_shared(-1, "Nursery", 
orphan_mem_tracker); - std::shared_ptr bthread_mem_tracker = - std::make_shared(-1, "Bthread", orphan_mem_tracker); - doris::ExecEnv::GetInstance()->set_global_mem_tracker(process_mem_tracker, orphan_mem_tracker, - nursery_mem_tracker, bthread_mem_tracker); + std::make_shared(doris::MemTrackerLimiter::Type::GLOBAL, + "Orphan"); + doris::ExecEnv::GetInstance()->set_orphan_mem_tracker(orphan_mem_tracker); doris::thread_context()->_thread_mem_tracker_mgr->init(); doris::TabletSchemaCache::create_global_schema_cache(); doris::StoragePageCache::create_global_cache(1 << 30, 10); diff --git a/be/test/util/arrow/arrow_work_flow_test.cpp b/be/test/util/arrow/arrow_work_flow_test.cpp index 0475d479a4..7acd5e58b7 100644 --- a/be/test/util/arrow/arrow_work_flow_test.cpp +++ b/be/test/util/arrow/arrow_work_flow_test.cpp @@ -29,7 +29,6 @@ #include "gen_cpp/Types_types.h" #include "olap/row.h" #include "runtime/exec_env.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/result_queue_mgr.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" @@ -66,7 +65,6 @@ protected: if (_exec_env) { delete _exec_env->_result_queue_mgr; delete _exec_env->_thread_mgr; - delete _exec_env->_task_pool_mem_tracker_registry; } } @@ -92,7 +90,6 @@ void ArrowWorkFlowTest::init() { void ArrowWorkFlowTest::init_runtime_state() { _exec_env->_result_queue_mgr = new ResultQueueMgr(); _exec_env->_thread_mgr = new ThreadResourceMgr(); - _exec_env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool(); _exec_env->_is_init = true; TQueryOptions query_options; query_options.batch_size = 1024; @@ -100,7 +97,7 @@ void ArrowWorkFlowTest::init_runtime_state() { query_id.lo = 10; query_id.hi = 100; _state = new RuntimeState(query_id, query_options, TQueryGlobals(), _exec_env); - _state->init_instance_mem_tracker(); + _state->init_mem_trackers(); _state->set_desc_tbl(_desc_tbl); _state->_load_dir = "./test_run/output/"; _state->init_mem_trackers(TUniqueId()); diff --git 
a/be/test/vec/exec/parquet/parquet_reader_test.cpp b/be/test/vec/exec/parquet/parquet_reader_test.cpp index b2288338b3..480460f394 100644 --- a/be/test/vec/exec/parquet/parquet_reader_test.cpp +++ b/be/test/vec/exec/parquet/parquet_reader_test.cpp @@ -109,7 +109,7 @@ TEST_F(ParquetReaderTest, normal) { p_reader->set_file_reader(reader); RuntimeState runtime_state((TQueryGlobals())); runtime_state.set_desc_tbl(desc_tbl); - runtime_state.init_instance_mem_tracker(); + runtime_state.init_mem_trackers(); std::unordered_map colname_to_value_range; p_reader->init_reader(&colname_to_value_range); diff --git a/be/test/vec/exec/vbroker_scan_node_test.cpp b/be/test/vec/exec/vbroker_scan_node_test.cpp index 195f468841..5da2f255f3 100644 --- a/be/test/vec/exec/vbroker_scan_node_test.cpp +++ b/be/test/vec/exec/vbroker_scan_node_test.cpp @@ -45,7 +45,7 @@ class VBrokerScanNodeTest : public testing::Test { public: VBrokerScanNodeTest() : _runtime_state(TQueryGlobals()) { init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); _runtime_state._query_options.enable_vectorized_engine = true; } void init(); diff --git a/be/test/vec/exec/vbroker_scanner_test.cpp b/be/test/vec/exec/vbroker_scanner_test.cpp index 5cb9afc4b2..225eeeb16b 100644 --- a/be/test/vec/exec/vbroker_scanner_test.cpp +++ b/be/test/vec/exec/vbroker_scanner_test.cpp @@ -40,7 +40,7 @@ public: VBrokerScannerTest() : _runtime_state(TQueryGlobals()) { init(); _profile = _runtime_state.runtime_profile(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); TUniqueId unique_id; TQueryOptions query_options; diff --git a/be/test/vec/exec/vjson_scanner_test.cpp b/be/test/vec/exec/vjson_scanner_test.cpp index f06b8233d6..55775a9555 100644 --- a/be/test/vec/exec/vjson_scanner_test.cpp +++ b/be/test/vec/exec/vjson_scanner_test.cpp @@ -47,7 +47,7 @@ class VJsonScannerTest : public testing::Test { public: VJsonScannerTest() : _runtime_state(TQueryGlobals()) { 
init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); TUniqueId unique_id; TQueryOptions query_options; diff --git a/be/test/vec/exec/vorc_scanner_test.cpp b/be/test/vec/exec/vorc_scanner_test.cpp index 7e9aff95ba..0362ebb080 100644 --- a/be/test/vec/exec/vorc_scanner_test.cpp +++ b/be/test/vec/exec/vorc_scanner_test.cpp @@ -46,7 +46,7 @@ class VOrcScannerTest : public testing::Test { public: VOrcScannerTest() : _runtime_state(TQueryGlobals()) { _profile = _runtime_state.runtime_profile(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); _runtime_state._query_options.enable_vectorized_engine = true; } ~VOrcScannerTest() {} diff --git a/be/test/vec/exec/vparquet_scanner_test.cpp b/be/test/vec/exec/vparquet_scanner_test.cpp index bb1bb2c7f3..c08a69a005 100644 --- a/be/test/vec/exec/vparquet_scanner_test.cpp +++ b/be/test/vec/exec/vparquet_scanner_test.cpp @@ -41,7 +41,7 @@ class VParquetScannerTest : public testing::Test { public: VParquetScannerTest() : _runtime_state(TQueryGlobals()) { init(); - _runtime_state.init_instance_mem_tracker(); + _runtime_state.init_mem_trackers(); _runtime_state._query_options.enable_vectorized_engine = true; } ~VParquetScannerTest() {} diff --git a/be/test/vec/exec/vtablet_sink_test.cpp b/be/test/vec/exec/vtablet_sink_test.cpp index ae1f615035..4e75c6808e 100644 --- a/be/test/vec/exec/vtablet_sink_test.cpp +++ b/be/test/vec/exec/vtablet_sink_test.cpp @@ -28,7 +28,6 @@ #include "runtime/decimalv2_value.h" #include "runtime/descriptor_helper.h" #include "runtime/exec_env.h" -#include "runtime/memory/mem_tracker_task_pool.h" #include "runtime/result_queue_mgr.h" #include "runtime/runtime_state.h" #include "runtime/stream_load/load_stream_mgr.h" @@ -157,7 +156,6 @@ public: _env->_load_stream_mgr = new LoadStreamMgr(); _env->_internal_client_cache = new BrpcClientCache(); _env->_function_client_cache = new BrpcClientCache(); - _env->_task_pool_mem_tracker_registry = 
new MemTrackerTaskPool(); ThreadPoolBuilder("SendBatchThreadPool") .set_min_threads(1) .set_max_threads(5) @@ -173,7 +171,6 @@ public: SAFE_DELETE(_env->_load_stream_mgr); SAFE_DELETE(_env->_master_info); SAFE_DELETE(_env->_thread_mgr); - SAFE_DELETE(_env->_task_pool_mem_tracker_registry); if (_server) { _server->Stop(100); _server->Join(); diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp index 27c76c320f..593cfc932e 100644 --- a/be/test/vec/exprs/vexpr_test.cpp +++ b/be/test/vec/exprs/vexpr_test.cpp @@ -69,7 +69,7 @@ TEST(TEST_VEXPR, ABSTEST) { doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(), doris::TQueryGlobals(), nullptr); - runtime_stat.init_instance_mem_tracker(); + runtime_stat.init_mem_trackers(); runtime_stat.set_desc_tbl(desc_tbl); context->prepare(&runtime_stat, row_desc); context->open(&runtime_stat); @@ -112,7 +112,7 @@ TEST(TEST_VEXPR, ABSTEST2) { doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(), doris::TQueryGlobals(), nullptr); - runtime_stat.init_instance_mem_tracker(); + runtime_stat.init_mem_trackers(); DescriptorTbl desc_tbl; desc_tbl._slot_desc_map[0] = tuple_desc->slots()[0]; runtime_stat.set_desc_tbl(&desc_tbl); diff --git a/be/test/vec/runtime/vdata_stream_test.cpp b/be/test/vec/runtime/vdata_stream_test.cpp index 175fa64ee3..539dd51d74 100644 --- a/be/test/vec/runtime/vdata_stream_test.cpp +++ b/be/test/vec/runtime/vdata_stream_test.cpp @@ -114,7 +114,7 @@ TEST_F(VDataStreamTest, BasicTest) { doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(), doris::TQueryGlobals(), nullptr); - runtime_stat.init_instance_mem_tracker(); + runtime_stat.init_mem_trackers(); runtime_stat.set_desc_tbl(desc_tbl); runtime_stat.set_be_number(1); runtime_stat._exec_env = _object_pool.add(new ExecEnv);