From 4960043f5eb044ef22abe78f355f121bda0c9671 Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Thu, 21 Jul 2022 17:11:28 +0800 Subject: [PATCH] [enhancement] Refactor to improve the usability of MemTracker (step2) (#10823) --- be/src/agent/task_worker_pool.cpp | 2 - be/src/common/config.h | 16 +- be/src/exec/analytic_eval_node.cpp | 30 +- be/src/exec/assert_num_rows_node.cpp | 4 +- be/src/exec/base_scanner.cpp | 22 +- be/src/exec/base_scanner.h | 2 - be/src/exec/blocking_join_node.cpp | 8 +- be/src/exec/broker_scan_node.cpp | 11 +- be/src/exec/cross_join_node.cpp | 6 +- be/src/exec/csv_scan_node.cpp | 6 +- be/src/exec/data_sink.cpp | 3 - be/src/exec/data_sink.h | 3 - be/src/exec/es/es_scroll_parser.cpp | 10 +- be/src/exec/es_http_scan_node.cpp | 8 +- be/src/exec/es_http_scanner.cpp | 11 +- be/src/exec/except_node.cpp | 6 +- be/src/exec/exchange_node.cpp | 10 +- be/src/exec/exec_node.cpp | 21 +- be/src/exec/exec_node.h | 8 +- be/src/exec/hash_join_node.cpp | 25 +- be/src/exec/hash_table.cpp | 11 +- be/src/exec/hash_table.h | 7 +- be/src/exec/intersect_node.cpp | 6 +- be/src/exec/merge_node.cpp | 12 +- be/src/exec/mysql_scan_node.cpp | 8 +- be/src/exec/odbc_scan_node.cpp | 8 +- be/src/exec/olap_scan_node.cpp | 23 +- be/src/exec/olap_scan_node.h | 2 +- be/src/exec/olap_scanner.cpp | 17 +- be/src/exec/olap_scanner.h | 7 +- be/src/exec/partitioned_aggregation_node.cc | 38 +- be/src/exec/partitioned_hash_table.cc | 41 +- be/src/exec/partitioned_hash_table.h | 15 +- be/src/exec/repeat_node.cpp | 6 +- be/src/exec/schema_scan_node.cpp | 6 +- be/src/exec/select_node.cpp | 6 +- be/src/exec/set_operation_node.cpp | 14 +- be/src/exec/set_operation_node.h | 3 +- be/src/exec/sort_exec_exprs.cpp | 9 +- be/src/exec/sort_exec_exprs.h | 3 +- be/src/exec/spill_sort_node.cc | 11 +- be/src/exec/table_function_node.cpp | 8 +- be/src/exec/tablet_info.cpp | 4 +- be/src/exec/tablet_info.h | 2 +- be/src/exec/tablet_sink.cpp | 42 +- be/src/exec/tablet_sink.h | 10 +- be/src/exec/topn_node.cpp | 13 +- be/src/exec/union_node.cpp | 11 +- be/src/exprs/agg_fn_evaluator.cpp | 7 +- be/src/exprs/agg_fn_evaluator.h | 5 +- be/src/exprs/anyval_util.cpp | 5 +- be/src/exprs/bloomfilter_predicate.h | 15 +- be/src/exprs/expr.cpp | 4 +- be/src/exprs/expr.h | 3 +- be/src/exprs/expr_context.cpp | 23 +- be/src/exprs/expr_context.h | 10 +- be/src/exprs/new_agg_fn_evaluator.cc | 9 +- be/src/exprs/new_agg_fn_evaluator.h | 3 - be/src/exprs/runtime_filter.cpp | 8 +- be/src/exprs/runtime_filter.h | 6 +- be/src/gutil/strings/numbers.cc | 8 +- be/src/http/default_path_handlers.cpp | 96 +-- be/src/http/default_path_handlers.h | 3 +- be/src/olap/base_compaction.cpp | 2 + be/src/olap/byte_buffer.cpp | 14 +- be/src/olap/compaction.cpp | 7 +- be/src/olap/compaction.h | 4 +- be/src/olap/cumulative_compaction.cpp | 2 + be/src/olap/delta_writer.cpp | 19 +- be/src/olap/delta_writer.h | 2 +- be/src/olap/lru_cache.cpp | 13 +- be/src/olap/lru_cache.h | 14 +- be/src/olap/memtable.cpp | 11 +- be/src/olap/memtable.h | 10 +- be/src/olap/memtable_flush_executor.cpp | 2 +- be/src/olap/merger.cpp | 2 +- be/src/olap/olap_server.cpp | 2 - be/src/olap/page_cache.cpp | 5 +- be/src/olap/page_cache.h | 4 +- be/src/olap/push_handler.cpp | 2 +- be/src/olap/reader.cpp | 2 +- be/src/olap/row_block2.h | 2 +- .../rowset/segment_v2/bitmap_index_reader.h | 2 +- .../rowset/segment_v2/bitmap_index_writer.cpp | 2 +- .../segment_v2/bloom_filter_index_reader.h | 2 +- .../segment_v2/bloom_filter_index_writer.cpp | 2 +- be/src/olap/rowset/segment_v2/column_reader.h | 2 +- 
be/src/olap/rowset/segment_v2/column_writer.h | 3 - .../segment_v2/indexed_column_writer.cpp | 2 +- be/src/olap/rowset/segment_v2/segment.cpp | 17 +- be/src/olap/rowset/segment_v2/segment.h | 4 +- .../olap/rowset/segment_v2/segment_writer.cpp | 6 +- .../olap/rowset/segment_v2/segment_writer.h | 2 +- be/src/olap/schema_change.cpp | 43 +- be/src/olap/schema_change.h | 4 +- be/src/olap/snapshot_manager.cpp | 6 +- be/src/olap/snapshot_manager.h | 5 +- be/src/olap/storage_engine.cpp | 24 +- be/src/olap/storage_engine.h | 32 +- be/src/olap/tablet.cpp | 8 - be/src/olap/tablet.h | 2 - be/src/olap/tablet_manager.cpp | 19 +- be/src/olap/tablet_manager.h | 2 +- be/src/olap/task/engine_alter_tablet_task.cpp | 8 +- be/src/olap/task/engine_alter_tablet_task.h | 2 +- be/src/olap/task/engine_batch_load_task.cpp | 6 +- be/src/olap/task/engine_batch_load_task.h | 2 +- be/src/olap/task/engine_checksum_task.cpp | 8 +- be/src/olap/task/engine_checksum_task.h | 2 +- be/src/olap/task/engine_clone_task.cpp | 6 +- be/src/olap/task/engine_clone_task.h | 2 +- be/src/runtime/CMakeLists.txt | 7 +- be/src/runtime/buffered_block_mgr2.cc | 152 +---- be/src/runtime/buffered_block_mgr2.h | 28 +- be/src/runtime/bufferpool/buffer_allocator.cc | 9 +- be/src/runtime/bufferpool/buffer_allocator.h | 4 +- be/src/runtime/bufferpool/buffer_pool.cc | 8 +- be/src/runtime/bufferpool/buffer_pool.h | 4 +- .../runtime/bufferpool/buffer_pool_internal.h | 3 +- .../runtime/bufferpool/reservation_tracker.cc | 25 +- .../runtime/bufferpool/reservation_tracker.h | 2 +- be/src/runtime/bufferpool/system_allocator.cc | 10 +- be/src/runtime/data_stream_recvr.cc | 19 +- be/src/runtime/data_stream_recvr.h | 3 +- be/src/runtime/data_stream_sender.cpp | 19 +- be/src/runtime/data_stream_sender.h | 2 +- be/src/runtime/disk_io_mgr.cc | 92 +-- be/src/runtime/disk_io_mgr.h | 28 +- be/src/runtime/disk_io_mgr_internal.h | 5 +- be/src/runtime/disk_io_mgr_reader_context.cc | 3 +- be/src/runtime/dpp_sink_internal.cpp | 5 +- be/src/runtime/dpp_sink_internal.h | 4 +- be/src/runtime/exec_env.h | 23 +- be/src/runtime/exec_env_init.cpp | 36 +- be/src/runtime/export_sink.cpp | 2 +- be/src/runtime/fold_constant_executor.cpp | 12 +- be/src/runtime/fold_constant_executor.h | 2 +- be/src/runtime/fragment_mgr.cpp | 6 +- be/src/runtime/initial_reservations.cc | 7 +- be/src/runtime/initial_reservations.h | 3 - be/src/runtime/load_channel.cpp | 10 +- be/src/runtime/load_channel.h | 7 +- be/src/runtime/load_channel_mgr.cpp | 29 +- be/src/runtime/load_channel_mgr.h | 5 +- be/src/runtime/mem_pool.cpp | 44 +- be/src/runtime/mem_pool.h | 3 - be/src/runtime/mem_tracker.cpp | 354 ----------- be/src/runtime/mem_tracker.h | 554 ------------------ be/src/runtime/mem_tracker_task_pool.cpp | 159 ----- be/src/runtime/mem_tracker_task_pool.h | 54 -- be/src/runtime/memory/chunk_allocator.cpp | 48 +- be/src/runtime/memory/chunk_allocator.h | 26 +- be/src/runtime/memory/mem_tracker.cpp | 116 ++++ be/src/runtime/memory/mem_tracker.h | 129 ++++ be/src/runtime/memory/mem_tracker_base.cpp | 53 -- be/src/runtime/memory/mem_tracker_base.h | 78 --- be/src/runtime/memory/mem_tracker_limiter.cpp | 201 ++----- be/src/runtime/memory/mem_tracker_limiter.h | 263 +++------ be/src/runtime/memory/mem_tracker_observe.cpp | 87 --- be/src/runtime/memory/mem_tracker_observe.h | 91 --- .../runtime/memory/mem_tracker_task_pool.cpp | 38 +- be/src/runtime/memory/system_allocator.cpp | 6 +- be/src/runtime/{ => memory}/tcmalloc_hook.h | 34 +- .../runtime/memory/thread_mem_tracker_mgr.cpp | 70 +++ 
.../runtime/memory/thread_mem_tracker_mgr.h | 215 +++++++ be/src/runtime/memory_scratch_sink.cpp | 2 +- be/src/runtime/mysql_table_sink.cpp | 8 +- be/src/runtime/mysql_table_sink.h | 2 - be/src/runtime/odbc_table_sink.cpp | 2 +- be/src/runtime/plan_fragment_executor.cpp | 6 +- be/src/runtime/qsorter.cpp | 4 +- be/src/runtime/result_file_sink.cpp | 2 +- be/src/runtime/result_sink.cpp | 6 +- be/src/runtime/row_batch.cpp | 9 +- be/src/runtime/row_batch.h | 9 - be/src/runtime/runtime_filter_mgr.cpp | 21 +- be/src/runtime/runtime_filter_mgr.h | 7 +- be/src/runtime/runtime_state.cpp | 43 +- be/src/runtime/runtime_state.h | 8 +- be/src/runtime/sorted_run_merger.cc | 6 +- be/src/runtime/spill_sorter.cc | 7 +- be/src/runtime/spill_sorter.h | 6 +- be/src/runtime/tablets_channel.cpp | 9 +- be/src/runtime/tablets_channel.h | 7 +- be/src/runtime/thread_context.cpp | 166 ++---- be/src/runtime/thread_context.h | 197 +++---- be/src/runtime/thread_mem_tracker_mgr.cpp | 83 --- be/src/runtime/thread_mem_tracker_mgr.h | 305 ---------- be/src/service/doris_main.cpp | 19 +- be/src/service/http_service.cpp | 2 +- be/src/service/internal_service.cpp | 83 +-- be/src/util/doris_metrics.cpp | 8 +- be/src/util/doris_metrics.h | 4 +- be/src/util/file_utils.cpp | 4 +- be/src/vec/columns/predicate_column.h | 2 +- be/src/vec/common/allocator.h | 10 +- be/src/vec/exec/file_scan_node.cpp | 17 +- be/src/vec/exec/file_scanner.cpp | 13 +- be/src/vec/exec/file_scanner.h | 1 - be/src/vec/exec/join/vhash_join_node.cpp | 24 +- be/src/vec/exec/join/vhash_join_node.h | 2 - be/src/vec/exec/vaggregation_node.cpp | 18 +- be/src/vec/exec/vaggregation_node.h | 2 +- be/src/vec/exec/vanalytic_eval_node.cpp | 19 +- be/src/vec/exec/vblocking_join_node.cpp | 8 +- be/src/vec/exec/vbroker_scan_node.cpp | 11 +- be/src/vec/exec/vcross_join_node.cpp | 11 +- be/src/vec/exec/vcross_join_node.h | 2 - be/src/vec/exec/ves_http_scan_node.cpp | 8 +- be/src/vec/exec/vexchange_node.cpp | 10 +- be/src/vec/exec/vmysql_scan_node.cpp | 6 +- be/src/vec/exec/vodbc_scan_node.cpp | 6 +- be/src/vec/exec/volap_scan_node.cpp | 27 +- be/src/vec/exec/volap_scan_node.h | 4 +- be/src/vec/exec/volap_scanner.cpp | 18 +- be/src/vec/exec/volap_scanner.h | 7 +- be/src/vec/exec/vschema_scan_node.cpp | 4 +- be/src/vec/exec/vset_operation_node.cpp | 14 +- be/src/vec/exec/vset_operation_node.h | 2 - be/src/vec/exec/vsort_exec_exprs.cpp | 9 +- be/src/vec/exec/vsort_exec_exprs.h | 3 +- be/src/vec/exec/vsort_node.cpp | 12 +- be/src/vec/exec/vsort_node.h | 2 - be/src/vec/exec/vtable_function_node.cpp | 2 +- be/src/vec/exec/vunion_node.cpp | 5 +- be/src/vec/exprs/vectorized_agg_fn.cpp | 5 +- be/src/vec/exprs/vectorized_agg_fn.h | 3 +- be/src/vec/exprs/vexpr.cpp | 4 +- be/src/vec/exprs/vexpr.h | 3 +- be/src/vec/exprs/vexpr_context.cpp | 16 +- be/src/vec/exprs/vexpr_context.h | 7 +- be/src/vec/runtime/vdata_stream_recvr.cpp | 32 +- be/src/vec/runtime/vdata_stream_recvr.h | 4 +- be/src/vec/runtime/vpartition_info.cpp | 5 +- be/src/vec/runtime/vpartition_info.h | 3 +- be/src/vec/sink/vdata_stream_sender.cpp | 19 +- be/src/vec/sink/vdata_stream_sender.h | 2 +- be/src/vec/sink/vmysql_table_sink.cpp | 8 +- be/src/vec/sink/vmysql_table_sink.h | 2 - be/src/vec/sink/vodbc_table_sink.cpp | 8 +- be/src/vec/sink/vodbc_table_sink.h | 2 - be/src/vec/sink/vresult_file_sink.cpp | 2 +- be/src/vec/sink/vresult_sink.cpp | 4 +- be/src/vec/sink/vtablet_sink.cpp | 11 +- be/test/exec/broker_scan_node_test.cpp | 2 +- be/test/exec/broker_scanner_test.cpp | 26 +- be/test/exec/es_http_scan_node_test.cpp | 2 +- 
be/test/exec/es_predicate_test.cpp | 4 +- be/test/exec/hash_table_test.cpp | 38 +- be/test/exec/json_scanner_test.cpp | 2 +- .../exec/json_scanner_with_jsonpath_test.cpp | 2 +- be/test/exec/multi_bytes_separator_test.cpp | 2 +- be/test/exec/orc_scanner_test.cpp | 11 +- be/test/exec/parquet_scanner_test.cpp | 2 +- be/test/exec/tablet_info_test.cpp | 2 +- be/test/exec/tablet_sink_test.cpp | 6 +- be/test/olap/aggregate_func_test.cpp | 16 +- be/test/olap/block_column_predicate_test.cpp | 6 +- .../bloom_filter_column_predicate_test.cpp | 6 +- be/test/olap/column_vector_test.cpp | 4 +- be/test/olap/comparison_predicate_test.cpp | 6 +- be/test/olap/delta_writer_test.cpp | 13 +- .../engine_storage_migration_task_test.cpp | 4 +- be/test/olap/in_list_predicate_test.cpp | 6 +- be/test/olap/key_coder_test.cpp | 4 +- be/test/olap/lru_cache_test.cpp | 5 +- be/test/olap/null_predicate_test.cpp | 6 +- be/test/olap/row_block_v2_test.cpp | 3 +- be/test/olap/row_cursor_test.cpp | 13 +- be/test/olap/rowset/beta_rowset_test.cpp | 4 +- .../segment_v2/binary_dict_page_test.cpp | 7 +- .../segment_v2/binary_plain_page_test.cpp | 4 +- .../segment_v2/binary_prefix_page_test.cpp | 4 +- .../segment_v2/bitshuffle_page_test.cpp | 7 +- .../segment_v2/bloom_filter_page_test.cpp | 4 +- .../segment_v2/column_reader_writer_test.cpp | 22 +- .../frame_of_reference_page_test.cpp | 7 +- .../rowset/segment_v2/plain_page_test.cpp | 7 +- .../olap/rowset/segment_v2/rle_page_test.cpp | 7 +- .../olap/rowset/segment_v2/segment_test.cpp | 16 +- be/test/olap/skiplist_test.cpp | 14 +- be/test/olap/storage_types_test.cpp | 10 +- be/test/olap/tablet_clone_test.cpp | 3 +- be/test/olap/tablet_cooldown_test.cpp | 3 +- be/test/olap/tablet_test.cpp | 17 +- be/test/runtime/array_test.cpp | 6 +- be/test/runtime/buffered_block_mgr2_test.cpp | 96 ++- .../runtime/buffered_tuple_stream2_test.cpp | 12 +- be/test/runtime/data_stream_test.cpp | 15 +- be/test/runtime/disk_io_mgr_test.cpp | 110 ++-- be/test/runtime/load_channel_mgr_test.cpp | 2 - be/test/runtime/mem_limit_test.cpp | 19 +- be/test/runtime/mem_pool_test.cpp | 22 +- be/test/runtime/memory_scratch_sink_test.cpp | 5 - be/test/runtime/string_buffer_test.cpp | 4 +- be/test/runtime/test_env.cc | 12 +- be/test/runtime/test_env.h | 4 - be/test/testutil/array_utils.cpp | 2 +- be/test/testutil/function_utils.cpp | 6 +- be/test/testutil/run_all_tests.cpp | 5 + be/test/tools/benchmark_tool.cpp | 13 +- be/test/util/array_parser_test.cpp | 7 +- be/test/util/arrow/arrow_row_batch_test.cpp | 2 +- be/test/util/arrow/arrow_work_flow_test.cpp | 8 +- .../util/tuple_row_zorder_compare_test.cpp | 6 +- be/test/vec/exec/vbroker_scan_node_test.cpp | 4 +- be/test/vec/exec/vbroker_scanner_test.cpp | 4 +- be/test/vec/exec/vjson_scanner_test.cpp | 2 +- be/test/vec/exec/vorc_scanner_test.cpp | 6 +- be/test/vec/exec/vparquet_scanner_test.cpp | 2 +- be/test/vec/exec/vtablet_sink_test.cpp | 4 +- be/test/vec/exprs/vexpr_test.cpp | 7 +- build-support/clang-format.sh | 3 +- docs/en/docs/admin-manual/config/be-config.md | 20 +- .../docs/admin-manual/config/be-config.md | 20 +- regression-test/conf/regression-conf.groovy | 4 +- 316 files changed, 2145 insertions(+), 4369 deletions(-) delete mode 100644 be/src/runtime/mem_tracker.cpp delete mode 100644 be/src/runtime/mem_tracker.h delete mode 100644 be/src/runtime/mem_tracker_task_pool.cpp delete mode 100644 be/src/runtime/mem_tracker_task_pool.h create mode 100644 be/src/runtime/memory/mem_tracker.cpp create mode 100644 be/src/runtime/memory/mem_tracker.h delete mode 100644 
be/src/runtime/memory/mem_tracker_base.cpp delete mode 100644 be/src/runtime/memory/mem_tracker_base.h delete mode 100644 be/src/runtime/memory/mem_tracker_observe.cpp delete mode 100644 be/src/runtime/memory/mem_tracker_observe.h rename be/src/runtime/{ => memory}/tcmalloc_hook.h (68%) create mode 100644 be/src/runtime/memory/thread_mem_tracker_mgr.cpp create mode 100644 be/src/runtime/memory/thread_mem_tracker_mgr.h delete mode 100644 be/src/runtime/thread_mem_tracker_mgr.cpp delete mode 100644 be/src/runtime/thread_mem_tracker_mgr.h diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 719ce3824d..76a1d72eba 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -1657,8 +1657,6 @@ void TaskWorkerPool::_random_sleep(int second) { } void TaskWorkerPool::_submit_table_compaction_worker_thread_callback() { - SCOPED_ATTACH_TASK_THREAD(ThreadContext::TaskType::COMPACTION, - StorageEngine::instance()->compaction_mem_tracker()); while (_is_work) { TAgentTaskRequest agent_task_req; TCompactionReq compaction_req; diff --git a/be/src/common/config.h b/be/src/common/config.h index 12e35d2574..ddca6bc447 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -634,7 +634,7 @@ CONF_Int32(aws_log_level, "3"); CONF_mInt32(remote_storage_read_buffer_mb, "16"); // Whether Hook TCmalloc new/delete, currently consume/release tls mem tracker in Hook. -CONF_Bool(track_new_delete, "true"); +CONF_Bool(enable_tcmalloc_hook, "true"); // If true, switch TLS MemTracker to count more detailed memory, // including caches such as ExecNode operators and TabletManager. @@ -647,26 +647,12 @@ CONF_Bool(track_new_delete, "true"); // 2. Consider using raw pointers for mem tracker in thread local CONF_Bool(memory_verbose_track, "false"); -// Default level of MemTracker to show in web page -// now MemTracker support two level: -// OVERVIEW: 0 -// TASK: 1 -// INSTANCE: 2 -// VERBOSE: 3 -// the level equal or lower than mem_tracker_level will show in web page -CONF_mInt16(mem_tracker_level, "0"); - // The minimum length when TCMalloc Hook consumes/releases MemTracker, consume size // smaller than this value will continue to accumulate. specified as number of bytes. // Decreasing this value will increase the frequency of consume/release. // Increasing this value will cause MemTracker statistics to be inaccurate. CONF_mInt32(mem_tracker_consume_min_size_bytes, "4194304"); -// When MemTracker is a negative value, it is considered that a memory leak has occurred, -// but the actual MemTracker records inaccurately will also cause a negative value, -// so this feature is in the experimental stage. -CONF_mBool(memory_leak_detection, "false"); - // The version information of the tablet will be stored in the memory // in an adjacency graph data structure. 
// And as the new version is written and the old version is deleted, diff --git a/be/src/exec/analytic_eval_node.cpp b/be/src/exec/analytic_eval_node.cpp index 7a104a724e..08d2dc2c35 100644 --- a/be/src/exec/analytic_eval_node.cpp +++ b/be/src/exec/analytic_eval_node.cpp @@ -142,21 +142,21 @@ Status AnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) { Status AnalyticEvalNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); DCHECK(child(0)->row_desc().is_prefix_of(row_desc())); _child_tuple_desc = child(0)->row_desc().tuple_descriptors()[0]; - _curr_tuple_pool.reset(new MemPool(mem_tracker().get())); - _prev_tuple_pool.reset(new MemPool(mem_tracker().get())); - _mem_pool.reset(new MemPool(mem_tracker().get())); + _curr_tuple_pool.reset(new MemPool(mem_tracker())); + _prev_tuple_pool.reset(new MemPool(mem_tracker())); + _mem_pool.reset(new MemPool(mem_tracker())); _evaluation_timer = ADD_TIMER(runtime_profile(), "EvaluationTime"); DCHECK_EQ(_result_tuple_desc->slots().size(), _evaluators.size()); for (int i = 0; i < _evaluators.size(); ++i) { doris_udf::FunctionContext* ctx; - RETURN_IF_ERROR(_evaluators[i]->prepare( - state, child(0)->row_desc(), _mem_pool.get(), _intermediate_tuple_desc->slots()[i], - _result_tuple_desc->slots()[i], mem_tracker(), &ctx)); + RETURN_IF_ERROR(_evaluators[i]->prepare(state, child(0)->row_desc(), _mem_pool.get(), + _intermediate_tuple_desc->slots()[i], + _result_tuple_desc->slots()[i], &ctx)); _fn_ctxs.push_back(ctx); state->obj_pool()->add(ctx); } @@ -169,14 +169,12 @@ Status AnalyticEvalNode::prepare(RuntimeState* state) { RowDescriptor cmp_row_desc(state->desc_tbl(), tuple_ids, std::vector(2, false)); if (_partition_by_eq_expr_ctx != nullptr) { - RETURN_IF_ERROR( - _partition_by_eq_expr_ctx->prepare(state, cmp_row_desc, expr_mem_tracker())); + RETURN_IF_ERROR(_partition_by_eq_expr_ctx->prepare(state, cmp_row_desc)); //AddExprCtxToFree(_partition_by_eq_expr_ctx); } if (_order_by_eq_expr_ctx != nullptr) { - RETURN_IF_ERROR( - _order_by_eq_expr_ctx->prepare(state, cmp_row_desc, expr_mem_tracker())); + RETURN_IF_ERROR(_order_by_eq_expr_ctx->prepare(state, cmp_row_desc)); //AddExprCtxToFree(_order_by_eq_expr_ctx); } } @@ -187,13 +185,12 @@ Status AnalyticEvalNode::prepare(RuntimeState* state) { Status AnalyticEvalNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); //RETURN_IF_ERROR(QueryMaintenance(state)); RETURN_IF_ERROR(child(0)->open(state)); - RETURN_IF_ERROR( - state->block_mgr2()->register_client(2, mem_tracker(), state, &_block_mgr_client)); + RETURN_IF_ERROR(state->block_mgr2()->register_client(2, state, &_block_mgr_client)); _input_stream.reset(new BufferedTupleStream2(state, child(0)->row_desc(), state->block_mgr2(), _block_mgr_client, false, true)); RETURN_IF_ERROR(_input_stream->init(id(), runtime_profile(), true)); @@ -205,7 +202,8 @@ Status AnalyticEvalNode::open(RuntimeState* state) { "Failed to acquire initial read buffer for analytic function " "evaluation. 
Reducing query concurrency or increasing the memory limit may " "help this query to complete successfully."); - RETURN_LIMIT_EXCEEDED(mem_tracker(), state, msg); + RETURN_LIMIT_EXCEEDED(thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker(), + state, msg); } DCHECK_EQ(_evaluators.size(), _fn_ctxs.size()); @@ -816,7 +814,7 @@ inline int64_t AnalyticEvalNode::num_output_rows_ready() const { Status AnalyticEvalNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); //RETURN_IF_ERROR(QueryMaintenance(state)); RETURN_IF_ERROR(state->check_query_state("Analytic eval, while get_next.")); diff --git a/be/src/exec/assert_num_rows_node.cpp b/be/src/exec/assert_num_rows_node.cpp index 3766d83ad3..6be5eca938 100644 --- a/be/src/exec/assert_num_rows_node.cpp +++ b/be/src/exec/assert_num_rows_node.cpp @@ -49,8 +49,8 @@ Status AssertNumRowsNode::prepare(RuntimeState* state) { Status AssertNumRowsNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // ISSUE-3435 RETURN_IF_ERROR(child(0)->open(state)); return Status::OK(); @@ -58,7 +58,7 @@ Status AssertNumRowsNode::open(RuntimeState* state) { Status AssertNumRowsNode::get_next(RuntimeState* state, RowBatch* output_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); output_batch->reset(); child(0)->get_next(state, output_batch, eos); _num_rows_returned += output_batch->num_rows(); diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp index ef85e56dd3..5808d35021 100644 --- a/be/src/exec/base_scanner.cpp +++ b/be/src/exec/base_scanner.cpp @@ -23,7 +23,6 @@ #include "exec/exec_node.h" #include "exprs/expr_context.h" #include "runtime/descriptors.h" -#include "runtime/mem_tracker.h" #include "runtime/raw_value.h" #include "runtime/runtime_state.h" #include "runtime/tuple.h" @@ -44,15 +43,7 @@ BaseScanner::BaseScanner(RuntimeState* state, RuntimeProfile* profile, _counter(counter), _src_tuple(nullptr), _src_tuple_row(nullptr), -#if BE_TEST - _mem_tracker(new MemTracker()), -#else - _mem_tracker(MemTracker::create_tracker( - -1, state->query_type() == TQueryType::LOAD - ? 
"BaseScanner:" + std::to_string(state->load_job_id()) - : "BaseScanner:Select")), -#endif - _mem_pool(std::make_unique(_mem_tracker.get())), + _mem_pool(std::make_unique()), _dest_tuple_desc(nullptr), _pre_filter_texprs(pre_filter_texprs), _strict_mode(false), @@ -62,8 +53,7 @@ BaseScanner::BaseScanner(RuntimeState* state, RuntimeProfile* profile, _read_timer(nullptr), _materialize_timer(nullptr), _success(false), - _scanner_eof(false) { -} + _scanner_eof(false) {} Status BaseScanner::open() { RETURN_IF_ERROR(init_expr_ctxes()); @@ -137,12 +127,12 @@ Status BaseScanner::init_expr_ctxes() { _vpre_filter_ctx_ptr.reset(new doris::vectorized::VExprContext*); RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree( _state->obj_pool(), _pre_filter_texprs[0], _vpre_filter_ctx_ptr.get())); - RETURN_IF_ERROR((*_vpre_filter_ctx_ptr)->prepare(_state, *_row_desc, _mem_tracker)); + RETURN_IF_ERROR((*_vpre_filter_ctx_ptr)->prepare(_state, *_row_desc)); RETURN_IF_ERROR((*_vpre_filter_ctx_ptr)->open(_state)); } else { RETURN_IF_ERROR(Expr::create_expr_trees(_state->obj_pool(), _pre_filter_texprs, &_pre_filter_ctxs)); - RETURN_IF_ERROR(Expr::prepare(_pre_filter_ctxs, _state, *_row_desc, _mem_tracker)); + RETURN_IF_ERROR(Expr::prepare(_pre_filter_ctxs, _state, *_row_desc)); RETURN_IF_ERROR(Expr::open(_pre_filter_ctxs, _state)); } } @@ -169,13 +159,13 @@ Status BaseScanner::init_expr_ctxes() { vectorized::VExprContext* ctx = nullptr; RETURN_IF_ERROR( vectorized::VExpr::create_expr_tree(_state->obj_pool(), it->second, &ctx)); - RETURN_IF_ERROR(ctx->prepare(_state, *_row_desc.get(), _mem_tracker)); + RETURN_IF_ERROR(ctx->prepare(_state, *_row_desc.get())); RETURN_IF_ERROR(ctx->open(_state)); _dest_vexpr_ctx.emplace_back(ctx); } else { ExprContext* ctx = nullptr; RETURN_IF_ERROR(Expr::create_expr_tree(_state->obj_pool(), it->second, &ctx)); - RETURN_IF_ERROR(ctx->prepare(_state, *_row_desc.get(), _mem_tracker)); + RETURN_IF_ERROR(ctx->prepare(_state, *_row_desc.get())); RETURN_IF_ERROR(ctx->open(_state)); _dest_expr_ctx.emplace_back(ctx); } diff --git a/be/src/exec/base_scanner.h b/be/src/exec/base_scanner.h index 7efdee0f2f..b59fa113a2 100644 --- a/be/src/exec/base_scanner.h +++ b/be/src/exec/base_scanner.h @@ -30,7 +30,6 @@ class Tuple; class TupleDescriptor; class TupleRow; class RowDescriptor; -class MemTracker; class RuntimeState; class ExprContext; @@ -110,7 +109,6 @@ protected: Tuple* _src_tuple; TupleRow* _src_tuple_row; - std::shared_ptr _mem_tracker; // Mem pool used to allocate _src_tuple and _src_tuple_row std::unique_ptr _mem_pool; diff --git a/be/src/exec/blocking_join_node.cpp b/be/src/exec/blocking_join_node.cpp index 3aa1794a7a..ed533cc21f 100644 --- a/be/src/exec/blocking_join_node.cpp +++ b/be/src/exec/blocking_join_node.cpp @@ -51,9 +51,9 @@ BlockingJoinNode::~BlockingJoinNode() { Status BlockingJoinNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); - _build_pool.reset(new MemPool(mem_tracker().get())); + _build_pool.reset(new MemPool(mem_tracker())); _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); _left_child_timer = ADD_TIMER(runtime_profile(), "LeftChildTime"); _build_row_counter = ADD_COUNTER(runtime_profile(), "BuildRows", TUnit::UNIT); @@ -89,14 +89,14 @@ Status BlockingJoinNode::close(RuntimeState* state) { } void BlockingJoinNode::build_side_thread(RuntimeState* state, std::promise* status) 
{ - SCOPED_ATTACH_TASK_THREAD(state, mem_tracker()); + SCOPED_ATTACH_TASK(state); status->set_value(construct_build_side(state)); } Status BlockingJoinNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // RETURN_IF_ERROR(Expr::open(_conjuncts, state)); RETURN_IF_CANCELLED(state); diff --git a/be/src/exec/broker_scan_node.cpp b/be/src/exec/broker_scan_node.cpp index 49fed4c3ce..9bc920a9b6 100644 --- a/be/src/exec/broker_scan_node.cpp +++ b/be/src/exec/broker_scan_node.cpp @@ -63,7 +63,7 @@ Status BrokerScanNode::init(const TPlanNode& tnode, RuntimeState* state) { Status BrokerScanNode::prepare(RuntimeState* state) { VLOG_QUERY << "BrokerScanNode prepare"; RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // get tuple desc _runtime_state = state; _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); @@ -87,8 +87,8 @@ Status BrokerScanNode::prepare(RuntimeState* state) { Status BrokerScanNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(start_scanners()); @@ -107,7 +107,7 @@ Status BrokerScanNode::start_scanners() { Status BrokerScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // check if CANCELLED. if (state->is_cancelled()) { std::unique_lock l(_batch_queue_lock); @@ -343,7 +343,10 @@ Status BrokerScanNode::scanner_scan(const TBrokerScanRange& scan_range, // 1. too many batches in queue, or // 2. at least one batch in queue and memory exceed limit. (_batch_queue.size() >= _max_buffered_batches || - (mem_tracker()->any_limit_exceeded() && !_batch_queue.empty()))) { + (thread_context() + ->_thread_mem_tracker_mgr->limiter_mem_tracker() + ->any_limit_exceeded() && + !_batch_queue.empty()))) { _queue_writer_cond.wait_for(l, std::chrono::seconds(1)); } // Process already set failed, so we just return OK diff --git a/be/src/exec/cross_join_node.cpp b/be/src/exec/cross_join_node.cpp index 8488ebfc49..c80560bf82 100644 --- a/be/src/exec/cross_join_node.cpp +++ b/be/src/exec/cross_join_node.cpp @@ -33,7 +33,7 @@ CrossJoinNode::CrossJoinNode(ObjectPool* pool, const TPlanNode& tnode, const Des Status CrossJoinNode::prepare(RuntimeState* state) { DCHECK(_join_op == TJoinOp::CROSS_JOIN); RETURN_IF_ERROR(BlockingJoinNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); _build_batch_pool.reset(new ObjectPool()); return Status::OK(); } @@ -52,7 +52,7 @@ Status CrossJoinNode::close(RuntimeState* state) { Status CrossJoinNode::construct_build_side(RuntimeState* state) { // Do a full scan of child(1) and store all build row batches. 
RETURN_IF_ERROR(child(1)->open(state)); - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB("Cross join, while getting next from child 1"); + SCOPED_UPDATE_MEM_EXCEED_CALL_BACK("Cross join, while getting next from child 1"); while (true) { RowBatch* batch = @@ -87,7 +87,7 @@ Status CrossJoinNode::get_next(RuntimeState* state, RowBatch* output_batch, bool // TOOD(zhaochun) // RETURN_IF_ERROR(state->check_query_state()); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); if (reached_limit() || _eos) { *eos = true; diff --git a/be/src/exec/csv_scan_node.cpp b/be/src/exec/csv_scan_node.cpp index 66169935d7..f84c25cbba 100644 --- a/be/src/exec/csv_scan_node.cpp +++ b/be/src/exec/csv_scan_node.cpp @@ -122,7 +122,7 @@ Status CsvScanNode::prepare(RuntimeState* state) { } RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // add timer _split_check_timer = ADD_TIMER(_runtime_profile, "split check timer"); @@ -206,8 +206,8 @@ Status CsvScanNode::prepare(RuntimeState* state) { Status CsvScanNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); VLOG_CRITICAL << "CsvScanNode::Open"; if (nullptr == state) { @@ -238,7 +238,7 @@ Status CsvScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos RETURN_IF_CANCELLED(state); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); if (reached_limit()) { *eos = true; diff --git a/be/src/exec/data_sink.cpp b/be/src/exec/data_sink.cpp index 1ccf23f0ec..8c581181ca 100644 --- a/be/src/exec/data_sink.cpp +++ b/be/src/exec/data_sink.cpp @@ -213,9 +213,6 @@ Status DataSink::init(const TDataSink& thrift_sink) { } Status DataSink::prepare(RuntimeState* state) { - _expr_mem_tracker = - MemTracker::create_tracker(-1, _name + ":Expr:" + std::to_string(state->load_job_id()), - state->instance_mem_tracker()); return Status::OK(); } diff --git a/be/src/exec/data_sink.h b/be/src/exec/data_sink.h index e079877c2e..952e51e5cd 100644 --- a/be/src/exec/data_sink.h +++ b/be/src/exec/data_sink.h @@ -26,7 +26,6 @@ #include "gen_cpp/DataSinks_types.h" #include "gen_cpp/Exprs_types.h" #include "runtime/descriptors.h" -#include "runtime/mem_tracker.h" #include "runtime/query_statistics.h" #include "util/telemetry/telemetry.h" @@ -71,7 +70,6 @@ public: // It must be okay to call this multiple times. Subsequent calls should // be ignored. virtual Status close(RuntimeState* state, Status exec_status) { - _expr_mem_tracker.reset(); _closed = true; return Status::OK(); } @@ -101,7 +99,6 @@ protected: // Set to true after close() has been called. subclasses should check and set this in // close(). bool _closed; - std::shared_ptr _expr_mem_tracker; std::string _name; // Maybe this will be transferred to BufferControlBlock. 
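The hunks above consistently replace the old SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER / expr_mem_tracker() plumbing with SCOPED_CONSUME_MEM_TRACKER(mem_tracker()) and SCOPED_ATTACH_TASK(state), so memory is charged to whichever tracker the current thread has attached instead of a tracker being threaded through every prepare(), MemPool constructor, and Expr::prepare() call. As a rough mental model only (hypothetical names, C++17, not the actual Doris thread_context / ThreadMemTrackerMgr implementation), such a guard can be sketched as:

// --- Illustrative sketch, not part of the patch: a SCOPED_CONSUME_MEM_TRACKER-style
// --- RAII guard built on a thread-local tracker pointer. All names are hypothetical;
// --- the real types live under be/src/runtime/memory/ and be/src/runtime/thread_context.h.
#include <atomic>
#include <cstdint>
#include <string>
#include <utility>

class SketchMemTracker {
public:
    explicit SketchMemTracker(std::string label) : _label(std::move(label)) {}
    void consume(int64_t bytes) { _consumption.fetch_add(bytes, std::memory_order_relaxed); }
    void release(int64_t bytes) { _consumption.fetch_sub(bytes, std::memory_order_relaxed); }
    int64_t consumption() const { return _consumption.load(std::memory_order_relaxed); }

private:
    std::string _label;
    std::atomic<int64_t> _consumption{0};
};

// The tracker that allocation hooks on this thread would charge (C++17 inline thread_local).
inline thread_local SketchMemTracker* tls_current_tracker = nullptr;

// RAII guard: point the thread-local tracker at the node's tracker for one scope,
// then restore the previous tracker on exit.
class ScopedConsumeMemTracker {
public:
    explicit ScopedConsumeMemTracker(SketchMemTracker* t) : _saved(tls_current_tracker) {
        tls_current_tracker = t;
    }
    ~ScopedConsumeMemTracker() { tls_current_tracker = _saved; }

private:
    SketchMemTracker* _saved;
};

// Usage, analogous to ExecNode::open()/get_next() above: memory consumed while the guard
// is alive is attributed to the node's tracker without passing trackers through prepare().
void process_batch(SketchMemTracker* node_tracker) {
    ScopedConsumeMemTracker guard(node_tracker);
    if (tls_current_tracker != nullptr) tls_current_tracker->consume(4096); // e.g. from a hook
    // ... do work that allocates ...
    if (tls_current_tracker != nullptr) tls_current_tracker->release(4096);
}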
diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 73f648b4a7..8dd3a2c9e1 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -28,7 +28,7 @@ #include "rapidjson/stringbuffer.h" #include "rapidjson/writer.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" +#include "runtime/memory/mem_tracker.h" #include "runtime/string_value.h" #include "util/string_parser.hpp" #include "vec/runtime/vdatetime_value.h" @@ -353,7 +353,9 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, if (UNLIKELY(buffer == nullptr)) { std::string details = strings::Substitute(ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", len, "string slot"); - RETURN_LIMIT_EXCEEDED(tuple_pool->mem_tracker(), nullptr, details, len, rst); + RETURN_LIMIT_EXCEEDED( + thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker(), nullptr, + details, len, rst); } memcpy(buffer, _id.data(), len); reinterpret_cast(slot)->ptr = buffer; @@ -413,7 +415,9 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, if (UNLIKELY(buffer == nullptr)) { std::string details = strings::Substitute( ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", val_size, "string slot"); - RETURN_LIMIT_EXCEEDED(tuple_pool->mem_tracker(), nullptr, details, val_size, rst); + RETURN_LIMIT_EXCEEDED( + thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker(), nullptr, + details, val_size, rst); } memcpy(buffer, val.data(), val_size); reinterpret_cast(slot)->ptr = buffer; diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 62d219d13d..acfc14bb7d 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -66,7 +66,7 @@ Status EsHttpScanNode::init(const TPlanNode& tnode, RuntimeState* state) { Status EsHttpScanNode::prepare(RuntimeState* state) { VLOG_QUERY << "EsHttpScanNode prepare"; RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); _scanner_profile.reset(new RuntimeProfile("EsHttpScanNode")); runtime_profile()->add_child(_scanner_profile.get(), true, nullptr); @@ -121,8 +121,8 @@ Status EsHttpScanNode::build_conjuncts_list() { Status EsHttpScanNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); // if conjunct is constant, compute direct and set eos = true @@ -195,7 +195,7 @@ Status EsHttpScanNode::collect_scanners_status() { Status EsHttpScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); if (state->is_cancelled()) { std::unique_lock l(_batch_queue_lock); if (update_status(Status::Cancelled("Cancelled"))) { @@ -418,7 +418,7 @@ static std::string get_host_port(const std::vector& es_hosts) { } void EsHttpScanNode::scanner_worker(int start_idx, int length, std::promise& p_status) { - SCOPED_ATTACH_TASK_THREAD(_runtime_state, mem_tracker()); + SCOPED_ATTACH_TASK(_runtime_state); // Clone expr context std::vector scanner_expr_ctxs; DCHECK(start_idx < length); diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index 
c6b7dac2bc..cb9747673f 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -40,6 +40,7 @@ EsHttpScanner::EsHttpScanner(RuntimeState* state, RuntimeProfile* profile, Tuple _next_range(0), _line_eof(false), _batch_eof(false), + _mem_pool(new MemPool()), _tuple_desc(nullptr), _counter(counter), _es_reader(nullptr), @@ -47,15 +48,7 @@ EsHttpScanner::EsHttpScanner(RuntimeState* state, RuntimeProfile* profile, Tuple _doc_value_mode(doc_value_mode), _rows_read_counter(nullptr), _read_timer(nullptr), - _materialize_timer(nullptr) { -#ifndef BE_TEST - _mem_pool.reset(new MemPool(state->query_type() == TQueryType::LOAD - ? "EsHttpScanner:" + std::to_string(state->load_job_id()) - : "EsHttpScanner:Select")); -#else - _mem_pool.reset(new MemPool()); -#endif -} + _materialize_timer(nullptr) {} EsHttpScanner::~EsHttpScanner() { close(); diff --git a/be/src/exec/except_node.cpp b/be/src/exec/except_node.cpp index 03fc7b229f..8084fb47f4 100644 --- a/be/src/exec/except_node.cpp +++ b/be/src/exec/except_node.cpp @@ -39,8 +39,8 @@ Status ExceptNode::init(const TPlanNode& tnode, RuntimeState* state) { } Status ExceptNode::open(RuntimeState* state) { - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB("Except Node, while probing the hash table."); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); + SCOPED_UPDATE_MEM_EXCEED_CALL_BACK("Except Node, while probing the hash table."); RETURN_IF_ERROR(SetOperationNode::open(state)); // if a table is empty, the result must be empty if (_hash_tbl->size() == 0) { @@ -87,7 +87,7 @@ Status ExceptNode::open(RuntimeState* state) { Status ExceptNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eos) { RETURN_IF_CANCELLED(state); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); *eos = true; if (reached_limit()) { return Status::OK(); diff --git a/be/src/exec/exchange_node.cpp b/be/src/exec/exchange_node.cpp index e510f7af28..410ed4ace1 100644 --- a/be/src/exec/exchange_node.cpp +++ b/be/src/exec/exchange_node.cpp @@ -60,7 +60,7 @@ Status ExchangeNode::init(const TPlanNode& tnode, RuntimeState* state) { Status ExchangeNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); _convert_row_batch_timer = ADD_TIMER(runtime_profile(), "ConvertRowBatchTime"); // TODO: figure out appropriate buffer size DCHECK_GT(_num_senders, 0); @@ -70,8 +70,7 @@ Status ExchangeNode::prepare(RuntimeState* state) { config::exchg_node_buffer_size_bytes, _runtime_profile.get(), _is_merging, _sub_plan_query_statistics_recvr); if (_is_merging) { - RETURN_IF_ERROR(_sort_exec_exprs.prepare(state, _row_descriptor, _row_descriptor, - expr_mem_tracker())); + RETURN_IF_ERROR(_sort_exec_exprs.prepare(state, _row_descriptor, _row_descriptor)); // AddExprCtxsToFree(_sort_exec_exprs); } return Status::OK(); @@ -79,9 +78,8 @@ Status ExchangeNode::prepare(RuntimeState* state) { Status ExchangeNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); - ADD_THREAD_LOCAL_MEM_TRACKER(_stream_recvr->mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); if (_is_merging) { RETURN_IF_ERROR(_sort_exec_exprs.open(state)); TupleRowComparator 
less_than(_sort_exec_exprs, _is_asc_order, _nulls_first); @@ -134,7 +132,7 @@ Status ExchangeNode::fill_input_row_batch(RuntimeState* state) { Status ExchangeNode::get_next(RuntimeState* state, RowBatch* output_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); if (reached_limit()) { _stream_recvr->transfer_all_resources(output_batch); diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 7f6bdf886a..a64011b14d 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -54,7 +54,7 @@ #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/initial_reservations.h" -#include "runtime/mem_tracker.h" +#include "runtime/memory/mem_tracker.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" #include "util/debug_util.h" @@ -169,7 +169,7 @@ void ExecNode::push_down_predicate(RuntimeState* state, std::list* if ((*iter)->root()->is_bound(&_tuple_ids)) { // LOG(INFO) << "push down success expr is " << (*iter)->debug_string() // << " and node is " << debug_string(); - (*iter)->prepare(state, row_desc(), _expr_mem_tracker); + (*iter)->prepare(state, row_desc()); (*iter)->open(state); _conjunct_ctxs.push_back(*iter); iter = expr_ctxs->erase(iter); @@ -206,17 +206,14 @@ Status ExecNode::prepare(RuntimeState* state) { std::bind(&RuntimeProfile::units_per_second, _rows_returned_counter, runtime_profile()->total_time_counter()), ""); - _mem_tracker = MemTracker::create_tracker(-1, "ExecNode:" + _runtime_profile->name(), - state->instance_mem_tracker(), - MemTrackerLevel::VERBOSE, _runtime_profile.get()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); - _expr_mem_tracker = MemTracker::create_tracker(-1, "ExecNode:Exprs:" + _runtime_profile->name(), - _mem_tracker); + _mem_tracker = std::make_unique("ExecNode:" + _runtime_profile->name(), nullptr, + _runtime_profile.get()); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); if (_vconjunct_ctx_ptr) { - RETURN_IF_ERROR((*_vconjunct_ctx_ptr)->prepare(state, row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR((*_vconjunct_ctx_ptr)->prepare(state, row_desc())); } - RETURN_IF_ERROR(Expr::prepare(_conjunct_ctxs, state, row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_conjunct_ctxs, state, row_desc())); // TODO(zc): // AddExprCtxsToFree(_conjunct_ctxs); @@ -228,7 +225,7 @@ Status ExecNode::prepare(RuntimeState* state) { } Status ExecNode::open(RuntimeState* state) { - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); if (_vconjunct_ctx_ptr) { RETURN_IF_ERROR((*_vconjunct_ctx_ptr)->open(state)); } @@ -700,7 +697,7 @@ Status ExecNode::claim_buffer_reservation(RuntimeState* state) { ss << print_plan_node_type(_type) << " id=" << _id << " ptr=" << this; RETURN_IF_ERROR(buffer_pool->RegisterClient(ss.str(), state->instance_buffer_reservation(), - mem_tracker(), buffer_pool->GetSystemBytesLimit(), + buffer_pool->GetSystemBytesLimit(), runtime_profile(), &_buffer_pool_client)); state->initial_reservations()->Claim(&_buffer_pool_client, _resource_profile.min_reservation); diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h index 1f386ac1d4..527b294632 100644 --- a/be/src/exec/exec_node.h +++ b/be/src/exec/exec_node.h @@ -188,9 +188,7 @@ public: RuntimeProfile* runtime_profile() const { return _runtime_profile.get(); } RuntimeProfile::Counter* memory_used_counter() 
const { return _memory_used_counter; } - std::shared_ptr mem_tracker() const { return _mem_tracker; } - - std::shared_ptr expr_mem_tracker() const { return _expr_mem_tracker; } + MemTracker* mem_tracker() const { return _mem_tracker.get(); } OpentelemetrySpan get_next_span() { return _get_next_span; } @@ -291,9 +289,7 @@ protected: std::unique_ptr _runtime_profile; /// Account for peak memory used by this node - std::shared_ptr _mem_tracker; - // MemTracker used by all Expr. - std::shared_ptr _expr_mem_tracker; + std::unique_ptr _mem_tracker; RuntimeProfile::Counter* _rows_returned_counter; RuntimeProfile::Counter* _rows_returned_rate; diff --git a/be/src/exec/hash_join_node.cpp b/be/src/exec/hash_join_node.cpp index 383007d112..c642335924 100644 --- a/be/src/exec/hash_join_node.cpp +++ b/be/src/exec/hash_join_node.cpp @@ -97,9 +97,9 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { Status HashJoinNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); - _build_pool.reset(new MemPool(mem_tracker().get())); + _build_pool.reset(new MemPool(mem_tracker())); _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); _push_down_timer = ADD_TIMER(runtime_profile(), "PushDownTime"); _push_compute_timer = ADD_TIMER(runtime_profile(), "PushDownComputeTime"); @@ -113,14 +113,11 @@ Status HashJoinNode::prepare(RuntimeState* state) { _hash_table_list_max_size = ADD_COUNTER(runtime_profile(), "HashTableMaxList", TUnit::UNIT); // build and probe exprs are evaluated in the context of the rows produced by our // right and left children, respectively - RETURN_IF_ERROR( - Expr::prepare(_build_expr_ctxs, state, child(1)->row_desc(), expr_mem_tracker())); - RETURN_IF_ERROR( - Expr::prepare(_probe_expr_ctxs, state, child(0)->row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_build_expr_ctxs, state, child(1)->row_desc())); + RETURN_IF_ERROR(Expr::prepare(_probe_expr_ctxs, state, child(0)->row_desc())); // _other_join_conjuncts are evaluated in the context of the rows produced by this node - RETURN_IF_ERROR( - Expr::prepare(_other_join_conjunct_ctxs, state, _row_descriptor, expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_other_join_conjunct_ctxs, state, _row_descriptor)); _result_tuple_row_size = _row_descriptor.tuple_descriptors().size() * sizeof(Tuple*); @@ -147,7 +144,7 @@ Status HashJoinNode::prepare(RuntimeState* state) { (std::find(_is_null_safe_eq_join.begin(), _is_null_safe_eq_join.end(), true) != _is_null_safe_eq_join.end()); _hash_tbl.reset(new HashTable(_build_expr_ctxs, _probe_expr_ctxs, _build_tuple_size, - stores_nulls, _is_null_safe_eq_join, id(), mem_tracker(), + stores_nulls, _is_null_safe_eq_join, id(), state->batch_size() * 2)); _probe_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); @@ -178,7 +175,7 @@ Status HashJoinNode::close(RuntimeState* state) { } void HashJoinNode::build_side_thread(RuntimeState* state, std::promise* status) { - SCOPED_ATTACH_TASK_THREAD(state, mem_tracker()); + SCOPED_ATTACH_TASK(state); status->set_value(construct_hash_table(state)); } @@ -187,7 +184,7 @@ Status HashJoinNode::construct_hash_table(RuntimeState* state) { // The hash join node needs to keep in memory all build tuples, including the tuple // row ptrs. The row ptrs are copied into the hash table's internal structure so they // don't need to be stored in the _build_pool. 
- SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB("Hash join, while constructing the hash table."); + SCOPED_UPDATE_MEM_EXCEED_CALL_BACK("Hash join, while constructing the hash table."); RowBatch build_batch(child(1)->row_desc(), state->batch_size()); RETURN_IF_ERROR(child(1)->open(state)); @@ -220,7 +217,7 @@ Status HashJoinNode::construct_hash_table(RuntimeState* state) { Status HashJoinNode::open(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::open(state)); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(Expr::open(_build_expr_ctxs, state)); RETURN_IF_ERROR(Expr::open(_probe_expr_ctxs, state)); @@ -304,9 +301,9 @@ Status HashJoinNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eo // In most cases, no additional memory overhead will be applied for at this stage, // but if the expression calculation in this node needs to apply for additional memory, // it may cause the memory to exceed the limit. - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB("Hash join, while execute get_next."); + SCOPED_UPDATE_MEM_EXCEED_CALL_BACK("Hash join, while execute get_next."); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); if (reached_limit()) { *eos = true; diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp index cd32d3fb36..b50b03460e 100644 --- a/be/src/exec/hash_table.cpp +++ b/be/src/exec/hash_table.cpp @@ -22,7 +22,7 @@ #include "exprs/expr.h" #include "exprs/expr_context.h" -#include "runtime/mem_tracker.h" +#include "runtime/memory/mem_tracker.h" #include "runtime/raw_value.h" namespace doris { @@ -30,7 +30,7 @@ namespace doris { HashTable::HashTable(const std::vector& build_expr_ctxs, const std::vector& probe_expr_ctxs, int num_build_tuples, bool stores_nulls, const std::vector& finds_nulls, int32_t initial_seed, - const std::shared_ptr& mem_tracker, int64_t num_buckets) + int64_t num_buckets) : _build_expr_ctxs(build_expr_ctxs), _probe_expr_ctxs(probe_expr_ctxs), _num_build_tuples(num_build_tuples), @@ -47,8 +47,7 @@ HashTable::HashTable(const std::vector& build_expr_ctxs, DCHECK_EQ(_build_expr_ctxs.size(), _probe_expr_ctxs.size()); DCHECK_EQ((num_buckets & (num_buckets - 1)), 0) << "num_buckets must be a power of 2"; - _mem_tracker = - MemTracker::create_virtual_tracker(-1, mem_tracker->label() + "HashTable", mem_tracker); + _mem_tracker = std::make_unique("HashTable"); _buckets.resize(num_buckets); _num_buckets = num_buckets; _num_buckets_till_resize = MAX_BUCKET_OCCUPANCY_FRACTION * _num_buckets; @@ -176,11 +175,13 @@ Status HashTable::resize_buckets(int64_t num_buckets) { int64_t old_num_buckets = _num_buckets; int64_t delta_bytes = (num_buckets - old_num_buckets) * sizeof(Bucket); - Status st = _mem_tracker->try_consume(delta_bytes); + Status st = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit( + delta_bytes); if (!st) { LOG_EVERY_N(WARNING, 100) << "resize bucket failed: " << st.to_string(); return st; } + _mem_tracker->consume(delta_bytes); _buckets.resize(num_buckets); diff --git a/be/src/exec/hash_table.h b/be/src/exec/hash_table.h index b2e8b73a66..15cd9c67f9 100644 --- a/be/src/exec/hash_table.h +++ b/be/src/exec/hash_table.h @@ -90,8 +90,7 @@ public: // - initial_seed: Initial seed value to use when computing hashes for rows HashTable(const std::vector& 
build_exprs, const std::vector& probe_exprs, int num_build_tuples, bool stores_nulls, - const std::vector& finds_nulls, int32_t initial_seed, - const std::shared_ptr& mem_tracker, int64_t num_buckets); + const std::vector& finds_nulls, int32_t initial_seed, int64_t num_buckets); ~HashTable(); @@ -197,6 +196,8 @@ public: std::pair minmax_node(); + MemTracker* mem_tracker() { return _mem_tracker.get(); } + // Load factor that will trigger growing the hash table on insert. This is // defined as the number of non-empty buckets / total_buckets static constexpr float MAX_BUCKET_OCCUPANCY_FRACTION = 0.75f; @@ -448,7 +449,7 @@ private: // total capacity int64_t _total_capacity; - std::shared_ptr _mem_tracker; + std::unique_ptr _mem_tracker; std::vector _buckets; diff --git a/be/src/exec/intersect_node.cpp b/be/src/exec/intersect_node.cpp index 68fa46c288..c897945fed 100644 --- a/be/src/exec/intersect_node.cpp +++ b/be/src/exec/intersect_node.cpp @@ -43,8 +43,8 @@ Status IntersectNode::init(const TPlanNode& tnode, RuntimeState* state) { // 2 probe with child(1), then filter the hash table and find the matched item, use them to rebuild a hash table // repeat [2] this for all the rest child Status IntersectNode::open(RuntimeState* state) { - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB("Intersect Node, while probing the hash table."); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); + SCOPED_UPDATE_MEM_EXCEED_CALL_BACK("Intersect Node, while probing the hash table."); RETURN_IF_ERROR(SetOperationNode::open(state)); // if a table is empty, the result must be empty if (_hash_tbl->size() == 0) { @@ -86,7 +86,7 @@ Status IntersectNode::open(RuntimeState* state) { Status IntersectNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eos) { RETURN_IF_CANCELLED(state); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); *eos = true; if (reached_limit()) { return Status::OK(); diff --git a/be/src/exec/merge_node.cpp b/be/src/exec/merge_node.cpp index f57fa38686..07d3f21e80 100644 --- a/be/src/exec/merge_node.cpp +++ b/be/src/exec/merge_node.cpp @@ -63,14 +63,13 @@ Status MergeNode::init(const TPlanNode& tnode, RuntimeState* state) { Status MergeNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); DCHECK(_tuple_desc != nullptr); // Prepare const expr lists. for (int i = 0; i < _const_result_expr_ctx_lists.size(); ++i) { - RETURN_IF_ERROR(Expr::prepare(_const_result_expr_ctx_lists[i], state, row_desc(), - expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_const_result_expr_ctx_lists[i], state, row_desc())); DCHECK_EQ(_const_result_expr_ctx_lists[i].size(), _tuple_desc->slots().size()); } @@ -84,8 +83,7 @@ Status MergeNode::prepare(RuntimeState* state) { // Prepare result expr lists. 
for (int i = 0; i < _result_expr_ctx_lists.size(); ++i) { - RETURN_IF_ERROR(Expr::prepare(_result_expr_ctx_lists[i], state, child(i)->row_desc(), - expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_result_expr_ctx_lists[i], state, child(i)->row_desc())); // DCHECK_EQ(_result_expr_ctx_lists[i].size(), _tuple_desc->slots().size()); DCHECK_EQ(_result_expr_ctx_lists[i].size(), _materialized_slots.size()); } @@ -94,8 +92,8 @@ Status MergeNode::prepare(RuntimeState* state) { } Status MergeNode::open(RuntimeState* state) { - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // Prepare const expr lists. for (int i = 0; i < _const_result_expr_ctx_lists.size(); ++i) { RETURN_IF_ERROR(Expr::open(_const_result_expr_ctx_lists[i], state)); @@ -112,7 +110,7 @@ Status MergeNode::open(RuntimeState* state) { Status MergeNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { RETURN_IF_CANCELLED(state); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // Create new tuple buffer for row_batch. int tuple_buffer_size = row_batch->capacity() * _tuple_desc->byte_size(); void* tuple_buffer = row_batch->tuple_data_pool()->allocate(tuple_buffer_size); diff --git a/be/src/exec/mysql_scan_node.cpp b/be/src/exec/mysql_scan_node.cpp index a136c5dc35..5df00ee9ac 100644 --- a/be/src/exec/mysql_scan_node.cpp +++ b/be/src/exec/mysql_scan_node.cpp @@ -52,7 +52,7 @@ Status MysqlScanNode::prepare(RuntimeState* state) { } RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // get tuple desc _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); @@ -82,7 +82,7 @@ Status MysqlScanNode::prepare(RuntimeState* state) { return Status::InternalError("new a mysql scanner failed."); } - _tuple_pool.reset(new (std::nothrow) MemPool("MysqlScanNode")); + _tuple_pool.reset(new (std::nothrow) MemPool()); if (_tuple_pool.get() == nullptr) { return Status::InternalError("new a mem pool failed."); @@ -101,8 +101,8 @@ Status MysqlScanNode::prepare(RuntimeState* state) { Status MysqlScanNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); VLOG_CRITICAL << "MysqlScanNode::Open"; if (nullptr == state) { @@ -157,7 +157,7 @@ Status MysqlScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* e RETURN_IF_CANCELLED(state); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // create new tuple buffer for row_batch int tuple_buffer_size = row_batch->capacity() * _tuple_desc->byte_size(); diff --git a/be/src/exec/odbc_scan_node.cpp b/be/src/exec/odbc_scan_node.cpp index 4e27864eda..8e5837a957 100644 --- a/be/src/exec/odbc_scan_node.cpp +++ b/be/src/exec/odbc_scan_node.cpp @@ -54,7 +54,7 @@ Status OdbcScanNode::prepare(RuntimeState* state) { } RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // get tuple desc _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); @@ -74,7 +74,7 @@ Status 
OdbcScanNode::prepare(RuntimeState* state) { return Status::InternalError("new a odbc scanner failed."); } - _tuple_pool.reset(new (std::nothrow) MemPool("OdbcScanNode")); + _tuple_pool.reset(new (std::nothrow) MemPool()); if (_tuple_pool.get() == nullptr) { return Status::InternalError("new a mem pool failed."); @@ -93,8 +93,8 @@ Status OdbcScanNode::prepare(RuntimeState* state) { Status OdbcScanNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); VLOG_CRITICAL << _scan_node_type << "::Open"; if (nullptr == state) { @@ -139,7 +139,7 @@ Status OdbcScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eo RETURN_IF_CANCELLED(state); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); if (reached_limit()) { *eos = true; diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 0c33a44aa8..ce3486a56d 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -179,7 +179,7 @@ void OlapScanNode::_init_counter(RuntimeState* state) { Status OlapScanNode::prepare(RuntimeState* state) { init_scan_profile(); RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // create scanner profile // create timer _tablet_counter = ADD_COUNTER(runtime_profile(), "TabletCount ", TUnit::UNIT); @@ -190,8 +190,7 @@ Status OlapScanNode::prepare(RuntimeState* state) { _init_counter(state); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); - _scanner_mem_tracker = MemTracker::create_tracker(state->instance_mem_tracker()->limit(), - "Scanners", mem_tracker()); + _scanner_mem_tracker = std::make_unique("Scanners"); if (_tuple_desc == nullptr) { // TODO: make sure we print all available diagnostic output to our error log @@ -230,9 +229,9 @@ Status OlapScanNode::prepare(RuntimeState* state) { Status OlapScanNode::open(RuntimeState* state) { VLOG_CRITICAL << "OlapScanNode::Open"; SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); _resource_info = ResourceTls::get_resource_tls(); @@ -258,7 +257,7 @@ Status OlapScanNode::open(RuntimeState* state) { _runtime_filter_ctxs[i].runtimefilter = runtime_filter; for (auto ctx : expr_context) { - ctx->prepare(state, row_desc(), _expr_mem_tracker); + ctx->prepare(state, row_desc()); ctx->open(state); int index = _conjunct_ctxs.size(); _conjunct_ctxs.push_back(ctx); @@ -273,7 +272,7 @@ Status OlapScanNode::open(RuntimeState* state) { Status OlapScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // check if Canceled. 
if (state->is_cancelled()) { @@ -939,7 +938,7 @@ Status OlapScanNode::start_scan_thread(RuntimeState* state) { } OlapScanner* scanner = new OlapScanner(state, this, _olap_scan_node.is_preaggregation, - _need_agg_finalize, *scan_range, _scanner_mem_tracker); + _need_agg_finalize, *scan_range, _scanner_mem_tracker.get()); // add scanner to pool before doing prepare. // so that scanner can be automatically deconstructed if prepare failed. _scanner_pool.add(scanner); @@ -1479,7 +1478,7 @@ Status OlapScanNode::normalize_bloom_filter_predicate(SlotDescriptor* slot) { void OlapScanNode::transfer_thread(RuntimeState* state) { // scanner open pushdown to scanThread - SCOPED_ATTACH_TASK_THREAD(state, mem_tracker()); + SCOPED_ATTACH_TASK(state); Status status = Status::OK(); for (auto scanner : _olap_scanners) { status = Expr::clone_if_not_exists(_conjunct_ctxs, state, scanner->conjunct_ctxs()); @@ -1507,7 +1506,6 @@ void OlapScanNode::transfer_thread(RuntimeState* state) { _nice = 18 + std::max(0, 2 - (int)_olap_scanners.size() / 5); std::list olap_scanners; - int64_t mem_limit = _scanner_mem_tracker->limit(); int64_t mem_consume = _scanner_mem_tracker->consumption(); int max_thread = _max_materialized_row_batches; if (config::doris_scanner_row_num > state->batch_size()) { @@ -1532,7 +1530,7 @@ void OlapScanNode::transfer_thread(RuntimeState* state) { size_t thread_slot_num = 0; mem_consume = _scanner_mem_tracker->consumption(); // check limit for total memory and _scan_row_batches memory - if (mem_consume < (mem_limit * 6) / 10 && + if (mem_consume < (state->instance_mem_tracker()->limit() * 6) / 10 && _scan_row_batches_bytes < _max_scanner_queue_size_bytes / 2) { thread_slot_num = max_thread - assigned_thread_num; } else { @@ -1648,8 +1646,7 @@ void OlapScanNode::transfer_thread(RuntimeState* state) { } void OlapScanNode::scanner_thread(OlapScanner* scanner) { - SCOPED_ATTACH_TASK_THREAD(_runtime_state, mem_tracker()); - ADD_THREAD_LOCAL_MEM_TRACKER(scanner->mem_tracker()); + SCOPED_ATTACH_TASK(_runtime_state); Thread::set_self_name("olap_scanner"); if (UNLIKELY(_transfer_done)) { _scanner_done = true; @@ -1692,7 +1689,7 @@ void OlapScanNode::scanner_thread(OlapScanner* scanner) { DCHECK(runtime_filter != nullptr); bool ready = runtime_filter->is_ready(); if (ready) { - runtime_filter->get_prepared_context(&contexts, row_desc(), _expr_mem_tracker); + runtime_filter->get_prepared_context(&contexts, row_desc()); scanner_filter_apply_marks[i] = true; } } diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h index 99607cfa21..771adf76e8 100644 --- a/be/src/exec/olap_scan_node.h +++ b/be/src/exec/olap_scan_node.h @@ -264,7 +264,7 @@ protected: int64_t _buffered_bytes; // Count the memory consumption of Rowset Reader and Tablet Reader in OlapScanner. 
- std::shared_ptr _scanner_mem_tracker; + std::unique_ptr _scanner_mem_tracker; EvalConjunctsFn _eval_conjuncts_fn; bool _need_agg_finalize = true; diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index a1afd1d2ce..1a8cd2a165 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -31,7 +31,7 @@ #include "olap_utils.h" #include "runtime/descriptors.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" +#include "runtime/memory/mem_tracker.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" #include "runtime/thread_context.h" @@ -43,7 +43,7 @@ namespace doris { OlapScanner::OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool aggregation, bool need_agg_finalize, const TPaloScanRange& scan_range, - const std::shared_ptr& tracker) + MemTracker* tracker) : _runtime_state(runtime_state), _parent(parent), _tuple_desc(parent->_tuple_desc), @@ -52,12 +52,7 @@ OlapScanner::OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool _aggregation(aggregation), _need_agg_finalize(need_agg_finalize), _version(-1) { -#ifndef NDEBUG - _mem_tracker = MemTracker::create_tracker(tracker->limit(), - "OlapScanner:" + tls_ctx()->thread_id_str(), tracker); -#else _mem_tracker = tracker; -#endif } Status OlapScanner::prepare( @@ -65,7 +60,7 @@ Status OlapScanner::prepare( const std::vector& filters, const std::vector>>& bloom_filters, const std::vector& function_filters) { - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); set_tablet_reader(); // set limit to reduce end of rowset and segment mem use _tablet_reader->set_batch_size( @@ -136,7 +131,7 @@ Status OlapScanner::open() { auto span = _runtime_state->get_tracer()->StartSpan("OlapScanner::open"); auto scope = opentelemetry::trace::Scope {span}; SCOPED_TIMER(_parent->_reader_init_timer); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); if (_conjunct_ctxs.size() > _parent->_direct_conjunct_size) { _use_pushdown_conjuncts = true; @@ -298,14 +293,14 @@ Status OlapScanner::_init_return_columns(bool need_seq_col) { } Status OlapScanner::get_batch(RuntimeState* state, RowBatch* batch, bool* eof) { - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(_mem_tracker); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); // 2. 
Allocate Row's Tuple buf uint8_t* tuple_buf = batch->tuple_data_pool()->allocate(state->batch_size() * _tuple_desc->byte_size()); bzero(tuple_buf, state->batch_size() * _tuple_desc->byte_size()); Tuple* tuple = reinterpret_cast(tuple_buf); - std::unique_ptr mem_pool(new MemPool(_mem_tracker.get())); + std::unique_ptr mem_pool(new MemPool(_mem_tracker)); int64_t raw_rows_threshold = raw_rows_read() + config::doris_scanner_row_num; int64_t raw_bytes_threshold = config::doris_scanner_row_bytes; { diff --git a/be/src/exec/olap_scanner.h b/be/src/exec/olap_scanner.h index bde36eeafb..44fab43dc6 100644 --- a/be/src/exec/olap_scanner.h +++ b/be/src/exec/olap_scanner.h @@ -42,8 +42,7 @@ class OlapScanNode; class OlapScanner { public: OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool aggregation, - bool need_agg_finalize, const TPaloScanRange& scan_range, - const std::shared_ptr& tracker); + bool need_agg_finalize, const TPaloScanRange& scan_range, MemTracker* tracker); virtual ~OlapScanner() = default; @@ -89,8 +88,6 @@ public: const std::vector& get_query_slots() const { return _query_slots; } - const std::shared_ptr& mem_tracker() const { return _mem_tracker; } - protected: Status _init_tablet_reader_params( const std::vector& key_ranges, const std::vector& filters, @@ -148,7 +145,7 @@ protected: MonotonicStopWatch _watcher; - std::shared_ptr _mem_tracker; + MemTracker* _mem_tracker; TabletSchema _tablet_schema; }; diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc index 083c9c8a4f..2811375395 100644 --- a/be/src/exec/partitioned_aggregation_node.cc +++ b/be/src/exec/partitioned_aggregation_node.cc @@ -37,7 +37,7 @@ #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" +#include "runtime/memory/mem_tracker.h" #include "runtime/raw_value.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" @@ -183,11 +183,11 @@ Status PartitionedAggregationNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); state_ = state; - mem_pool_.reset(new MemPool(mem_tracker().get())); - agg_fn_pool_.reset(new MemPool(expr_mem_tracker().get())); + mem_pool_.reset(new MemPool(mem_tracker())); + agg_fn_pool_.reset(new MemPool(mem_tracker())); ht_resize_timer_ = ADD_TIMER(runtime_profile(), "HTResizeTime"); get_results_timer_ = ADD_TIMER(runtime_profile(), "GetResultsTime"); @@ -228,16 +228,16 @@ Status PartitionedAggregationNode::prepare(RuntimeState* state) { // TODO chenhao const RowDescriptor& row_desc = child(0)->row_desc(); RETURN_IF_ERROR(NewAggFnEvaluator::Create(agg_fns_, state, _pool, agg_fn_pool_.get(), - &agg_fn_evals_, expr_mem_tracker(), row_desc)); + &agg_fn_evals_, row_desc)); - expr_results_pool_.reset(new MemPool(expr_mem_tracker().get())); + expr_results_pool_.reset(new MemPool(mem_tracker())); if (!grouping_exprs_.empty()) { RowDescriptor build_row_desc(intermediate_tuple_desc_, false); RETURN_IF_ERROR(PartitionedHashTableCtx::Create( _pool, state, build_exprs_, grouping_exprs_, true, vector(build_exprs_.size(), true), state->fragment_hash_seed(), - MAX_PARTITION_DEPTH, 1, nullptr, expr_results_pool_.get(), expr_mem_tracker(), - build_row_desc, row_desc, &ht_ctx_)); + MAX_PARTITION_DEPTH, 1, nullptr, expr_results_pool_.get(), build_row_desc, row_desc, + &ht_ctx_)); 
} // AddCodegenDisabledMessage(state); return Status::OK(); @@ -245,10 +245,10 @@ Status PartitionedAggregationNode::prepare(RuntimeState* state) { Status PartitionedAggregationNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); // Open the child before consuming resources in this node. RETURN_IF_ERROR(child(0)->open(state)); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // Claim reservation after the child has been opened to reduce the peak reservation // requirement. @@ -343,7 +343,7 @@ Status PartitionedAggregationNode::open(RuntimeState* state) { } Status PartitionedAggregationNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // 1. `!need_finalize` means this aggregation node not the level two aggregation node // 2. `grouping_exprs_.size() == 0 ` means is not group by // 3. `child(0)->rows_returned() == 0` mean not data from child @@ -411,7 +411,8 @@ Status PartitionedAggregationNode::CopyStringData(const SlotDescriptor& slot_des "Cannot perform aggregation at node with id $0." " Failed to allocate $1 output bytes.", _id, sv->len); - RETURN_LIMIT_EXCEEDED(pool->mem_tracker(), state_, details, sv->len, rst); + RETURN_LIMIT_EXCEEDED(thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker(), + state_, details, sv->len, rst); } memcpy(new_ptr, sv->ptr, sv->len); sv->ptr = new_ptr; @@ -724,7 +725,7 @@ PartitionedAggregationNode::Partition::~Partition() { } Status PartitionedAggregationNode::Partition::InitStreams() { - agg_fn_pool.reset(new MemPool(parent->expr_mem_tracker().get())); + agg_fn_pool.reset(new MemPool(parent->mem_tracker())); DCHECK_EQ(agg_fn_evals.size(), 0); NewAggFnEvaluator::ShallowClone(parent->partition_pool_.get(), agg_fn_pool.get(), parent->agg_fn_evals_, &agg_fn_evals); @@ -850,7 +851,8 @@ Status PartitionedAggregationNode::Partition::Spill(bool more_aggregate_rows) { // TODO(ml): enable spill std::stringstream msg; msg << "New partitioned Aggregation in spill"; - RETURN_LIMIT_EXCEEDED(parent->state_->query_mem_tracker(), parent->state_, msg.str()); + RETURN_LIMIT_EXCEEDED(thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker(), + parent->state_, msg.str()); RETURN_IF_ERROR(SerializeStreamForSpilling()); @@ -929,11 +931,15 @@ Tuple* PartitionedAggregationNode::ConstructIntermediateTuple( << "to allocate $1 bytes for intermediate tuple. " << "Backend: " << BackendOptions::get_localhost() << ", " << "fragment: " << print_id(state_->fragment_instance_id()) << " " - << "Used: " << pool->mem_tracker()->consumption() - << ", Limit: " << pool->mem_tracker()->limit() << ". " + << "Used: " + << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->consumption() + << ", Limit: " + << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->limit() << ". 
" << "You can change the limit by session variable exec_mem_limit."; string details = Substitute(str.str(), _id, tuple_data_size); - *status = pool->mem_tracker()->mem_limit_exceeded(state_, details, tuple_data_size, rst); + *status = thread_context() + ->_thread_mem_tracker_mgr->limiter_mem_tracker() + ->mem_limit_exceeded(state_, details, tuple_data_size, rst); return nullptr; } memset(tuple_data, 0, fixed_size); diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc index ec4e12db73..4f7904cb56 100644 --- a/be/src/exec/partitioned_hash_table.cc +++ b/be/src/exec/partitioned_hash_table.cc @@ -27,7 +27,7 @@ #include "exec/partitioned_hash_table.inline.h" #include "exprs/expr.h" #include "exprs/expr_context.h" -#include "runtime/mem_tracker.h" +#include "runtime/memory/mem_tracker.h" #include "runtime/raw_value.h" #include "runtime/runtime_state.h" #include "runtime/string_value.h" @@ -64,10 +64,8 @@ PartitionedHashTableCtx::PartitionedHashTableCtx(const std::vector& build bool stores_nulls, const std::vector& finds_nulls, int32_t initial_seed, int max_levels, - MemPool* mem_pool, MemPool* expr_results_pool, - const std::shared_ptr& tracker) - : tracker_(tracker), - build_exprs_(build_exprs), + MemPool* mem_pool, MemPool* expr_results_pool) + : build_exprs_(build_exprs), probe_exprs_(probe_exprs), stores_nulls_(stores_nulls), finds_nulls_(finds_nulls), @@ -77,7 +75,6 @@ PartitionedHashTableCtx::PartitionedHashTableCtx(const std::vector& build scratch_row_(nullptr), mem_pool_(mem_pool), expr_results_pool_(expr_results_pool) { - DCHECK(tracker_ != nullptr); DCHECK(!finds_some_nulls_ || stores_nulls_); // Compute the layout and buffer size to store the evaluated expr results DCHECK_EQ(build_exprs_.size(), probe_exprs_.size()); @@ -110,30 +107,31 @@ Status PartitionedHashTableCtx::Init(ObjectPool* pool, RuntimeState* state, int // TODO chenhao replace ExprContext with ScalarFnEvaluator for (int i = 0; i < build_exprs_.size(); i++) { ExprContext* context = pool->add(new ExprContext(build_exprs_[i])); - RETURN_IF_ERROR(context->prepare(state, row_desc, tracker_)); + RETURN_IF_ERROR(context->prepare(state, row_desc)); build_expr_evals_.push_back(context); } DCHECK_EQ(build_exprs_.size(), build_expr_evals_.size()); for (int i = 0; i < probe_exprs_.size(); i++) { ExprContext* context = pool->add(new ExprContext(probe_exprs_[i])); - RETURN_IF_ERROR(context->prepare(state, row_desc_probe, tracker_)); + RETURN_IF_ERROR(context->prepare(state, row_desc_probe)); probe_expr_evals_.push_back(context); } DCHECK_EQ(probe_exprs_.size(), probe_expr_evals_.size()); - return expr_values_cache_.Init(state, tracker_, build_exprs_); + return expr_values_cache_.Init(state, build_exprs_); } -Status PartitionedHashTableCtx::Create( - ObjectPool* pool, RuntimeState* state, const std::vector& build_exprs, - const std::vector& probe_exprs, bool stores_nulls, - const std::vector& finds_nulls, int32_t initial_seed, int max_levels, - int num_build_tuples, MemPool* mem_pool, MemPool* expr_results_pool, - const std::shared_ptr& tracker, const RowDescriptor& row_desc, - const RowDescriptor& row_desc_probe, std::unique_ptr* ht_ctx) { +Status PartitionedHashTableCtx::Create(ObjectPool* pool, RuntimeState* state, + const std::vector& build_exprs, + const std::vector& probe_exprs, bool stores_nulls, + const std::vector& finds_nulls, int32_t initial_seed, + int max_levels, int num_build_tuples, MemPool* mem_pool, + MemPool* expr_results_pool, const RowDescriptor& row_desc, + const RowDescriptor& 
row_desc_probe, + std::unique_ptr* ht_ctx) { ht_ctx->reset(new PartitionedHashTableCtx(build_exprs, probe_exprs, stores_nulls, finds_nulls, - initial_seed, max_levels, mem_pool, expr_results_pool, - tracker)); + initial_seed, max_levels, mem_pool, + expr_results_pool)); return (*ht_ctx)->Init(pool, state, num_build_tuples, row_desc, row_desc_probe); } @@ -291,7 +289,6 @@ PartitionedHashTableCtx::ExprValuesCache::ExprValuesCache() null_bitmap_(0) {} Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state, - const std::shared_ptr& tracker, const std::vector& build_exprs) { // Initialize the number of expressions. num_exprs_ = build_exprs.size(); @@ -310,12 +307,14 @@ Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state, MAX_EXPR_VALUES_ARRAY_SIZE / expr_values_bytes_per_row_)); int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_); - Status st = tracker->check_limit(mem_usage); + Status st = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit( + mem_usage); if (UNLIKELY(!st)) { capacity_ = 0; string details = Substitute( "PartitionedHashTableCtx::ExprValuesCache failed to allocate $0 bytes", mem_usage); - RETURN_LIMIT_EXCEEDED(tracker, state, details, mem_usage, st); + RETURN_LIMIT_EXCEEDED(thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker(), + state, details, mem_usage, st); } int expr_values_size = expr_values_bytes_per_row_ * capacity_; diff --git a/be/src/exec/partitioned_hash_table.h b/be/src/exec/partitioned_hash_table.h index 45f13a0d1f..c531b8da0a 100644 --- a/be/src/exec/partitioned_hash_table.h +++ b/be/src/exec/partitioned_hash_table.h @@ -33,7 +33,6 @@ namespace doris { class Expr; class ExprContext; -class MemTracker; class PartitionedHashTable; class RowDescriptor; class RuntimeState; @@ -110,8 +109,7 @@ public: const std::vector& probe_exprs, bool stores_nulls, const std::vector& finds_nulls, int32_t initial_seed, int max_levels, int num_build_tuples, MemPool* mem_pool, MemPool* expr_results_pool, - const std::shared_ptr& tracker, const RowDescriptor& row_desc, - const RowDescriptor& row_desc_probe, + const RowDescriptor& row_desc, const RowDescriptor& row_desc_probe, std::unique_ptr* ht_ctx); /// Initialize the build and probe expression evaluators. @@ -204,11 +202,9 @@ public: /// Allocates memory and initializes various data structures. Return error status /// if memory allocation leads to the memory limits of the exec node to be exceeded. - /// 'tracker' is the memory tracker of the exec node which owns this PartitionedHashTableCtx. - Status Init(RuntimeState* state, const std::shared_ptr& tracker, - const std::vector& build_exprs); + Status Init(RuntimeState* state, const std::vector& build_exprs); - /// Frees up various resources and updates memory tracker with proper accounting. + /// Frees up various resources. void Close(); /// Resets the cache states (iterators, end pointers etc) before writing. @@ -373,8 +369,7 @@ private: PartitionedHashTableCtx(const std::vector& build_exprs, const std::vector& probe_exprs, bool stores_nulls, const std::vector& finds_nulls, int32_t initial_seed, - int max_levels, MemPool* mem_pool, MemPool* expr_results_pool, - const std::shared_ptr& tracker); + int max_levels, MemPool* mem_pool, MemPool* expr_results_pool); /// Allocate various buffers for storing expression evaluation results, hash values, /// null bits etc. 
Also allocate evaluators for the build and probe expressions and @@ -440,8 +435,6 @@ private: bool stores_nulls() const { return stores_nulls_; } bool finds_some_nulls() const { return finds_some_nulls_; } - std::shared_ptr tracker_; - const std::vector& build_exprs_; std::vector build_expr_evals_; diff --git a/be/src/exec/repeat_node.cpp b/be/src/exec/repeat_node.cpp index 23e7bf299f..6f075cb00f 100644 --- a/be/src/exec/repeat_node.cpp +++ b/be/src/exec/repeat_node.cpp @@ -45,7 +45,7 @@ RepeatNode::~RepeatNode() {} Status RepeatNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); _runtime_state = state; _tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id); if (_tuple_desc == nullptr) { @@ -57,8 +57,8 @@ Status RepeatNode::prepare(RuntimeState* state) { Status RepeatNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(child(0)->open(state)); return Status::OK(); @@ -166,7 +166,7 @@ Status RepeatNode::get_repeated_batch(RowBatch* child_row_batch, int repeat_id_i Status RepeatNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); DCHECK(_repeat_id_idx >= 0); for (const std::vector& v : _grouping_list) { diff --git a/be/src/exec/schema_scan_node.cpp b/be/src/exec/schema_scan_node.cpp index d2332f219f..6a1b546ec1 100644 --- a/be/src/exec/schema_scan_node.cpp +++ b/be/src/exec/schema_scan_node.cpp @@ -95,7 +95,7 @@ Status SchemaScanNode::prepare(RuntimeState* state) { } RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // new one mem pool _tuple_pool.reset(new (std::nothrow) MemPool()); @@ -193,9 +193,9 @@ Status SchemaScanNode::open(RuntimeState* state) { } SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); if (_scanner_param.user) { TSetSessionParams param; @@ -239,7 +239,7 @@ Status SchemaScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* RETURN_IF_CANCELLED(state); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); if (reached_limit()) { *eos = true; diff --git a/be/src/exec/select_node.cpp b/be/src/exec/select_node.cpp index e59557e8e7..81574ac9cc 100644 --- a/be/src/exec/select_node.cpp +++ b/be/src/exec/select_node.cpp @@ -35,14 +35,14 @@ SelectNode::SelectNode(ObjectPool* pool, const TPlanNode& tnode, const Descripto Status SelectNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); _child_row_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); return Status::OK(); } Status 
SelectNode::open(RuntimeState* state) { - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(child(0)->open(state)); return Status::OK(); } @@ -50,7 +50,7 @@ Status SelectNode::open(RuntimeState* state) { Status SelectNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { RETURN_IF_CANCELLED(state); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); if (reached_limit() || (_child_row_idx == _child_row_batch->num_rows() && _child_eos)) { // we're already done or we exhausted the last child batch and there won't be any diff --git a/be/src/exec/set_operation_node.cpp b/be/src/exec/set_operation_node.cpp index 8126b380f4..32794051ef 100644 --- a/be/src/exec/set_operation_node.cpp +++ b/be/src/exec/set_operation_node.cpp @@ -41,15 +41,14 @@ Status SetOperationNode::init(const TPlanNode& tnode, RuntimeState* state) { Status SetOperationNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); DCHECK(_tuple_desc != nullptr); - _build_pool.reset(new MemPool(mem_tracker().get())); + _build_pool.reset(new MemPool(mem_tracker())); _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); _probe_timer = ADD_TIMER(runtime_profile(), "ProbeTime"); for (size_t i = 0; i < _child_expr_lists.size(); ++i) { - RETURN_IF_ERROR(Expr::prepare(_child_expr_lists[i], state, child(i)->row_desc(), - expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_child_expr_lists[i], state, child(i)->row_desc())); DCHECK_EQ(_child_expr_lists[i].size(), _tuple_desc->slots().size()); } _build_tuple_size = child(0)->row_desc().tuple_descriptors().size(); @@ -136,9 +135,8 @@ bool SetOperationNode::equals(TupleRow* row, TupleRow* other) { Status SetOperationNode::open(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::open(state)); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB( - "SetOperation, while constructing the hash table."); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); + SCOPED_UPDATE_MEM_EXCEED_CALL_BACK("SetOperation, while constructing the hash table."); RETURN_IF_CANCELLED(state); // open result expr lists. 
for (const std::vector& exprs : _child_expr_lists) { @@ -146,7 +144,7 @@ Status SetOperationNode::open(RuntimeState* state) { } // initial build hash table used for remove duplicated _hash_tbl.reset(new HashTable(_child_expr_lists[0], _child_expr_lists[1], _build_tuple_size, - true, _find_nulls, id(), mem_tracker(), state->batch_size() * 2)); + true, _find_nulls, id(), state->batch_size() * 2)); RowBatch build_batch(child(0)->row_desc(), state->batch_size()); RETURN_IF_ERROR(child(0)->open(state)); diff --git a/be/src/exec/set_operation_node.h b/be/src/exec/set_operation_node.h index 9fbb5d75df..2d5900345f 100644 --- a/be/src/exec/set_operation_node.h +++ b/be/src/exec/set_operation_node.h @@ -85,8 +85,7 @@ Status SetOperationNode::refresh_hash_table(int child_id) { SCOPED_TIMER(_build_timer); std::unique_ptr temp_tbl(new HashTable( _child_expr_lists[0], _child_expr_lists[child_id], _build_tuple_size, true, _find_nulls, - id(), mem_tracker(), - _valid_element_in_hash_tbl / HashTable::MAX_BUCKET_OCCUPANCY_FRACTION + 1)); + id(), _valid_element_in_hash_tbl / HashTable::MAX_BUCKET_OCCUPANCY_FRACTION + 1)); _hash_tbl_iterator = _hash_tbl->begin(); while (_hash_tbl_iterator.has_next()) { if constexpr (keep_matched) { diff --git a/be/src/exec/sort_exec_exprs.cpp b/be/src/exec/sort_exec_exprs.cpp index add2b241d7..bede01e503 100644 --- a/be/src/exec/sort_exec_exprs.cpp +++ b/be/src/exec/sort_exec_exprs.cpp @@ -50,14 +50,11 @@ Status SortExecExprs::init(const std::vector& lhs_ordering_expr_ct } Status SortExecExprs::prepare(RuntimeState* state, const RowDescriptor& child_row_desc, - const RowDescriptor& output_row_desc, - const std::shared_ptr& expr_mem_tracker) { + const RowDescriptor& output_row_desc) { if (_materialize_tuple) { - RETURN_IF_ERROR( - Expr::prepare(_sort_tuple_slot_expr_ctxs, state, child_row_desc, expr_mem_tracker)); + RETURN_IF_ERROR(Expr::prepare(_sort_tuple_slot_expr_ctxs, state, child_row_desc)); } - RETURN_IF_ERROR( - Expr::prepare(_lhs_ordering_expr_ctxs, state, output_row_desc, expr_mem_tracker)); + RETURN_IF_ERROR(Expr::prepare(_lhs_ordering_expr_ctxs, state, output_row_desc)); return Status::OK(); } diff --git a/be/src/exec/sort_exec_exprs.h b/be/src/exec/sort_exec_exprs.h index 7ef54e5fc8..798cb5eebb 100644 --- a/be/src/exec/sort_exec_exprs.h +++ b/be/src/exec/sort_exec_exprs.h @@ -46,8 +46,7 @@ public: // prepare all expressions used for sorting and tuple materialization. Status prepare(RuntimeState* state, const RowDescriptor& child_row_desc, - const RowDescriptor& output_row_desc, - const std::shared_ptr& mem_tracker); + const RowDescriptor& output_row_desc); // open all expressions used for sorting and tuple materialization. 
Status open(RuntimeState* state); diff --git a/be/src/exec/spill_sort_node.cc b/be/src/exec/spill_sort_node.cc index 9c839598d8..a40d889b0e 100644 --- a/be/src/exec/spill_sort_node.cc +++ b/be/src/exec/spill_sort_node.cc @@ -42,17 +42,16 @@ Status SpillSortNode::init(const TPlanNode& tnode, RuntimeState* state) { Status SpillSortNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); - RETURN_IF_ERROR(_sort_exec_exprs.prepare(state, child(0)->row_desc(), _row_descriptor, - expr_mem_tracker())); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); + RETURN_IF_ERROR(_sort_exec_exprs.prepare(state, child(0)->row_desc(), _row_descriptor)); // AddExprCtxsToFree(_sort_exec_exprs); return Status::OK(); } Status SpillSortNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(_sort_exec_exprs.open(state)); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(state->check_query_state("Spill sort, while open.")); @@ -64,7 +63,7 @@ Status SpillSortNode::open(RuntimeState* state) { TupleRowComparator less_than(_sort_exec_exprs, _is_asc_order, _nulls_first); // Create and initialize the external sort impl object _sorter.reset(new SpillSorter(less_than, _sort_exec_exprs.sort_tuple_slot_expr_ctxs(), - &_row_descriptor, mem_tracker(), runtime_profile(), state)); + &_row_descriptor, runtime_profile(), state)); RETURN_IF_ERROR(_sorter->init()); } @@ -82,7 +81,7 @@ Status SpillSortNode::open(RuntimeState* state) { Status SpillSortNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(state->check_query_state("Spill sort, while getting next.")); diff --git a/be/src/exec/table_function_node.cpp b/be/src/exec/table_function_node.cpp index 984ebb0266..a1cbd9f10f 100644 --- a/be/src/exec/table_function_node.cpp +++ b/be/src/exec/table_function_node.cpp @@ -90,11 +90,11 @@ bool TableFunctionNode::_is_inner_and_empty() { Status TableFunctionNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); _num_rows_filtered_counter = ADD_COUNTER(_runtime_profile, "RowsFiltered", TUnit::UNIT); - RETURN_IF_ERROR(Expr::prepare(_fn_ctxs, state, _row_descriptor, expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_fn_ctxs, state, _row_descriptor)); for (auto fn : _fns) { RETURN_IF_ERROR(fn->prepare()); } @@ -104,9 +104,9 @@ Status TableFunctionNode::prepare(RuntimeState* state) { Status TableFunctionNode::open(RuntimeState* state) { START_AND_SCOPE_SPAN(state->get_tracer(), span, "TableFunctionNode::open"); SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(Expr::open(_fn_ctxs, state)); RETURN_IF_ERROR(vectorized::VExpr::open(_vfn_ctxs, state)); @@ -198,7 +198,7 @@ bool TableFunctionNode::_roll_table_functions(int last_eos_idx) { // And the inner loop is to expand the row 
by table functions, and output row by row. Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); const RowDescriptor& parent_rowdesc = row_batch->row_desc(); const RowDescriptor& child_rowdesc = _children[0]->row_desc(); diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 097b667c3e..cdaa2a9a61 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -168,7 +168,7 @@ std::string OlapTablePartition::debug_string(TupleDescriptor* tuple_desc) const OlapTablePartitionParam::OlapTablePartitionParam(std::shared_ptr schema, const TOlapTablePartitionParam& t_param) - : _schema(schema), _t_param(t_param), _mem_pool(new MemPool("OlapTablePartitionParam")) {} + : _schema(schema), _t_param(t_param), _mem_pool(new MemPool()) {} OlapTablePartitionParam::~OlapTablePartitionParam() {} @@ -419,7 +419,7 @@ VOlapTablePartitionParam::VOlapTablePartitionParam(std::shared_ptrtuple_desc()->slots()), - _mem_tracker(MemTracker::create_virtual_tracker(-1, "OlapTablePartitionParam")) { + _mem_tracker(std::make_unique("OlapTablePartitionParam")) { for (auto slot : _slots) { _partition_block.insert( {slot->get_empty_mutable_column(), slot->get_data_type_ptr(), slot->col_name()}); diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h index f287fcfa0d..247cd1a4f1 100644 --- a/be/src/exec/tablet_info.h +++ b/be/src/exec/tablet_info.h @@ -295,7 +295,7 @@ private: ObjectPool _obj_pool; vectorized::Block _partition_block; - std::shared_ptr _mem_tracker; + std::unique_ptr _mem_tracker; std::vector _partitions; std::unique_ptr> _partitions_map; diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp index 141c760f8e..7c67a060ed 100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -45,9 +45,9 @@ namespace stream_load { NodeChannel::NodeChannel(OlapTableSink* parent, IndexChannel* index_channel, int64_t node_id) : _parent(parent), _index_channel(index_channel), _node_id(node_id) { - _node_channel_tracker = MemTracker::create_tracker( - -1, fmt::format("NodeChannel:indexID={}:threadId={}", - std::to_string(_index_channel->_index_id), tls_ctx()->thread_id_str())); + _node_channel_tracker = std::make_unique(fmt::format( + "NodeChannel:indexID={}:threadId={}", std::to_string(_index_channel->_index_id), + thread_context()->thread_id_str())); } NodeChannel::~NodeChannel() noexcept { @@ -71,7 +71,7 @@ NodeChannel::~NodeChannel() noexcept { // no need to set _cancel_msg because the error will be // returned directly via "TabletSink::prepare()" method. 
Status NodeChannel::init(RuntimeState* state) { - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_node_channel_tracker); + SCOPED_CONSUME_MEM_TRACKER(_node_channel_tracker.get()); _tuple_desc = _parent->_output_tuple_desc; _state = state; auto node = _parent->_nodes_info->find_node(_node_id); @@ -117,7 +117,7 @@ Status NodeChannel::init(RuntimeState* state) { } void NodeChannel::open() { - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_node_channel_tracker); + SCOPED_CONSUME_MEM_TRACKER(_node_channel_tracker.get()); PTabletWriterOpenRequest request; request.set_allocated_id(&_parent->_load_id); request.set_index_id(_index_channel->_index_id); @@ -164,7 +164,7 @@ void NodeChannel::_cancel_with_msg(const std::string& msg) { Status NodeChannel::open_wait() { _open_closure->join(); - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_node_channel_tracker); + SCOPED_CONSUME_MEM_TRACKER(_node_channel_tracker.get()); if (_open_closure->cntl.Failed()) { if (!ExecEnv::GetInstance()->brpc_internal_client_cache()->available( _stub, _node_info.host, _node_info.brpc_port)) { @@ -260,7 +260,7 @@ Status NodeChannel::open_wait() { } Status NodeChannel::add_row(Tuple* input_tuple, int64_t tablet_id) { - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_node_channel_tracker); + SCOPED_CONSUME_MEM_TRACKER(_node_channel_tracker.get()); // If add_row() when _eos_is_produced==true, there must be sth wrong, we can only mark this channel as failed. auto st = none_of({_cancelled, _eos_is_produced}); if (!st.ok()) { @@ -279,7 +279,7 @@ Status NodeChannel::add_row(Tuple* input_tuple, int64_t tablet_id) { // It's fine to do a fake add_row() and return OK, because we will check _cancelled in next add_row() or mark_close(). while (!_cancelled && _pending_batches_num > 0 && (_pending_batches_bytes > _max_pending_batches_bytes || - _parent->_mem_tracker->any_limit_exceeded())) { + _parent->_mem_tracker->limit_exceeded(_max_pending_batches_bytes))) { SCOPED_ATOMIC_TIMER(&_mem_exceeded_block_ns); std::this_thread::sleep_for(std::chrono::milliseconds(10)); } @@ -310,7 +310,7 @@ Status NodeChannel::add_row(Tuple* input_tuple, int64_t tablet_id) { } void NodeChannel::mark_close() { - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_node_channel_tracker); + SCOPED_CONSUME_MEM_TRACKER(_node_channel_tracker.get()); auto st = none_of({_cancelled, _eos_is_produced}); if (!st.ok()) { return; @@ -339,7 +339,7 @@ void NodeChannel::_close_check() { CHECK(_cur_batch == nullptr) << name(); } Status NodeChannel::close_wait(RuntimeState* state) { - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_node_channel_tracker); + SCOPED_CONSUME_MEM_TRACKER(_node_channel_tracker.get()); // set _is_closed to true finally Defer set_closed {[&]() { std::lock_guard l(_closed_lock); @@ -385,7 +385,7 @@ Status NodeChannel::close_wait(RuntimeState* state) { } void NodeChannel::cancel(const std::string& cancel_msg) { - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_node_channel_tracker); + SCOPED_CONSUME_MEM_TRACKER(_node_channel_tracker.get()); // set _is_closed to true finally Defer set_closed {[&]() { std::lock_guard l(_closed_lock); @@ -444,7 +444,7 @@ int NodeChannel::try_send_and_fetch_status(RuntimeState* state, } void NodeChannel::try_send_batch(RuntimeState* state) { - SCOPED_ATTACH_TASK_THREAD(state, _node_channel_tracker); + SCOPED_ATTACH_TASK(state); SCOPED_ATOMIC_TIMER(&_actual_consume_ns); AddBatchReq send_batch; { @@ -562,7 +562,7 @@ void NodeChannel::clear_all_batches() { } Status IndexChannel::init(RuntimeState* state, const std::vector& tablets) { - 
SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_index_channel_tracker); + SCOPED_CONSUME_MEM_TRACKER(_index_channel_tracker.get()); for (auto& tablet : tablets) { auto location = _parent->_location->find_tablet(tablet.tablet_id); if (location == nullptr) { @@ -600,7 +600,7 @@ Status IndexChannel::init(RuntimeState* state, const std::vectorobj_pool()->add(new RuntimeProfile("OlapTableSink")); _mem_tracker = - MemTracker::create_tracker(-1, "OlapTableSink:" + std::to_string(state->load_job_id()), - state->instance_mem_tracker()); + std::make_unique("OlapTableSink:" + std::to_string(state->load_job_id())); SCOPED_TIMER(_profile->total_time_counter()); - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); if (!_is_vectorized) { // Prepare the exprs to run. - RETURN_IF_ERROR( - Expr::prepare(_output_expr_ctxs, state, _input_row_desc, _expr_mem_tracker)); + RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _input_row_desc)); } // get table's tuple descriptor @@ -835,7 +833,7 @@ Status OlapTableSink::prepare(RuntimeState* state) { Status OlapTableSink::open(RuntimeState* state) { SCOPED_TIMER(_profile->total_time_counter()); SCOPED_TIMER(_open_timer); - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); if (!_is_vectorized) { // Prepare the exprs to run. RETURN_IF_ERROR(Expr::open(_output_expr_ctxs, state)); @@ -876,7 +874,7 @@ Status OlapTableSink::open(RuntimeState* state) { Status OlapTableSink::send(RuntimeState* state, RowBatch* input_batch) { SCOPED_TIMER(_profile->total_time_counter()); - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); // update incrementally so that FE can get the progress. // the real 'num_rows_load_total' will be set when sink being closed. 
int64_t num_rows = input_batch->num_rows(); @@ -1263,7 +1261,7 @@ Status OlapTableSink::_validate_data(RuntimeState* state, RowBatch* batch, Bitma void OlapTableSink::_send_batch_process(RuntimeState* state) { SCOPED_TIMER(_non_blocking_send_timer); - SCOPED_ATTACH_TASK_THREAD(state, _mem_tracker); + SCOPED_ATTACH_TASK(state); do { int running_channels_num = 0; for (auto index_channel : _channels) { diff --git a/be/src/exec/tablet_sink.h b/be/src/exec/tablet_sink.h index 342b028524..47431aec61 100644 --- a/be/src/exec/tablet_sink.h +++ b/be/src/exec/tablet_sink.h @@ -248,7 +248,7 @@ protected: std::string _load_info; std::string _name; - std::shared_ptr _node_channel_tracker; + std::unique_ptr _node_channel_tracker; TupleDescriptor* _tuple_desc = nullptr; NodeInfo _node_info; @@ -321,7 +321,7 @@ public: IndexChannel(OlapTableSink* parent, int64_t index_id, bool is_vec) : _parent(parent), _index_id(index_id), _is_vectorized(is_vec) { _index_channel_tracker = - MemTracker::create_tracker(-1, "IndexChannel:indexID=" + std::to_string(_index_id)); + std::make_unique("IndexChannel:indexID=" + std::to_string(_index_id)); } ~IndexChannel() = default; @@ -383,12 +383,12 @@ private: std::unordered_map _failed_channels_msgs; Status _intolerable_failure_status = Status::OK(); - std::shared_ptr _index_channel_tracker; + std::unique_ptr _index_channel_tracker; }; template void IndexChannel::add_row(const Row& tuple, int64_t tablet_id) { - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_index_channel_tracker); + SCOPED_CONSUME_MEM_TRACKER(_index_channel_tracker.get()); auto it = _channels_by_tablet.find(tablet_id); DCHECK(it != _channels_by_tablet.end()) << "unknown tablet, tablet_id=" << tablet_id; for (const auto& channel : it->second) { @@ -450,7 +450,7 @@ protected: bool _is_vectorized = false; - std::shared_ptr _mem_tracker; + std::unique_ptr _mem_tracker; ObjectPool* _pool; const RowDescriptor& _input_row_desc; diff --git a/be/src/exec/topn_node.cpp b/be/src/exec/topn_node.cpp index 4ce44e4ce4..f5215b10d5 100644 --- a/be/src/exec/topn_node.cpp +++ b/be/src/exec/topn_node.cpp @@ -60,10 +60,9 @@ Status TopNNode::init(const TPlanNode& tnode, RuntimeState* state) { Status TopNNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); - _tuple_pool.reset(new MemPool(mem_tracker().get())); - RETURN_IF_ERROR(_sort_exec_exprs.prepare(state, child(0)->row_desc(), _row_descriptor, - expr_mem_tracker())); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); + _tuple_pool.reset(new MemPool(mem_tracker())); + RETURN_IF_ERROR(_sort_exec_exprs.prepare(state, child(0)->row_desc(), _row_descriptor)); // AddExprCtxsToFree(_sort_exec_exprs); _tuple_row_less_than.reset( @@ -77,8 +76,8 @@ Status TopNNode::prepare(RuntimeState* state) { Status TopNNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(state->check_query_state("Top n, before open.")); RETURN_IF_ERROR(_sort_exec_exprs.open(state)); @@ -130,7 +129,7 @@ Status TopNNode::open(RuntimeState* state) { Status TopNNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + 
SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(state->check_query_state("Top n, before moving result to row_batch.")); @@ -233,7 +232,7 @@ void TopNNode::push_down_predicate(RuntimeState* state, std::list* if ((*iter)->root()->is_bound(&_tuple_ids)) { // LOG(INFO) << "push down success expr is " << (*iter)->debug_string(); // (*iter)->get_child(0)->prepare(state, row_desc()); - (*iter)->prepare(state, row_desc(), _expr_mem_tracker); + (*iter)->prepare(state, row_desc()); (*iter)->open(state); _conjunct_ctxs.push_back(*iter); iter = expr_ctxs->erase(iter); diff --git a/be/src/exec/union_node.cpp b/be/src/exec/union_node.cpp index 75efcaa1f1..de5e7d7aec 100644 --- a/be/src/exec/union_node.cpp +++ b/be/src/exec/union_node.cpp @@ -69,7 +69,7 @@ Status UnionNode::init(const TPlanNode& tnode, RuntimeState* state) { Status UnionNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); DCHECK(_tuple_desc != nullptr); _materialize_exprs_evaluate_timer = @@ -77,7 +77,7 @@ Status UnionNode::prepare(RuntimeState* state) { _codegend_union_materialize_batch_fns.resize(_child_expr_lists.size()); // Prepare const expr lists. for (const std::vector& exprs : _const_expr_lists) { - RETURN_IF_ERROR(Expr::prepare(exprs, state, row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(exprs, state, row_desc())); // TODO(zc) // AddExprCtxsToFree(exprs); DCHECK_EQ(exprs.size(), _tuple_desc->slots().size()); @@ -85,8 +85,7 @@ Status UnionNode::prepare(RuntimeState* state) { // Prepare result expr lists. for (int i = 0; i < _child_expr_lists.size(); ++i) { - RETURN_IF_ERROR(Expr::prepare(_child_expr_lists[i], state, child(i)->row_desc(), - expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_child_expr_lists[i], state, child(i)->row_desc())); // TODO(zc) // AddExprCtxsToFree(_child_expr_lists[i]); DCHECK_EQ(_child_expr_lists[i].size(), _tuple_desc->slots().size()); @@ -96,8 +95,8 @@ Status UnionNode::prepare(RuntimeState* state) { Status UnionNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); RETURN_IF_ERROR(ExecNode::open(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); // open const expr lists. 
for (const std::vector& exprs : _const_expr_lists) { RETURN_IF_ERROR(Expr::open(exprs, state)); @@ -235,7 +234,7 @@ Status UnionNode::get_next_const(RuntimeState* state, RowBatch* row_batch) { Status UnionNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker()); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); if (_to_close_child_idx != -1) { diff --git a/be/src/exprs/agg_fn_evaluator.cpp b/be/src/exprs/agg_fn_evaluator.cpp index 6ef80aa1ff..66f4235cf1 100644 --- a/be/src/exprs/agg_fn_evaluator.cpp +++ b/be/src/exprs/agg_fn_evaluator.cpp @@ -35,7 +35,7 @@ #include "exprs/anyval_util.h" #include "runtime/datetime_value.h" -#include "runtime/mem_tracker.h" +#include "runtime/memory/mem_tracker.h" #include "runtime/raw_value.h" #include "runtime/user_function_cache.h" #include "udf/udf_internal.h" @@ -145,7 +145,6 @@ AggFnEvaluator::AggFnEvaluator(const TExprNode& desc, bool is_analytic_fn) Status AggFnEvaluator::prepare(RuntimeState* state, const RowDescriptor& desc, MemPool* pool, const SlotDescriptor* intermediate_slot_desc, const SlotDescriptor* output_slot_desc, - const std::shared_ptr& mem_tracker, FunctionContext** agg_fn_ctx) { DCHECK(pool != nullptr); DCHECK(intermediate_slot_desc != nullptr); @@ -155,9 +154,9 @@ Status AggFnEvaluator::prepare(RuntimeState* state, const RowDescriptor& desc, M _intermediate_slot_desc = intermediate_slot_desc; _string_buffer_len = 0; - _mem_tracker = MemTracker::create_virtual_tracker(-1, "AggFnEvaluator", mem_tracker); + _mem_tracker = std::make_unique("AggFnEvaluator"); - Status status = Expr::prepare(_input_exprs_ctxs, state, desc, mem_tracker); + Status status = Expr::prepare(_input_exprs_ctxs, state, desc); RETURN_IF_ERROR(status); ObjectPool* obj_pool = state->obj_pool(); diff --git a/be/src/exprs/agg_fn_evaluator.h b/be/src/exprs/agg_fn_evaluator.h index af864b14d3..aedcb9f01c 100644 --- a/be/src/exprs/agg_fn_evaluator.h +++ b/be/src/exprs/agg_fn_evaluator.h @@ -80,8 +80,7 @@ public: // TODO: should we give them their own pool? 
Status prepare(RuntimeState* state, const RowDescriptor& desc, MemPool* pool, const SlotDescriptor* intermediate_slot_desc, - const SlotDescriptor* output_slot_desc, - const std::shared_ptr& mem_tracker, FunctionContext** agg_fn_ctx); + const SlotDescriptor* output_slot_desc, FunctionContext** agg_fn_ctx); Status open(RuntimeState* state, FunctionContext* agg_fn_ctx); @@ -188,7 +187,7 @@ private: std::vector _input_exprs_ctxs; std::unique_ptr _string_buffer; //for count distinct int _string_buffer_len; //for count distinct - std::shared_ptr _mem_tracker; // saved c'tor param + std::unique_ptr _mem_tracker; // saved c'tor param const TypeDescriptor _return_type; const TypeDescriptor _intermediate_type; diff --git a/be/src/exprs/anyval_util.cpp b/be/src/exprs/anyval_util.cpp index f92419123f..754f1f4dbd 100644 --- a/be/src/exprs/anyval_util.cpp +++ b/be/src/exprs/anyval_util.cpp @@ -22,7 +22,7 @@ #include "common/object_pool.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" +#include "runtime/memory/mem_tracker.h" namespace doris { using doris_udf::BooleanVal; @@ -47,7 +47,8 @@ Status allocate_any_val(RuntimeState* state, MemPool* pool, const TypeDescriptor *result = reinterpret_cast( pool->try_allocate_aligned(anyval_size, anyval_alignment, &rst)); if (*result == nullptr) { - RETURN_LIMIT_EXCEEDED(pool->mem_tracker(), state, mem_limit_exceeded_msg, anyval_size, rst); + RETURN_LIMIT_EXCEEDED(thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker(), + state, mem_limit_exceeded_msg, anyval_size, rst); } memset(static_cast(*result), 0, anyval_size); return Status::OK(); diff --git a/be/src/exprs/bloomfilter_predicate.h b/be/src/exprs/bloomfilter_predicate.h index cb0736dcca..479d193fdc 100644 --- a/be/src/exprs/bloomfilter_predicate.h +++ b/be/src/exprs/bloomfilter_predicate.h @@ -28,7 +28,6 @@ #include "olap/decimal12.h" #include "olap/rowset/segment_v2/bloom_filter.h" #include "olap/uint24.h" -#include "runtime/mem_tracker.h" namespace doris { namespace detail { @@ -94,15 +93,9 @@ public: template class BloomFilterFuncBase : public IBloomFilterFuncBase { public: - BloomFilterFuncBase() : _inited(false) { - _tracker = MemTracker::create_virtual_tracker(-1, "BloomFilterFunc"); - } + BloomFilterFuncBase() : _inited(false) {} - virtual ~BloomFilterFuncBase() { - if (_tracker != nullptr) { - _tracker->release(_bloom_filter_alloced); - } - } + virtual ~BloomFilterFuncBase() {} Status init(int64_t expect_num, double fpp) override { size_t filter_size = BloomFilterAdaptor::optimal_bit_num(expect_num, fpp); @@ -116,7 +109,6 @@ public: _bloom_filter_alloced = bloom_filter_length; _bloom_filter.reset(BloomFilterAdaptor::create()); RETURN_IF_ERROR(_bloom_filter->init(bloom_filter_length)); - _tracker->consume(_bloom_filter_alloced); _inited = true; return Status::OK(); } @@ -139,7 +131,6 @@ public: } _bloom_filter_alloced = len; - _tracker->consume(_bloom_filter_alloced); return _bloom_filter->init(data, len); } @@ -151,14 +142,12 @@ public: void light_copy(IBloomFilterFuncBase* bloomfilter_func) override { auto other_func = static_cast(bloomfilter_func); - _tracker = nullptr; // Avoid repeated release when ~BloomFilterFuncBase _bloom_filter_alloced = other_func->_bloom_filter_alloced; _bloom_filter = other_func->_bloom_filter; _inited = other_func->_inited; } protected: - std::shared_ptr _tracker; // bloom filter size int32_t _bloom_filter_alloced; std::shared_ptr _bloom_filter; diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp index 283eb81fca..40224094ce 100644 --- 
a/be/src/exprs/expr.cpp +++ b/be/src/exprs/expr.cpp @@ -510,9 +510,9 @@ int Expr::compute_results_layout(const std::vector& ctxs, std::vec } Status Expr::prepare(const std::vector& ctxs, RuntimeState* state, - const RowDescriptor& row_desc, const std::shared_ptr& tracker) { + const RowDescriptor& row_desc) { for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->prepare(state, row_desc, tracker)); + RETURN_IF_ERROR(ctxs[i]->prepare(state, row_desc)); } return Status::OK(); } diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h index b6a6023fc9..2beb57c732 100644 --- a/be/src/exprs/expr.h +++ b/be/src/exprs/expr.h @@ -197,8 +197,7 @@ public: /// Convenience function for preparing multiple expr trees. /// Allocations from 'ctxs' will be counted against 'tracker'. static Status prepare(const std::vector& ctxs, RuntimeState* state, - const RowDescriptor& row_desc, - const std::shared_ptr& tracker); + const RowDescriptor& row_desc); /// Convenience function for opening multiple expr trees. static Status open(const std::vector& ctxs, RuntimeState* state); diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp index 007372d786..7076f1372f 100644 --- a/be/src/exprs/expr_context.cpp +++ b/be/src/exprs/expr_context.cpp @@ -28,7 +28,6 @@ #include "exprs/expr.h" #include "exprs/slot_ref.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" #include "runtime/raw_value.h" #include "runtime/runtime_state.h" #include "runtime/thread_context.h" @@ -46,14 +45,11 @@ ExprContext::~ExprContext() { } } -Status ExprContext::prepare(RuntimeState* state, const RowDescriptor& row_desc, - const std::shared_ptr& tracker) { +Status ExprContext::prepare(RuntimeState* state, const RowDescriptor& row_desc) { DCHECK(!_prepared); - _mem_tracker = tracker; - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); DCHECK(_pool.get() == nullptr); _prepared = true; - _pool.reset(new MemPool(_mem_tracker.get())); + _pool.reset(new MemPool()); return _root->prepare(state, row_desc, this); } @@ -62,7 +58,6 @@ Status ExprContext::open(RuntimeState* state) { if (_opened) { return Status::OK(); } - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); _opened = true; // Fragment-local state is only initialized for original contexts. Clones inherit the // original's fragment state and only need to have thread-local state initialized. 
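(For orientation only, and not part of the diff: a minimal sketch of the usage pattern the ExprContext and MemPool hunks above converge on, assuming SCOPED_CONSUME_MEM_TRACKER, MemTracker and MemPool behave as shown elsewhere in this patch; the tracker label and function name below are hypothetical.)

    #include <memory>

    #include "runtime/mem_pool.h"
    #include "runtime/memory/mem_tracker.h"
    #include "runtime/thread_context.h"

    namespace doris {
    // Allocations performed while the scope below is active are charged to `tracker`
    // through the thread-local context, which is why MemPool and ExprContext::prepare()
    // no longer take an explicit tracker argument in this patch.
    void example_expr_allocation_accounting() {
        auto tracker = std::make_unique<MemTracker>("ExampleExprContext"); // hypothetical label
        SCOPED_CONSUME_MEM_TRACKER(tracker.get()); // attach to the current thread
        MemPool pool;                              // tracker parameter removed by this patch
        uint8_t* buf = pool.allocate(4096);        // accounted against "ExampleExprContext"
        (void)buf;                                 // scope exit restores the previous tracker
    }
    } // namespace doris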
@@ -108,10 +103,9 @@ Status ExprContext::clone(RuntimeState* state, ExprContext** new_ctx) { DCHECK(_prepared); DCHECK(_opened); DCHECK(*new_ctx == nullptr); - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); *new_ctx = state->obj_pool()->add(new ExprContext(_root)); - (*new_ctx)->_pool.reset(new MemPool(_pool->mem_tracker())); + (*new_ctx)->_pool.reset(new MemPool()); for (int i = 0; i < _fn_contexts.size(); ++i) { (*new_ctx)->_fn_contexts.push_back(_fn_contexts[i]->impl()->clone((*new_ctx)->_pool.get())); } @@ -119,7 +113,6 @@ Status ExprContext::clone(RuntimeState* state, ExprContext** new_ctx) { (*new_ctx)->_is_clone = true; (*new_ctx)->_prepared = true; (*new_ctx)->_opened = true; - (*new_ctx)->_mem_tracker = _mem_tracker; return _root->open(state, *new_ctx, FunctionContext::THREAD_LOCAL); } @@ -128,10 +121,9 @@ Status ExprContext::clone(RuntimeState* state, ExprContext** new_ctx, Expr* root DCHECK(_prepared); DCHECK(_opened); DCHECK(*new_ctx == nullptr); - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); *new_ctx = state->obj_pool()->add(new ExprContext(root)); - (*new_ctx)->_pool.reset(new MemPool(_pool->mem_tracker())); + (*new_ctx)->_pool.reset(new MemPool()); for (int i = 0; i < _fn_contexts.size(); ++i) { (*new_ctx)->_fn_contexts.push_back(_fn_contexts[i]->impl()->clone((*new_ctx)->_pool.get())); } @@ -139,13 +131,11 @@ Status ExprContext::clone(RuntimeState* state, ExprContext** new_ctx, Expr* root (*new_ctx)->_is_clone = true; (*new_ctx)->_prepared = true; (*new_ctx)->_opened = true; - (*new_ctx)->_mem_tracker = _mem_tracker; return root->open(state, *new_ctx, FunctionContext::THREAD_LOCAL); } void ExprContext::free_local_allocations() { - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); free_local_allocations(_fn_contexts); } @@ -411,8 +401,9 @@ Status ExprContext::get_const_value(RuntimeState* state, Expr& expr, AnyVal** co Status rst; char* ptr_copy = reinterpret_cast(_pool->try_allocate(sv->len, &rst)); if (ptr_copy == nullptr) { - RETURN_LIMIT_EXCEEDED(_pool->mem_tracker(), state, - "Could not allocate constant string value", sv->len, rst); + RETURN_LIMIT_EXCEEDED( + thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker(), state, + "Could not allocate constant string value", sv->len, rst); } memcpy(ptr_copy, sv->ptr, sv->len); sv->ptr = reinterpret_cast(ptr_copy); diff --git a/be/src/exprs/expr_context.h b/be/src/exprs/expr_context.h index b8bda8fa48..362aa53877 100644 --- a/be/src/exprs/expr_context.h +++ b/be/src/exprs/expr_context.h @@ -41,7 +41,6 @@ class VOlapScanNode; class Expr; class MemPool; -class MemTracker; class RuntimeState; class RowDescriptor; class TColumnValue; @@ -57,9 +56,7 @@ public: ~ExprContext(); /// Prepare expr tree for evaluation. - /// Allocations from this context will be counted against 'tracker'. - Status prepare(RuntimeState* state, const RowDescriptor& row_desc, - const std::shared_ptr& tracker); + Status prepare(RuntimeState* state, const RowDescriptor& row_desc); /// Must be called after calling Prepare(). Does not need to be called on clones. /// Idempotent (this allows exprs to be opened multiple times in subplans without @@ -172,10 +169,7 @@ private: /// and owned by this ExprContext. std::vector _fn_contexts; - // Used to create _pool, if change to raw pointer later, be careful about tracker's life cycle. - std::shared_ptr _mem_tracker; - - /// Pool backing fn_contexts_. Counts against the runtime state's UDF mem tracker. + /// Pool backing fn_contexts_. 
     std::unique_ptr<MemPool> _pool;
 
     /// The expr tree this context is for.
diff --git a/be/src/exprs/new_agg_fn_evaluator.cc b/be/src/exprs/new_agg_fn_evaluator.cc
index d595221a7d..1fe0f6e897 100644
--- a/be/src/exprs/new_agg_fn_evaluator.cc
+++ b/be/src/exprs/new_agg_fn_evaluator.cc
@@ -28,7 +28,6 @@
 #include "exprs/anyval_util.h"
 #include "exprs/expr.h"
 #include "exprs/expr_context.h"
-#include "runtime/mem_tracker.h"
 #include "runtime/raw_value.h"
 #include "runtime/runtime_state.h"
 #include "runtime/string_value.h"
@@ -106,7 +105,6 @@ const TypeDescriptor& NewAggFnEvaluator::intermediate_type() const {
 
 Status NewAggFnEvaluator::Create(const AggFn& agg_fn, RuntimeState* state, ObjectPool* pool,
                                  MemPool* mem_pool, NewAggFnEvaluator** result,
-                                 const std::shared_ptr<MemTracker>& tracker,
                                  const RowDescriptor& row_desc) {
     *result = nullptr;
 
@@ -123,7 +121,7 @@ Status NewAggFnEvaluator::Create(const AggFn& agg_fn, RuntimeState* state, Objec
         // TODO chenhao replace ExprContext with ScalarFnEvaluator
         ExprContext* input_eval = pool->add(new ExprContext(input_expr));
         if (input_eval == nullptr) goto cleanup;
-        input_eval->prepare(state, row_desc, tracker);
+        input_eval->prepare(state, row_desc);
         agg_fn_eval->input_evals_.push_back(input_eval);
         Expr* root = input_eval->root();
         DCHECK(root == input_expr);
@@ -162,12 +160,11 @@ cleanup:
 Status NewAggFnEvaluator::Create(const std::vector<AggFn*>& agg_fns, RuntimeState* state,
                                  ObjectPool* pool, MemPool* mem_pool, std::vector<NewAggFnEvaluator*>* evals,
-                                 const std::shared_ptr<MemTracker>& tracker,
                                  const RowDescriptor& row_desc) {
     for (const AggFn* agg_fn : agg_fns) {
         NewAggFnEvaluator* agg_fn_eval;
-        RETURN_IF_ERROR(NewAggFnEvaluator::Create(*agg_fn, state, pool, mem_pool, &agg_fn_eval,
-                                                  tracker, row_desc));
+        RETURN_IF_ERROR(
+                NewAggFnEvaluator::Create(*agg_fn, state, pool, mem_pool, &agg_fn_eval, row_desc));
         evals->push_back(agg_fn_eval);
     }
     return Status::OK();
 }
diff --git a/be/src/exprs/new_agg_fn_evaluator.h b/be/src/exprs/new_agg_fn_evaluator.h
index 4ddf4e3f48..1a7e7f17e4 100644
--- a/be/src/exprs/new_agg_fn_evaluator.h
+++ b/be/src/exprs/new_agg_fn_evaluator.h
@@ -33,7 +33,6 @@ namespace doris {
 
 class MemPool;
-class MemTracker;
 class ObjectPool;
 class RowDescriptor;
 class RuntimeState;
@@ -63,13 +62,11 @@ public:
     /// even if this function returns error status on initialization failure.
     static Status Create(const AggFn& agg_fn, RuntimeState* state, ObjectPool* pool,
                          MemPool* mem_pool, NewAggFnEvaluator** eval,
-                         const std::shared_ptr<MemTracker>& tracker,
                          const RowDescriptor& row_desc) WARN_UNUSED_RESULT;
 
     /// Convenience functions for creating evaluators for multiple aggregate functions.
     static Status Create(const std::vector<AggFn*>& agg_fns, RuntimeState* state,
                          ObjectPool* pool, MemPool* mem_pool, std::vector<NewAggFnEvaluator*>* evals,
-                         const std::shared_ptr<MemTracker>& tracker,
                          const RowDescriptor& row_desc) WARN_UNUSED_RESULT;
 
     ~NewAggFnEvaluator();
diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp
index 63a4d148f9..f6c142767f 100644
--- a/be/src/exprs/runtime_filter.cpp
+++ b/be/src/exprs/runtime_filter.cpp
@@ -1047,8 +1047,7 @@ Status IRuntimeFilter::get_push_expr_ctxs(std::list<ExprContext*>* push_expr_ctx
 }
 
 Status IRuntimeFilter::get_prepared_context(std::vector<ExprContext*>* push_expr_ctxs,
-                                            const RowDescriptor& desc,
-                                            const std::shared_ptr<MemTracker>& tracker) {
+                                            const RowDescriptor& desc) {
     if (_is_ignored) {
         return Status::OK();
     }
@@ -1058,7 +1057,7 @@
 
     if (_push_down_ctxs.empty()) {
         RETURN_IF_ERROR(_wrapper->get_push_context(&_push_down_ctxs, _state, _probe_ctx));
-        RETURN_IF_ERROR(Expr::prepare(_push_down_ctxs, _state, desc, tracker));
+        RETURN_IF_ERROR(Expr::prepare(_push_down_ctxs, _state, desc));
         RETURN_IF_ERROR(Expr::open(_push_down_ctxs, _state));
     }
     // push expr
@@ -1067,8 +1066,7 @@
 }
 
 Status IRuntimeFilter::get_prepared_vexprs(std::vector* vexprs,
-                                           const RowDescriptor& desc,
-                                           const std::shared_ptr<MemTracker>& tracker) {
+                                           const RowDescriptor& desc) {
     if (_is_ignored) {
         return Status::OK();
     }
diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h
index 5273788894..b4382d8874 100644
--- a/be/src/exprs/runtime_filter.h
+++ b/be/src/exprs/runtime_filter.h
@@ -159,12 +159,10 @@ public:
 
     // This function can be called multiple times
     Status get_prepared_context(std::vector<ExprContext*>* push_expr_ctxs,
-                                const RowDescriptor& desc,
-                                const std::shared_ptr<MemTracker>& tracker);
+                                const RowDescriptor& desc);
 
     Status get_prepared_vexprs(std::vector* push_vexprs,
-                               const RowDescriptor& desc,
-                               const std::shared_ptr<MemTracker>& tracker);
+                               const RowDescriptor& desc);
 
     bool is_broadcast_join() const { return _is_broadcast_join; }
 
diff --git a/be/src/gutil/strings/numbers.cc b/be/src/gutil/strings/numbers.cc
index 24c993b86a..379ed00e94 100644
--- a/be/src/gutil/strings/numbers.cc
+++ b/be/src/gutil/strings/numbers.cc
@@ -1488,26 +1488,26 @@ string AccurateItoaKMGT(int64 i) {
         i = -i;
     }
 
-    string ret = StringPrintf("%s", sign) + std::to_string(i) + " = " + StringPrintf("%s", sign);
+    string ret = StringPrintf("%s", sign);
     int64 val;
     if ((val = (i >> 40)) > 1) {
         ret += StringPrintf("%" PRId64
                             "%s"
-                            " + ",
+                            ",",
                             val, "T");
         i = i - (val << 40);
     }
     if ((val = (i >> 30)) > 1) {
         ret += StringPrintf("%" PRId64
                             "%s"
-                            " + ",
+                            ",",
                             val, "G");
         i = i - (val << 30);
     }
     if ((val = (i >> 20)) > 1) {
         ret += StringPrintf("%" PRId64
                             "%s"
-                            " + ",
+                            ",",
                             val, "M");
         i = i - (val << 20);
     }
diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp
index dc953fdf3c..f430c02bbd 100644
--- a/be/src/http/default_path_handlers.cpp
+++ b/be/src/http/default_path_handlers.cpp
@@ -29,7 +29,7 @@
 #include "gutil/strings/substitute.h"
 #include "http/action/tablets_info_action.h"
 #include "http/web_page_handler.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker_limiter.h"
 #include "util/debug_util.h"
 #include "util/pretty_printer.h"
 #include "util/thread.h"
@@ -81,20 +81,17 @@ void config_handler(const WebPageHandler::ArgumentMap& args, std::stringstream*
 }
 
 // Registered to handle "/memz", and prints out memory allocation statistics.
-void mem_usage_handler(const std::shared_ptr<MemTracker>& mem_tracker,
-                       const WebPageHandler::ArgumentMap& args, std::stringstream* output) {
-    if (mem_tracker != nullptr) {
-        (*output) << "<pre>"
-                  << "Mem Limit: " << PrettyPrinter::print(mem_tracker->limit(), TUnit::BYTES)
-                  << std::endl
-                  << "Mem Consumption: "
-                  << PrettyPrinter::print(mem_tracker->consumption(), TUnit::BYTES) << std::endl
-                  << "</pre>";
-    } else {
-        (*output) << "<pre>"
-                  << "No process memory limit set."
-                  << "</pre>";
-    }
+void mem_usage_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) {
+    (*output) << "<pre>"
+              << "Mem Limit: "
+              << PrettyPrinter::print(ExecEnv::GetInstance()->process_mem_tracker()->limit(),
+                                      TUnit::BYTES)
+              << std::endl
+              << "Mem Consumption: "
+              << PrettyPrinter::print(ExecEnv::GetInstance()->process_mem_tracker()->consumption(),
+                                      TUnit::BYTES)
+              << std::endl
+              << "</pre>";
 
     (*output) << "<pre>";
 #if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER) || \
@@ -130,37 +127,44 @@ void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstr
                  "       data-search='true' "
                  "       class='table table-striped'>\n";
     (*output) << ""
-                 "Id"
+                 "Level"
+                 "Label"
                  "Parent"
                  "Limit"
-                 "Current Consumption"
-                 "Peak Consumption"
                  "Use Count";
+                 ">Current Consumption(Bytes)"
+                 "Current Consumption(Normalize)"
+                 "Peak Consumption(Bytes)"
+                 "Peak Consumption(Normalize)"
+                 "Child Count"
+                 "";
     (*output) << "\n";
 
-    std::vector<std::shared_ptr<MemTracker>> trackers;
-    MemTracker::list_process_trackers(&trackers);
-    for (const shared_ptr<MemTracker>& tracker : trackers) {
-        string parent = tracker->parent() == nullptr ? "none" : tracker->parent()->label();
-        string limit_str;
-        string current_consumption_str;
-        string peak_consumption_str;
-        limit_str = tracker->limit() == -1 ? "none" : AccurateItoaKMGT(tracker->limit());
-        current_consumption_str = AccurateItoaKMGT(tracker->consumption());
-        peak_consumption_str = AccurateItoaKMGT(tracker->peak_consumption());
+    size_t upper_level;
+    size_t cur_level = 1;
+    // levels equal to or lower than upper_level will be shown on the web page
+    auto iter = args.find("upper_level");
+    if (iter != args.end()) {
+        upper_level = std::stol(iter->second);
+    } else {
+        upper_level = 3;
+    }
 
-        int64_t use_count = tracker.use_count();
+    std::vector snapshots;
+    ExecEnv::GetInstance()->process_mem_tracker()->make_snapshot(&snapshots, cur_level,
+                                                                 upper_level);
+    for (const auto& item : snapshots) {
+        string limit_str = item.limit == -1 ? "none" : AccurateItoaKMGT(item.limit);
+        string current_consumption_normalize = AccurateItoaKMGT(item.cur_consumption);
+        string peak_consumption_normalize = AccurateItoaKMGT(item.peak_consumption);
         (*output) << strings::Substitute(
-                "$0$1$2"     // id, parent, limit
-                "$3$4$5\n", // current, peak
-                tracker->label(), parent, limit_str, current_consumption_str, peak_consumption_str,
-                use_count);
+                "$0$1$2$3$4$5$6$7$8\n",
+                item.level, item.label, item.parent, limit_str, item.cur_consumption,
+                current_consumption_normalize, item.peak_consumption, peak_consumption_normalize,
+                item.child_count);
     }
     (*output) << "\n";
 }
@@ -342,17 +346,15 @@ void cpu_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* out
 #endif
 }
 
-void add_default_path_handlers(WebPageHandler* web_page_handler,
-                               const std::shared_ptr<MemTracker>& process_mem_tracker) {
+void add_default_path_handlers(WebPageHandler* web_page_handler) {
     // TODO(yingchun): logs_handler is not implemented yet, so not show it on navigate bar
     web_page_handler->register_page("/logs", "Logs", logs_handler, false /* is_on_nav_bar */);
     web_page_handler->register_page("/varz", "Configs", config_handler, true /* is_on_nav_bar */);
-    web_page_handler->register_page("/memz", "Memory",
-                                    std::bind(&mem_usage_handler, process_mem_tracker,
-                                                    std::placeholders::_1, std::placeholders::_2),
-                                    true /* is_on_nav_bar */);
-    web_page_handler->register_page("/mem_tracker", "MemTracker", mem_tracker_handler,
-                                    true /* is_on_nav_bar */);
+    web_page_handler->register_page("/memz", "Memory", mem_usage_handler, true /* is_on_nav_bar */);
+    web_page_handler->register_page(
+            "/mem_tracker", "MemTracker",
+            std::bind(&mem_tracker_handler, std::placeholders::_1, std::placeholders::_2),
+            true /* is_on_nav_bar */);
     web_page_handler->register_page("/heap", "Heap Profile", heap_handler,
                                     true /* is_on_nav_bar */);
     web_page_handler->register_page("/cpu", "CPU Profile", cpu_handler, true /* is_on_nav_bar */);
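The reworked /mem_tracker page above renders one table row per tracker snapshot and honors an optional upper_level argument. As a rough, self-contained illustration of that idea, the sketch below walks a tracker tree and collects rows only down to the requested level; the Snapshot, TrackerNode, and take_snapshots names are invented for this example and are not the Doris API.

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    // Simplified stand-in for the per-tracker row rendered by /mem_tracker.
    struct Snapshot {
        std::size_t level;
        std::string label;
        std::string parent;
        int64_t limit;            // -1 means "none"
        int64_t cur_consumption;  // bytes
        int64_t peak_consumption; // bytes
        std::size_t child_count;
    };

    struct TrackerNode {
        Snapshot snap;
        std::vector<TrackerNode> children;
    };

    // Collect rows for every tracker whose level does not exceed 'upper_level'
    // (level 1 is the process-level tracker, as in the handler above).
    void take_snapshots(const TrackerNode& node, std::size_t cur_level, std::size_t upper_level,
                        std::vector<Snapshot>* out) {
        if (cur_level > upper_level) return;
        out->push_back(node.snap);
        for (const auto& child : node.children) {
            take_snapshots(child, cur_level + 1, upper_level, out);
        }
    }

    int main() {
        TrackerNode query{{2, "QueryPool", "Process", -1, 512 << 10, 1 << 20, 0}, {}};
        TrackerNode process{{1, "Process", "", -1, 1 << 20, 2 << 20, 1}, {}};
        process.children.push_back(query);

        std::vector<Snapshot> snapshots;
        take_snapshots(process, /*cur_level=*/1, /*upper_level=*/3, &snapshots);
        for (const auto& s : snapshots) {
            std::cout << s.level << " " << s.label << " " << s.cur_consumption << "\n";
        }
        return 0;
    }

Trackers below the requested level are simply skipped, which keeps the page small even for deep tracker hierarchies.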
diff --git a/be/src/http/default_path_handlers.h b/be/src/http/default_path_handlers.h
index b1a233b5cd..1ff038350b 100644
--- a/be/src/http/default_path_handlers.h
+++ b/be/src/http/default_path_handlers.h
@@ -28,6 +28,5 @@ class WebPageHandler;
 
 // Adds a set of default path handlers to the webserver to display
 // logs and configuration flags
-void add_default_path_handlers(WebPageHandler* web_page_handler,
-                               const std::shared_ptr<MemTracker>& process_mem_tracker);
+void add_default_path_handlers(WebPageHandler* web_page_handler);
 } // namespace doris
diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp
index e3d467be2a..1284af6084 100644
--- a/be/src/olap/base_compaction.cpp
+++ b/be/src/olap/base_compaction.cpp
@@ -63,6 +63,8 @@ Status BaseCompaction::execute_compact_impl() {
         return Status::OLAPInternalError(OLAP_ERR_BE_CLONE_OCCURRED);
     }
 
+    SCOPED_ATTACH_TASK(_mem_tracker.get(), ThreadContext::TaskType::COMPACTION);
+
     // 2. do base compaction, merge rowsets
     int64_t permits = get_compaction_permits();
     RETURN_NOT_OK(do_compaction(permits));
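SCOPED_ATTACH_TASK binds the compaction's own tracker to the executing thread for the whole of execute_compact_impl(), taking over from the SCOPED_ATTACH_TASK_THREAD call that the thread-pool lambda in olap_server.cpp used to make. A minimal RAII sketch of that attach/restore behavior, using a toy TrackerModel and guard rather than the real ThreadContext machinery:

    #include <cassert>
    #include <cstdint>
    #include <string>

    // Toy tracker: only what the example needs.
    struct TrackerModel {
        std::string label;
        int64_t consumption = 0;
        void consume(int64_t bytes) { consumption += bytes; }
        void release(int64_t bytes) { consumption -= bytes; }
    };

    // Thread-local "currently attached" tracker, standing in for ThreadContext.
    thread_local TrackerModel* g_attached = nullptr;

    // RAII guard modeling SCOPED_ATTACH_TASK: attach on entry, restore on exit.
    class ScopedAttachTask {
    public:
        explicit ScopedAttachTask(TrackerModel* t) : _prev(g_attached) { g_attached = t; }
        ~ScopedAttachTask() { g_attached = _prev; }

    private:
        TrackerModel* _prev;
    };

    int main() {
        TrackerModel compaction_tracker{"BaseCompaction"};
        {
            ScopedAttachTask attach(&compaction_tracker);
            // Any allocation hooks running on this thread would now charge g_attached.
            g_attached->consume(4096);
        }
        assert(g_attached == nullptr);
        assert(compaction_tracker.consumption == 4096);
        return 0;
    }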
diff --git a/be/src/olap/byte_buffer.cpp b/be/src/olap/byte_buffer.cpp
index cfef30b7c1..a90638a1ce 100644
--- a/be/src/olap/byte_buffer.cpp
+++ b/be/src/olap/byte_buffer.cpp
@@ -44,7 +44,7 @@ void StorageByteBuffer::BufDeleter::operator()(char* p) {
             LOG(FATAL) << "fail to munmap: mem=" << p << ", len=" << _mmap_length
                        << ", errno=" << Errno::no() << ", errno_str=" << Errno::str();
         } else {
-            RELEASE_THREAD_LOCAL_MEM_TRACKER(_mmap_length);
+            RELEASE_THREAD_MEM_TRACKER(_mmap_length);
         }
     } else {
         delete[] p;
@@ -96,13 +96,13 @@ StorageByteBuffer* StorageByteBuffer::reference_buffer(StorageByteBuffer* refere
 
 StorageByteBuffer* StorageByteBuffer::mmap(void* start, uint64_t length, int prot, int flags,
                                            int fd, uint64_t offset) {
-    CONSUME_THREAD_LOCAL_MEM_TRACKER(length);
+    CONSUME_THREAD_MEM_TRACKER(length);
     char* memory = (char*)::mmap(start, length, prot, flags, fd, offset);
 
     if (MAP_FAILED == memory) {
         LOG(WARNING) << "fail to mmap. [errno='" << Errno::no() << "' errno_str='" << Errno::str()
                      << "']";
-        RELEASE_THREAD_LOCAL_MEM_TRACKER(length);
+        RELEASE_THREAD_MEM_TRACKER(length);
         return nullptr;
     }
 
@@ -114,7 +114,7 @@ StorageByteBuffer* StorageByteBuffer::mmap(void* start, uint64_t length, int pro
     if (nullptr == buf) {
         deleter(memory);
         LOG(WARNING) << "fail to allocate StorageByteBuffer.";
-        RELEASE_THREAD_LOCAL_MEM_TRACKER(length);
+        RELEASE_THREAD_MEM_TRACKER(length);
         return nullptr;
     }
 
@@ -135,13 +135,13 @@ StorageByteBuffer* StorageByteBuffer::mmap(FileHandler* handler, uint64_t offset
 
     size_t length = handler->length();
     int fd = handler->fd();
-    CONSUME_THREAD_LOCAL_MEM_TRACKER(length);
+    CONSUME_THREAD_MEM_TRACKER(length);
     char* memory = (char*)::mmap(nullptr, length, prot, flags, fd, offset);
 
     if (MAP_FAILED == memory) {
         LOG(WARNING) << "fail to mmap. [errno='" << Errno::no() << "' errno_str='" << Errno::str()
                      << "']";
-        RELEASE_THREAD_LOCAL_MEM_TRACKER(length);
+        RELEASE_THREAD_MEM_TRACKER(length);
         return nullptr;
     }
 
@@ -153,7 +153,7 @@ StorageByteBuffer* StorageByteBuffer::mmap(FileHandler* handler, uint64_t offset
     if (nullptr == buf) {
         deleter(memory);
         LOG(WARNING) << "fail to allocate StorageByteBuffer.";
-        RELEASE_THREAD_LOCAL_MEM_TRACKER(length);
+        RELEASE_THREAD_MEM_TRACKER(length);
         return nullptr;
     }
 
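The CONSUME_THREAD_MEM_TRACKER / RELEASE_THREAD_MEM_TRACKER pairs above keep the thread-level accounting in step with mmap and munmap, releasing again on every failure path. A small self-contained model of the same pattern, with plain malloc/free and a thread_local counter standing in for the real hooks:

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>
    #include <iostream>

    // Thread-local byte counter standing in for the thread mem tracker.
    thread_local int64_t g_thread_tracked_bytes = 0;

    inline void consume_thread_mem_tracker(int64_t bytes) { g_thread_tracked_bytes += bytes; }
    inline void release_thread_mem_tracker(int64_t bytes) { g_thread_tracked_bytes -= bytes; }

    // Mirrors the pattern above: consume before the raw allocation, and release
    // again on every failure path so the tracker never drifts from reality.
    char* tracked_alloc(std::size_t length) {
        consume_thread_mem_tracker(static_cast<int64_t>(length));
        char* mem = static_cast<char*>(std::malloc(length));
        if (mem == nullptr) {
            release_thread_mem_tracker(static_cast<int64_t>(length));
            return nullptr;
        }
        return mem;
    }

    void tracked_free(char* mem, std::size_t length) {
        std::free(mem);
        release_thread_mem_tracker(static_cast<int64_t>(length));
    }

    int main() {
        char* p = tracked_alloc(1024);
        std::cout << "tracked bytes after alloc: " << g_thread_tracked_bytes << "\n";
        if (p != nullptr) tracked_free(p, 1024);
        std::cout << "tracked bytes after free:  " << g_thread_tracked_bytes << "\n";
        return 0;
    }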
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index fa1b1ba326..ecbe803040 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -34,11 +34,10 @@ Compaction::Compaction(TabletSharedPtr tablet, const std::string& label)
           _input_row_num(0),
           _state(CompactionState::INITED) {
 #ifndef BE_TEST
-    _mem_tracker = MemTracker::create_tracker(-1, label,
-                                              StorageEngine::instance()->compaction_mem_tracker(),
-                                              MemTrackerLevel::INSTANCE);
+    _mem_tracker = std::make_unique<MemTrackerLimiter>(
+            -1, label, StorageEngine::instance()->compaction_mem_tracker());
 #else
-    _mem_tracker = MemTracker::get_process_tracker();
+    _mem_tracker = std::make_unique<MemTrackerLimiter>(-1, label);
 #endif
 }
 
diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h
index 000b309e43..4dcb4a21b1 100644
--- a/be/src/olap/compaction.h
+++ b/be/src/olap/compaction.h
@@ -54,8 +54,6 @@ public:
     Status execute_compact();
     virtual Status execute_compact_impl() = 0;
 
-    std::shared_ptr<MemTracker>& get_mem_tracker() { return _mem_tracker; }
-
 protected:
     virtual Status pick_rowsets_to_compact() = 0;
     virtual std::string compaction_name() const = 0;
@@ -78,7 +76,7 @@ protected:
 
 protected:
     // the root tracker for this compaction
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _mem_tracker;
 
     TabletSharedPtr _tablet;
 
diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp
index 35329e1362..9fc9fd8b62 100644
--- a/be/src/olap/cumulative_compaction.cpp
+++ b/be/src/olap/cumulative_compaction.cpp
@@ -70,6 +70,8 @@ Status CumulativeCompaction::execute_compact_impl() {
         return Status::OLAPInternalError(OLAP_ERR_CUMULATIVE_CLONE_OCCURRED);
     }
 
+    SCOPED_ATTACH_TASK(_mem_tracker.get(), ThreadContext::TaskType::COMPACTION);
+
     // 3. do cumulative compaction, merge rowsets
     int64_t permits = get_compaction_permits();
     RETURN_NOT_OK(do_compaction(permits));
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 5a1a5c5c06..73e9d7b992 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -96,10 +96,8 @@ Status DeltaWriter::init() {
         return Status::OLAPInternalError(OLAP_ERR_TABLE_NOT_FOUND);
     }
 
-    // Only consume mem tracker manually in mem table. Using the virtual tracker can avoid
-    // frequent recursive consumption of the parent tracker, thereby improving performance.
-    _mem_tracker = MemTracker::create_virtual_tracker(
-            -1, "DeltaWriter:" + std::to_string(_tablet->tablet_id()));
+    _flushed_mem_tracker = std::make_unique<MemTracker>(
+            fmt::format("DeltaWriter:tabletId={}", std::to_string(_tablet->tablet_id())));
     // check tablet version number
     if (_tablet->version_count() > config::max_tablet_version_num) {
         //trigger quick compaction
@@ -215,6 +213,7 @@ Status DeltaWriter::_flush_memtable_async() {
     if (++_segment_counter > config::max_segment_num_per_rowset) {
         return Status::OLAPInternalError(OLAP_ERR_TOO_MANY_SEGMENTS);
     }
+    _flushed_mem_tracker->consume(_mem_table->memory_usage());
     return _flush_token->submit(_mem_table);
 }
 
@@ -268,7 +267,7 @@ Status DeltaWriter::wait_flush() {
 void DeltaWriter::_reset_mem_table() {
     _mem_table.reset(new MemTable(_tablet->tablet_id(), _schema.get(), _tablet_schema.get(),
                                   _req.slots, _req.tuple_desc, _tablet->keys_type(),
-                                  _rowset_writer.get(), _mem_tracker, _is_vec));
+                                  _rowset_writer.get(), _flushed_mem_tracker.get(), _is_vec));
 }
 
 Status DeltaWriter::close() {
@@ -304,9 +303,6 @@ Status DeltaWriter::close_wait() {
     RETURN_NOT_OK(_flush_token->wait());
 
     _mem_table.reset();
-    // In allocate/free of mem_pool, the consume_cache of _mem_tracker will be called,
-    // and _untracked_mem must be flushed first.
-    MemTracker::memory_leak_check(_mem_tracker.get());
 
     // use rowset meta manager to save meta
     _cur_rowset = _rowset_writer->build();
@@ -340,7 +336,6 @@ Status DeltaWriter::cancel() {
         // cancel and wait all memtables in flush queue to be finished
         _flush_token->cancel();
     }
-    MemTracker::memory_leak_check(_mem_tracker.get());
     _is_cancelled = true;
     return Status::OK();
 }
@@ -355,12 +350,12 @@ int64_t DeltaWriter::get_mem_consumption_snapshot() const {
 }
 
 int64_t DeltaWriter::mem_consumption() const {
-    if (_mem_tracker == nullptr) {
+    if (_flushed_mem_tracker == nullptr) {
         // This method may be called before this writer is initialized.
-        // So _mem_tracker may be null.
+        // So _flushed_mem_tracker may be null.
         return 0;
     }
-    return _mem_tracker->consumption();
+    return _flushed_mem_tracker->consumption() + _mem_table->memory_usage();
 }
 
 int64_t DeltaWriter::partition_id() const {
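With this change, DeltaWriter keeps two numbers: the active memtable tracks its own usage, and _flushed_mem_tracker picks up that usage the moment a memtable is queued for flushing, so mem_consumption() reports the sum of both. A toy model of that hand-off (DeltaWriterModel is invented for illustration, not the real class):

    #include <cstdint>
    #include <iostream>

    // Minimal model of the two-counter scheme above: one counter for the memtable
    // that is still being written, one for memtables already handed to the flush queue.
    struct DeltaWriterModel {
        int64_t active_memtable_bytes = 0; // MemTable::_mem_tracker in the patch
        int64_t flushed_bytes = 0;         // DeltaWriter::_flushed_mem_tracker

        void write(int64_t bytes) { active_memtable_bytes += bytes; }

        // _flush_memtable_async(): the active memtable's usage is charged to the
        // flushed tracker before the memtable is queued, then a new memtable starts.
        void flush_memtable_async() {
            flushed_bytes += active_memtable_bytes;
            active_memtable_bytes = 0;
        }

        // mem_consumption(): flushed tracker plus the live memtable.
        int64_t mem_consumption() const { return flushed_bytes + active_memtable_bytes; }
    };

    int main() {
        DeltaWriterModel writer;
        writer.write(64 << 10);
        writer.flush_memtable_async();
        writer.write(16 << 10);
        std::cout << writer.mem_consumption() << "\n"; // 81920 bytes in this toy example
        return 0;
    }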
diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h
index 432f0a0b76..0c11b8fcbe 100644
--- a/be/src/olap/delta_writer.h
+++ b/be/src/olap/delta_writer.h
@@ -131,7 +131,7 @@ private:
 
     StorageEngine* _storage_engine;
     std::unique_ptr<FlushToken> _flush_token;
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _flushed_mem_tracker;
 
     // The counter of number of segment flushed already.
     int64_t _segment_counter = 0;
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index b1834efac3..20c3ca476e 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -283,7 +283,7 @@ void LRUCache::_evict_one_entry(LRUHandle* e) {
 
 Cache::Handle* LRUCache::insert(const CacheKey& key, uint32_t hash, void* value, size_t charge,
                                 void (*deleter)(const CacheKey& key, void* value),
-                                CachePriority priority, MemTracker* tracker) {
+                                MemTrackerLimiter* tracker, CachePriority priority) {
     size_t handle_size = sizeof(LRUHandle) - 1 + key.size();
     LRUHandle* e = reinterpret_cast(malloc(handle_size));
     e->value = value;
@@ -300,8 +300,7 @@ Cache::Handle* LRUCache::insert(const CacheKey& key, uint32_t hash, void* value,
     memcpy(e->key_data, key.data(), key.size());
     // The memory of the parameter value should be recorded in the tls mem tracker,
     // transfer the memory ownership of the value to ShardedLRUCache::_mem_tracker.
-    if (tracker)
-        tls_ctx()->_thread_mem_tracker_mgr->mem_tracker()->transfer_to(tracker, e->total_size);
+    THREAD_MEM_TRACKER_TRANSFER_TO(e->total_size, tracker);
     LRUHandle* to_remove_head = nullptr;
     {
         std::lock_guard l(_mutex);
@@ -436,8 +435,8 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity,
           _num_shard_bits(Bits::FindLSBSetNonZero(num_shards)),
           _num_shards(num_shards),
           _shards(nullptr),
-          _last_id(1),
-          _mem_tracker(MemTracker::create_tracker(-1, name, nullptr, MemTrackerLevel::OVERVIEW)) {
+          _last_id(1) {
+    _mem_tracker = std::make_unique<MemTrackerLimiter>(-1, name);
     CHECK(num_shards > 0) << "num_shards cannot be 0";
     CHECK_EQ((num_shards & (num_shards - 1)), 0)
             << "num_shards should be power of two, but got " << num_shards;
@@ -476,8 +475,8 @@ Cache::Handle* ShardedLRUCache::insert(const CacheKey& key, void* value, size_t
                                        void (*deleter)(const CacheKey& key, void* value),
                                        CachePriority priority) {
     const uint32_t hash = _hash_slice(key);
-    return _shards[_shard(hash)]->insert(key, hash, value, charge, deleter, priority,
-                                         _mem_tracker.get());
+    return _shards[_shard(hash)]->insert(key, hash, value, charge, deleter, _mem_tracker.get(),
+                                         priority);
 }
 
 Cache::Handle* ShardedLRUCache::lookup(const CacheKey& key) {
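THREAD_MEM_TRACKER_TRANSFER_TO re-homes bytes that the inserting thread has already accounted for onto the cache's tracker, and LRUHandle::free() transfers them back before the value is destroyed, so nothing is counted twice. A self-contained sketch of that ownership transfer between two counters (CounterModel and transfer are illustrative names only):

    #include <cassert>
    #include <cstdint>

    // Two counters standing in for the thread-local tracker and the cache tracker.
    struct CounterModel {
        int64_t consumption = 0;
        void consume(int64_t v) { consumption += v; }
        void release(int64_t v) { consumption -= v; }
    };

    // Ownership transfer: the bytes were already consumed on the thread tracker by the
    // allocation hook; inserting into the cache re-homes them without double counting.
    void transfer(int64_t bytes, CounterModel* from, CounterModel* to) {
        from->release(bytes);
        to->consume(bytes);
    }

    int main() {
        CounterModel thread_tracker; // tls tracker of the inserting thread
        CounterModel cache_tracker;  // ShardedLRUCache::_mem_tracker

        thread_tracker.consume(256);                     // value allocated by the caller
        transfer(256, &thread_tracker, &cache_tracker);  // insert(): TRANSFER_TO
        assert(thread_tracker.consumption == 0 && cache_tracker.consumption == 256);

        transfer(256, &cache_tracker, &thread_tracker);  // LRUHandle::free(): TRANSFER_FROM
        thread_tracker.release(256);                     // value actually freed
        assert(cache_tracker.consumption == 0 && thread_tracker.consumption == 0);
        return 0;
    }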
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index 4851e0b9ca..7f925bb69d 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -14,7 +14,7 @@
 #include 
 
 #include "olap/olap_common.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/thread_context.h"
 #include "util/metrics.h"
 #include "util/slice.h"
@@ -236,7 +236,7 @@ typedef struct LRUHandle {
     uint32_t refs;
     uint32_t hash; // Hash of key(); used for fast sharding and comparisons
     CachePriority priority = CachePriority::NORMAL;
-    MemTracker* mem_tracker;
+    MemTrackerLimiter* mem_tracker;
     char key_data[1]; // Beginning of key
 
     CacheKey key() const {
@@ -251,9 +251,7 @@ typedef struct LRUHandle {
 
     void free() {
         (*deleter)(key(), value);
-        if (mem_tracker)
-            mem_tracker->transfer_to(tls_ctx()->_thread_mem_tracker_mgr->mem_tracker().get(),
-                                     total_size);
+        THREAD_MEM_TRACKER_TRANSFER_FROM(total_size, mem_tracker);
         ::free(this);
     }
 
@@ -312,8 +310,8 @@ public:
     // Like Cache methods, but with an extra "hash" parameter.
     Cache::Handle* insert(const CacheKey& key, uint32_t hash, void* value, size_t charge,
                           void (*deleter)(const CacheKey& key, void* value),
-                          CachePriority priority = CachePriority::NORMAL,
-                          MemTracker* tracker = nullptr);
+                          MemTrackerLimiter* tracker,
+                          CachePriority priority = CachePriority::NORMAL);
     Cache::Handle* lookup(const CacheKey& key, uint32_t hash);
     void release(Cache::Handle* handle);
     void erase(const CacheKey& key, uint32_t hash);
@@ -388,7 +386,7 @@ private:
     LRUCache** _shards;
     std::atomic _last_id;
 
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _mem_tracker;
     std::shared_ptr _entity = nullptr;
     IntGauge* capacity = nullptr;
     IntGauge* usage = nullptr;
diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index 2332b8e2dd..2c3ee73357 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -31,14 +31,16 @@ namespace doris {
 
 MemTable::MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet_schema,
                   const std::vector<SlotDescriptor*>* slot_descs, TupleDescriptor* tuple_desc,
-                   KeysType keys_type, RowsetWriter* rowset_writer,
-                   const std::shared_ptr<MemTracker>& parent_tracker, bool support_vec)
+                   KeysType keys_type, RowsetWriter* rowset_writer, MemTracker* writer_mem_tracker,
+                   bool support_vec)
         : _tablet_id(tablet_id),
           _schema(schema),
           _tablet_schema(tablet_schema),
           _slot_descs(slot_descs),
           _keys_type(keys_type),
-          _mem_tracker(MemTracker::create_tracker(-1, "MemTable", parent_tracker)),
+          _mem_tracker(std::make_unique<MemTracker>(
+                  fmt::format("MemTable:tabletId={}", std::to_string(tablet_id)))),
+          _writer_mem_tracker(writer_mem_tracker),
           _buffer_mem_pool(new MemPool(_mem_tracker.get())),
           _table_mem_pool(new MemPool(_mem_tracker.get())),
           _schema_size(_schema->schema_size()),
@@ -132,10 +134,11 @@ MemTable::~MemTable() {
         }
     }
     std::for_each(_row_in_blocks.begin(), _row_in_blocks.end(), std::default_delete());
+    _writer_mem_tracker->release(_mem_tracker->consumption());
     _mem_tracker->release(_mem_usage);
     _buffer_mem_pool->free_all();
     _table_mem_pool->free_all();
-    MemTracker::memory_leak_check(_mem_tracker.get(), true);
+    _mem_tracker->memory_leak_check();
 }
 
 MemTable::RowCursorComparator::RowCursorComparator(const Schema* schema) : _schema(schema) {}
diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h
index 3fbfe8288e..d92e7773a9 100644
--- a/be/src/olap/memtable.h
+++ b/be/src/olap/memtable.h
@@ -22,7 +22,7 @@
 #include "common/object_pool.h"
 #include "olap/olap_define.h"
 #include "olap/skiplist.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "util/tuple_row_zorder_compare.h"
 #include "vec/aggregate_functions/aggregate_function.h"
 #include "vec/common/string_ref.h"
@@ -42,13 +42,12 @@ class MemTable {
 public:
     MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet_schema,
             const std::vector<SlotDescriptor*>* slot_descs, TupleDescriptor* tuple_desc,
-             KeysType keys_type, RowsetWriter* rowset_writer,
-             const std::shared_ptr<MemTracker>& parent_tracker, bool support_vec = false);
+             KeysType keys_type, RowsetWriter* rowset_writer, MemTracker* writer_mem_tracker,
+             bool support_vec = false);
     ~MemTable();
 
     int64_t tablet_id() const { return _tablet_id; }
     size_t memory_usage() const { return _mem_tracker->consumption(); }
-    std::shared_ptr& mem_tracker() { return _mem_tracker; }
 
     inline void insert(const Tuple* tuple) { (this->*_insert_fn)(tuple); }
     // insert tuple from (row_pos) to (row_pos+num_rows)
@@ -153,7 +152,8 @@ private:
 
     std::shared_ptr _vec_row_comparator;
 
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
+    MemTracker* _writer_mem_tracker;
     // This is a buffer, to hold the memory referenced by the rows that have not
     // been inserted into the SkipList
     std::unique_ptr _buffer_mem_pool;
diff --git a/be/src/olap/memtable_flush_executor.cpp b/be/src/olap/memtable_flush_executor.cpp
index 29300c0cda..3e722019e4 100644
--- a/be/src/olap/memtable_flush_executor.cpp
+++ b/be/src/olap/memtable_flush_executor.cpp
@@ -70,7 +70,7 @@ void FlushToken::_flush_memtable(std::shared_ptr memtable, int64_t sub
     // TODO(zxy) After rethinking the use of switch thread mem tracker, choose the appropriate way to get
     // load mem tracke here.
     // DCHECK(memtable->mem_tracker()->parent_task_mem_tracker_no_own());
-    // SCOPED_ATTACH_TASK_THREAD(ThreadContext::TaskType::LOAD,
+    // SCOPED_ATTACH_TASK(ThreadContext::TaskType::LOAD,
     //                           memtable->mem_tracker()->parent_task_mem_tracker_no_own());
 #endif
     _stats.flush_wait_time_ns += (MonotonicNanos() - submit_task_time);
diff --git a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp
index c9cba8ca1d..b1c712532f 100644
--- a/be/src/olap/merger.cpp
+++ b/be/src/olap/merger.cpp
@@ -51,7 +51,7 @@ Status Merger::merge_rowsets(TabletSharedPtr tablet, ReaderType reader_type,
             "failed to init row cursor when merging rowsets of tablet " + tablet->full_name());
     row_cursor.allocate_memory_for_string_type(*cur_tablet_schema);
 
-    std::unique_ptr mem_pool(new MemPool("Merger:merge_rowsets"));
+    std::unique_ptr mem_pool(new MemPool());
 
     // The following procedure would last for long time, half of one day, etc.
     int64_t output_rows = 0;
diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp
index c1d01b9b94..fbb1811705 100644
--- a/be/src/olap/olap_server.cpp
+++ b/be/src/olap/olap_server.cpp
@@ -606,8 +606,6 @@ Status StorageEngine::_submit_compaction_task(TabletSharedPtr tablet,
                         ? _cumu_compaction_thread_pool
                         : _base_compaction_thread_pool;
         auto st = thread_pool->submit_func([=]() {
-            SCOPED_ATTACH_TASK_THREAD(ThreadContext::TaskType::COMPACTION,
-                                      tablet->get_compaction_mem_tracker(compaction_type));
             CgroupsMgr::apply_system_cgroup();
             tablet->execute_compaction(compaction_type);
             _permit_limiter.release(permits);
diff --git a/be/src/olap/page_cache.cpp b/be/src/olap/page_cache.cpp
index 2ec540b384..d4d1fce972 100644
--- a/be/src/olap/page_cache.cpp
+++ b/be/src/olap/page_cache.cpp
@@ -32,10 +32,7 @@ void StoragePageCache::create_global_cache(size_t capacity, int32_t index_cache_
 
 StoragePageCache::StoragePageCache(size_t capacity, int32_t index_cache_percentage,
                                    uint32_t num_shards)
-        : _index_cache_percentage(index_cache_percentage),
-          _mem_tracker(MemTracker::create_tracker(capacity, "StoragePageCache", nullptr,
-                                                  MemTrackerLevel::OVERVIEW)) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+        : _index_cache_percentage(index_cache_percentage) {
     if (index_cache_percentage == 0) {
         _data_page_cache = std::unique_ptr(
                 new_lru_cache("DataPageCache", capacity, LRUCacheType::SIZE, num_shards));
diff --git a/be/src/olap/page_cache.h b/be/src/olap/page_cache.h
index f03f50bd5a..c1f0b48da3 100644
--- a/be/src/olap/page_cache.h
+++ b/be/src/olap/page_cache.h
@@ -24,7 +24,7 @@
 #include "gen_cpp/segment_v2.pb.h" // for cache allocation
 #include "gutil/macros.h"          // for DISALLOW_COPY_AND_ASSIGN
 #include "olap/lru_cache.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 
 namespace doris {
 
@@ -99,8 +99,6 @@ private:
     std::unique_ptr _data_page_cache = nullptr;
     std::unique_ptr _index_page_cache = nullptr;
 
-    std::shared_ptr<MemTracker> _mem_tracker = nullptr;
-
     Cache* _get_page_cache(segment_v2::PageTypePB page_type) {
         switch (page_type) {
         case segment_v2::DATA_PAGE: {
diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp
index 14490763cd..0fc20b370c 100644
--- a/be/src/olap/push_handler.cpp
+++ b/be/src/olap/push_handler.cpp
@@ -878,7 +878,7 @@ Status PushBrokerReader::init(const Schema* schema, const TBrokerScanRange& t_sc
     }
     _runtime_profile = _runtime_state->runtime_profile();
     _runtime_profile->set_name("PushBrokerReader");
-    _mem_pool.reset(new MemPool("PushBrokerReader"));
+    _mem_pool.reset(new MemPool());
     _counter.reset(new ScannerCounter());
 
     // init scanner
diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp
index 6a206567ab..b1da44702c 100644
--- a/be/src/olap/reader.cpp
+++ b/be/src/olap/reader.cpp
@@ -105,7 +105,7 @@ TabletReader::~TabletReader() {
 
 Status TabletReader::init(const ReaderParams& read_params) {
 #ifndef NDEBUG
-    _predicate_mem_pool.reset(new MemPool("TabletReader:" + read_params.tablet->full_name()));
+    _predicate_mem_pool.reset(new MemPool());
 #else
     _predicate_mem_pool.reset(new MemPool());
 #endif
diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h
index 22a5745ddf..9507ce28fe 100644
--- a/be/src/olap/row_block2.h
+++ b/be/src/olap/row_block2.h
@@ -28,7 +28,7 @@
 #include "olap/selection_vector.h"
 #include "olap/types.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "vec/columns/column.h"
 
 namespace doris {
diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_reader.h b/be/src/olap/rowset/segment_v2/bitmap_index_reader.h
index ecce8da235..bb11172765 100644
--- a/be/src/olap/rowset/segment_v2/bitmap_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/bitmap_index_reader.h
@@ -72,7 +72,7 @@ public:
               _dict_column_iter(reader->_dict_column_reader.get()),
               _bitmap_column_iter(reader->_bitmap_column_reader.get()),
               _current_rowid(0),
-              _pool(new MemPool("BitmapIndexIterator")) {}
+              _pool(new MemPool()) {}
 
     bool has_null_bitmap() const { return _reader->_has_null; }
 
diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp b/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp
index fea31b0a5b..610d9ea3fa 100644
--- a/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp
@@ -64,7 +64,7 @@ public:
     using MemoryIndexType = typename BitmapIndexTraits::MemoryIndexType;
 
     explicit BitmapIndexWriterImpl(const TypeInfo* type_info)
-            : _type_info(type_info), _reverted_index_size(0), _pool("BitmapIndexWriterImpl") {}
+            : _type_info(type_info), _reverted_index_size(0), _pool() {}
 
     ~BitmapIndexWriterImpl() override = default;
 
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h
index 68b96a6044..2809fe6766 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h
@@ -69,7 +69,7 @@ public:
     explicit BloomFilterIndexIterator(BloomFilterIndexReader* reader)
             : _reader(reader),
               _bloom_filter_iter(reader->_bloom_filter_reader.get()),
-              _pool(new MemPool("BloomFilterIndexIterator")) {}
+              _pool(new MemPool()) {}
 
     // Read bloom filter at the given ordinal into `bf`.
     Status read_bloom_filter(rowid_t ordinal, std::unique_ptr* bf);
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
index 456598cec6..62ee2ede53 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
@@ -69,7 +69,7 @@ public:
                                         const TypeInfo* type_info)
             : _bf_options(bf_options),
               _type_info(type_info),
-              _pool("BloomFilterIndexWriterImpl"),
+              _pool(),
               _has_null(false),
               _bf_buffer_size(0) {}
 
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h
index efa742200e..bcdd529ef8 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -440,7 +440,7 @@ public:
               _type_size(0),
               _precision(precision),
               _scale(scale),
-              _pool(new MemPool("DefaultValueColumnIterator")) {}
+              _pool(new MemPool()) {}
 
     Status init(const ColumnIteratorOptions& opts) override;
 
diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h
index 0a212aa103..a5c2442a24 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.h
+++ b/be/src/olap/rowset/segment_v2/column_writer.h
@@ -147,9 +147,6 @@ private:
     std::unique_ptr _field;
     bool _is_nullable;
     std::vector _null_bitmap;
-
-protected:
-    std::shared_ptr _mem_tracker;
 };
 
 class FlushPageCallback {
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
index 1339baf51a..2d2bdd620e 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
@@ -40,7 +40,7 @@ IndexedColumnWriter::IndexedColumnWriter(const IndexedColumnWriterOptions& optio
         : _options(options),
           _type_info(type_info),
           _file_writer(file_writer),
-          _mem_pool("IndexedColumnWriter"),
+          _mem_pool(),
           _num_values(0),
           _num_data_pages(0),
           _value_key_coder(nullptr),
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index 6810b19c99..09c29979bf 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -46,17 +46,10 @@ Status Segment::open(io::FileSystem* fs, const std::string& path, uint32_t segme
 }
 
 Segment::Segment(uint32_t segment_id, const TabletSchema* tablet_schema)
-        : _segment_id(segment_id), _tablet_schema(*tablet_schema) {
-#ifndef BE_TEST
-    _mem_tracker = MemTracker::create_virtual_tracker(
-            -1, "Segment", StorageEngine::instance()->tablet_mem_tracker());
-#else
-    _mem_tracker = MemTracker::create_virtual_tracker(-1, "Segment");
-#endif
-}
+        : _segment_id(segment_id), _tablet_schema(*tablet_schema), _meta_mem_usage(0) {}
 
 Segment::~Segment() {
-    _mem_tracker->release(_mem_tracker->consumption());
+    StorageEngine::instance()->segment_meta_mem_tracker()->release(_meta_mem_usage);
 }
 
 Status Segment::_open() {
@@ -116,7 +109,8 @@ Status Segment::_parse_footer() {
         return Status::Corruption("Bad segment file {}: file size {} < {}",
                                   _file_reader->path().native(), file_size, 12 + footer_length);
     }
-    _mem_tracker->consume(footer_length);
+    _meta_mem_usage += footer_length;
+    StorageEngine::instance()->segment_meta_mem_tracker()->consume(footer_length);
 
     std::string footer_buf;
     footer_buf.resize(footer_length);
@@ -162,7 +156,8 @@ Status Segment::_load_index() {
             DCHECK_EQ(footer.type(), SHORT_KEY_PAGE);
             DCHECK(footer.has_short_key_page_footer());
 
-            _mem_tracker->consume(body.get_size());
+            _meta_mem_usage += body.get_size();
+            StorageEngine::instance()->segment_meta_mem_tracker()->consume(body.get_size());
             _sk_index_decoder.reset(new ShortKeyIndexDecoder);
             return _sk_index_decoder->parse(body, footer.short_key_page_footer());
         }
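Instead of a per-segment virtual tracker, each Segment now remembers how many bytes of footer and index metadata it charged to the global segment_meta_mem_tracker and releases exactly that amount in its destructor. A compact model of the same bookkeeping (SegmentModel and the global counter are illustrative only):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    // Process-wide counter standing in for StorageEngine::segment_meta_mem_tracker().
    std::atomic<int64_t> g_segment_meta_bytes{0};

    // Each segment remembers how much it charged so the destructor can release
    // exactly that amount, mirroring _meta_mem_usage in the patch.
    class SegmentModel {
    public:
        ~SegmentModel() { g_segment_meta_bytes -= _meta_mem_usage; }

        void load_footer(int64_t footer_bytes) {
            _meta_mem_usage += footer_bytes;
            g_segment_meta_bytes += footer_bytes;
        }

        void load_short_key_index(int64_t index_bytes) {
            _meta_mem_usage += index_bytes;
            g_segment_meta_bytes += index_bytes;
        }

    private:
        int64_t _meta_mem_usage = 0;
    };

    int main() {
        {
            SegmentModel segment;
            segment.load_footer(4096);
            segment.load_short_key_index(8192);
            assert(g_segment_meta_bytes.load() == 12288);
        }
        assert(g_segment_meta_bytes.load() == 0);
        return 0;
    }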
diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h
index e2b71a05d2..462cf98db8 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -124,9 +124,7 @@ private:
     uint32_t _segment_id;
     TabletSchema _tablet_schema;
 
-    // This mem tracker is only for tracking memory use by segment meta data such as footer or index page.
-    // The memory consumed by querying is tracked in segment iterator.
-    std::shared_ptr<MemTracker> _mem_tracker;
+    int64_t _meta_mem_usage;
     SegmentFooterPB _footer;
 
     // Map from column unique id to column ordinal in footer's ColumnMetaPB
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 50c18e6720..bad327dd1a 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -27,7 +27,7 @@
 #include "olap/rowset/segment_v2/page_io.h"
 #include "olap/schema.h"
 #include "olap/short_key_index.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "util/crc32c.h"
 #include "util/faststring.h"
 
@@ -46,8 +46,8 @@ SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id,
           _max_row_per_segment(max_row_per_segment),
           _opts(opts),
           _file_writer(file_writer),
-          _mem_tracker(MemTracker::create_virtual_tracker(
-                  -1, "SegmentWriter:Segment-" + std::to_string(segment_id))),
+          _mem_tracker(std::make_unique<MemTracker>("SegmentWriter:Segment-" +
+                                                    std::to_string(segment_id))),
           _olap_data_convertor(tablet_schema) {
     CHECK_NOTNULL(file_writer);
     size_t num_short_key_column = _tablet_schema->num_short_key_columns();
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h
index 1c14b9909d..dc10e7a6ff 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -109,7 +109,7 @@ private:
     SegmentFooterPB _footer;
     std::unique_ptr<ShortKeyIndexBuilder> _index_builder;
     std::vector<std::unique_ptr<ColumnWriter>> _column_writers;
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
     uint32_t _row_count = 0;
 
     vectorized::OlapBlockDataConvertor _olap_data_convertor;
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index edc2b8ef4c..ee9653203f 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -31,7 +31,7 @@
 #include "olap/tablet.h"
 #include "olap/types.h"
 #include "olap/wrapper_field.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "util/defer_op.h"
 #include "vec/aggregate_functions/aggregate_function.h"
 #include "vec/aggregate_functions/aggregate_function_reader.h"
@@ -1006,27 +1006,25 @@ bool RowBlockSorter::sort(RowBlock** row_block) {
 
 RowBlockAllocator::RowBlockAllocator(const TabletSchema& tablet_schema, size_t memory_limitation)
         : _tablet_schema(tablet_schema),
-          _mem_tracker(MemTracker::create_virtual_tracker(-1, "RowBlockAllocator")),
+          _tracker(std::make_unique<MemTracker>("RowBlockAllocator")),
           _row_len(tablet_schema.row_size()),
           _memory_limitation(memory_limitation) {
     VLOG_NOTICE << "RowBlockAllocator(). row_len=" << _row_len;
 }
 
 RowBlockAllocator::~RowBlockAllocator() {
-    if (_mem_tracker->consumption() != 0) {
-        LOG(WARNING) << "memory lost in RowBlockAllocator. memory_size="
-                     << _mem_tracker->consumption();
+    if (_tracker->consumption() != 0) {
+        LOG(WARNING) << "memory lost in RowBlockAllocator. memory_size=" << _tracker->consumption();
     }
 }
 
 Status RowBlockAllocator::allocate(RowBlock** row_block, size_t num_rows, bool null_supported) {
     size_t row_block_size = _row_len * num_rows;
 
-    if (_memory_limitation > 0 &&
-        _mem_tracker->consumption() + row_block_size > _memory_limitation) {
+    if (_memory_limitation > 0 && _tracker->consumption() + row_block_size > _memory_limitation) {
         LOG(WARNING)
                 << "RowBlockAllocator::alocate() memory exceeded. "
-                << "m_memory_allocated=" << _mem_tracker->consumption() << " "
+                << "m_memory_allocated=" << _tracker->consumption() << " "
                 << "mem limit for schema change=" << _memory_limitation << " "
                 << "You can increase the memory "
                 << "by changing the Config.memory_limitation_per_thread_for_schema_change_bytes";
@@ -1046,9 +1044,9 @@ Status RowBlockAllocator::allocate(RowBlock** row_block, size_t num_rows, bool n
     row_block_info.null_supported = null_supported;
     (*row_block)->init(row_block_info);
 
-    _mem_tracker->consume(row_block_size);
+    _tracker->consume(row_block_size);
     VLOG_NOTICE << "RowBlockAllocator::allocate() this=" << this << ", num_rows=" << num_rows
-                << ", m_memory_allocated=" << _mem_tracker->consumption()
+                << ", m_memory_allocated=" << _tracker->consumption()
                 << ", row_block_addr=" << *row_block;
     return Status::OK();
 }
@@ -1059,11 +1057,11 @@ void RowBlockAllocator::release(RowBlock* row_block) {
         return;
     }
 
-    _mem_tracker->release(row_block->capacity() * _row_len);
+    _tracker->release(row_block->capacity() * _row_len);
 
     VLOG_NOTICE << "RowBlockAllocator::release() this=" << this
                 << ", num_rows=" << row_block->capacity()
-                << ", m_memory_allocated=" << _mem_tracker->consumption()
+                << ", m_memory_allocated=" << _tracker->consumption()
                 << ", row_block_addr=" << row_block;
     delete row_block;
 }
@@ -1073,7 +1071,7 @@ bool RowBlockAllocator::is_memory_enough_for_sorting(size_t num_rows, size_t all
         return true;
     }
     size_t row_block_size = _row_len * (num_rows - allocated_rows);
-    return _mem_tracker->consumption() + row_block_size < _memory_limitation;
+    return _tracker->consumption() + row_block_size < _memory_limitation;
 }
 
 RowBlockMerger::RowBlockMerger(TabletSharedPtr tablet) : _tablet(tablet) {}
@@ -1084,7 +1082,7 @@ bool RowBlockMerger::merge(const std::vector& row_block_arr, RowsetWr
                            uint64_t* merged_rows) {
     uint64_t tmp_merged_rows = 0;
     RowCursor row_cursor;
-    std::unique_ptr mem_pool(new MemPool("RowBlockMerger"));
+    std::unique_ptr mem_pool(new MemPool());
     std::unique_ptr agg_object_pool(new ObjectPool());
 
     auto merge_error = [&]() -> bool {
@@ -1366,11 +1364,10 @@ VSchemaChangeWithSorting::VSchemaChangeWithSorting(const RowBlockChanger& row_bl
         : _changer(row_block_changer),
           _memory_limitation(memory_limitation),
           _temp_delta_versions(Version::mock()) {
-    _mem_tracker = MemTracker::create_tracker(
-            config::memory_limitation_per_thread_for_schema_change_bytes,
-            fmt::format("VSchemaChangeWithSorting:changer={}",
-                        std::to_string(int64(&row_block_changer))),
-            StorageEngine::instance()->schema_change_mem_tracker(), MemTrackerLevel::TASK);
+    _mem_tracker =
+            std::make_unique<MemTrackerLimiter>(fmt::format("VSchemaChangeWithSorting:changer={}",
+                                                     std::to_string(int64(&row_block_changer))),
+                                         StorageEngine::instance()->schema_change_mem_tracker());
 }
 
 Status SchemaChangeWithSorting::_inner_process(RowsetReaderSharedPtr rowset_reader,
@@ -1590,15 +1587,19 @@ Status VSchemaChangeWithSorting::_inner_process(RowsetReaderSharedPtr rowset_rea
     rowset_reader->next_block(ref_block.get());
     while (ref_block->rows()) {
         RETURN_IF_ERROR(_changer.change_block(ref_block.get(), new_block.get()));
-        if (!_mem_tracker->try_consume(new_block->allocated_bytes())) {
+        if (!_mem_tracker->check_limit(config::memory_limitation_per_thread_for_schema_change_bytes,
+                                       new_block->allocated_bytes())) {
             RETURN_IF_ERROR(create_rowset());
 
-            if (!_mem_tracker->try_consume(new_block->allocated_bytes())) {
+            if (!_mem_tracker->check_limit(
+                        config::memory_limitation_per_thread_for_schema_change_bytes,
+                        new_block->allocated_bytes())) {
                 LOG(WARNING) << "Memory limitation is too small for Schema Change."
                              << "memory_limitation=" << _memory_limitation;
                 return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR);
             }
         }
+        _mem_tracker->consume(new_block->allocated_bytes());
 
         // move unique ptr
         blocks.push_back(
diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h
index 42909d459a..81a6e48791 100644
--- a/be/src/olap/schema_change.h
+++ b/be/src/olap/schema_change.h
@@ -80,7 +80,7 @@ public:
 
 private:
     const TabletSchema& _tablet_schema;
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _tracker;
     size_t _row_len;
     size_t _memory_limitation;
 };
@@ -244,7 +244,7 @@ private:
     const RowBlockChanger& _changer;
     size_t _memory_limitation;
     Version _temp_delta_versions;
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _mem_tracker;
 };
 
 class SchemaChangeHandler {
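VSchemaChangeWithSorting now asks check_limit() whether buffering the next block would cross the configured schema-change budget, spills into a rowset if it would, and only then calls consume(). The sketch below models that flow; SortingBufferModel and its flush() are stand-ins for the tracker and create_rowset(), not the real classes:

    #include <cstdint>
    #include <iostream>

    // Minimal model of the check_limit(limit, bytes) + consume(bytes) pattern above.
    struct SortingBufferModel {
        int64_t limit;           // config::memory_limitation_per_thread_for_schema_change_bytes
        int64_t consumption = 0; // _mem_tracker->consumption()

        bool check_limit(int64_t bytes) const { return consumption + bytes <= limit; }
        void consume(int64_t bytes) { consumption += bytes; }
        void flush() { consumption = 0; } // stands in for create_rowset()
    };

    int main() {
        SortingBufferModel buffer{/*limit=*/1 << 20};
        int64_t block_bytes = 700 << 10;

        for (int i = 0; i < 3; ++i) {
            if (!buffer.check_limit(block_bytes)) {
                buffer.flush(); // spill the buffered blocks into a rowset first
                if (!buffer.check_limit(block_bytes)) {
                    std::cout << "memory limitation is too small for schema change\n";
                    return 1;
                }
            }
            buffer.consume(block_bytes);
        }
        std::cout << "buffered " << buffer.consumption << " bytes\n";
        return 0;
    }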
diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index 6950c1f025..74cbb1aa68 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -62,7 +62,7 @@ SnapshotManager* SnapshotManager::instance() {
 
 Status SnapshotManager::make_snapshot(const TSnapshotRequest& request, string* snapshot_path,
                                       bool* allow_incremental_clone) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     Status res = Status::OK();
     if (snapshot_path == nullptr) {
         LOG(WARNING) << "output parameter cannot be null";
@@ -90,7 +90,7 @@ Status SnapshotManager::make_snapshot(const TSnapshotRequest& request, string* s
 Status SnapshotManager::release_snapshot(const string& snapshot_path) {
     // If the requested snapshot_path is under the root/snapshot directory, it is considered valid and can be deleted.
     // Otherwise, it is considered an invalid request and an error is returned.
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     auto stores = StorageEngine::instance()->get_stores();
     for (auto store : stores) {
         std::string abs_path;
@@ -114,7 +114,7 @@ Status SnapshotManager::release_snapshot(const string& snapshot_path) {
 
 Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t tablet_id,
                                            int64_t replica_id, const int32_t& schema_hash) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     Status res = Status::OK();
     // check clone dir existed
     if (!FileUtils::check_exist(clone_dir)) {
diff --git a/be/src/olap/snapshot_manager.h b/be/src/olap/snapshot_manager.h
index 48574f18f8..a82728207a 100644
--- a/be/src/olap/snapshot_manager.h
+++ b/be/src/olap/snapshot_manager.h
@@ -64,8 +64,7 @@ public:
 
 private:
     SnapshotManager() : _snapshot_base_id(0) {
-        _mem_tracker = MemTracker::create_tracker(-1, "SnapshotManager", nullptr,
-                                                  MemTrackerLevel::OVERVIEW);
+        _mem_tracker = std::make_unique<MemTracker>("SnapshotManager");
     }
 
     Status _calc_snapshot_id_path(const TabletSharedPtr& tablet, int64_t timeout_s,
@@ -96,7 +95,7 @@ private:
     std::mutex _snapshot_mutex;
     uint64_t _snapshot_base_id;
 
-    std::shared_ptr<MemTracker> _mem_tracker = nullptr;
+    std::unique_ptr<MemTracker> _mem_tracker;
 }; // SnapshotManager
 
 } // namespace doris
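SCOPED_CONSUME_MEM_TRACKER routes every allocation made on the thread inside the guarded scope to the given plain MemTracker, and nested scopes restore the previous tracker when they end. A minimal model of those scoping rules (the guard and hook below are invented for the example, not the Doris ThreadContext):

    #include <cassert>
    #include <cstdint>

    // Stand-in for a plain (non-limiting) MemTracker that only counts bytes.
    struct CountingTracker {
        int64_t consumption = 0;
    };

    thread_local CountingTracker* g_scope_tracker = nullptr;

    // RAII guard modeling SCOPED_CONSUME_MEM_TRACKER: while alive, allocation hooks
    // on this thread charge the given tracker; on destruction the previous scope returns.
    class ScopedConsume {
    public:
        explicit ScopedConsume(CountingTracker* t) : _prev(g_scope_tracker) { g_scope_tracker = t; }
        ~ScopedConsume() { g_scope_tracker = _prev; }

    private:
        CountingTracker* _prev;
    };

    // What a malloc hook would do while a scope is active.
    void on_allocation(int64_t bytes) {
        if (g_scope_tracker != nullptr) g_scope_tracker->consumption += bytes;
    }

    int main() {
        CountingTracker snapshot_tracker; // SnapshotManager::_mem_tracker in the patch
        CountingTracker clone_tracker;
        {
            ScopedConsume outer(&snapshot_tracker);
            on_allocation(1024);
            {
                ScopedConsume inner(&clone_tracker); // nested scope temporarily wins
                on_allocation(512);
            }
            on_allocation(1024); // back to the snapshot tracker
        }
        assert(snapshot_tracker.consumption == 2048);
        assert(clone_tracker.consumption == 512);
        return 0;
    }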
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 43d6b02ca5..0179f3f064 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -112,20 +112,16 @@ StorageEngine::StorageEngine(const EngineOptions& options)
           _is_all_cluster_id_exist(true),
           _index_stream_lru_cache(nullptr),
           _file_cache(nullptr),
-          _compaction_mem_tracker(MemTracker::create_tracker(-1, "StorageEngine::AutoCompaction",
-                                                             nullptr, MemTrackerLevel::OVERVIEW)),
-          _tablet_mem_tracker(MemTracker::create_tracker(-1, "StorageEngine::TabletHeader", nullptr,
-                                                         MemTrackerLevel::OVERVIEW)),
-          _schema_change_mem_tracker(MemTracker::create_tracker(
-                  -1, "StorageEngine::SchemaChange", nullptr, MemTrackerLevel::OVERVIEW)),
-          _storage_migration_mem_tracker(MemTracker::create_tracker(
-                  -1, "StorageEngine::StorageMigration", nullptr, MemTrackerLevel::OVERVIEW)),
-          _clone_mem_tracker(MemTracker::create_tracker(-1, "StorageEngine::Clone", nullptr,
-                                                        MemTrackerLevel::OVERVIEW)),
-          _batch_load_mem_tracker(MemTracker::create_tracker(-1, "StorageEngine::BatchLoad",
-                                                             nullptr, MemTrackerLevel::OVERVIEW)),
-          _consistency_mem_tracker(MemTracker::create_tracker(-1, "StorageEngine::Consistency",
-                                                              nullptr, MemTrackerLevel::OVERVIEW)),
+          _compaction_mem_tracker(
+                  std::make_unique<MemTrackerLimiter>(-1, "StorageEngine::AutoCompaction")),
+          _segment_meta_mem_tracker(std::make_unique<MemTracker>("StorageEngine::SegmentMeta")),
+          _schema_change_mem_tracker(
+                  std::make_unique<MemTrackerLimiter>(-1, "StorageEngine::SchemaChange")),
+          _clone_mem_tracker(std::make_unique<MemTrackerLimiter>(-1, "StorageEngine::Clone")),
+          _batch_load_mem_tracker(
+                  std::make_unique<MemTrackerLimiter>(-1, "StorageEngine::BatchLoad")),
+          _consistency_mem_tracker(
+                  std::make_unique<MemTrackerLimiter>(-1, "StorageEngine::Consistency")),
           _stop_background_threads_latch(1),
           _tablet_manager(new TabletManager(config::tablet_map_shard_size)),
           _txn_manager(new TxnManager(config::txn_map_shard_size, config::txn_shard_size)),
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index 6ab0d5a825..1b6b28b4c4 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -179,15 +179,12 @@ public:
 
     Status get_compaction_status_json(std::string* result);
 
-    std::shared_ptr<MemTracker> compaction_mem_tracker() { return _compaction_mem_tracker; }
-    std::shared_ptr<MemTracker> tablet_mem_tracker() { return _tablet_mem_tracker; }
-    std::shared_ptr<MemTracker> schema_change_mem_tracker() { return _schema_change_mem_tracker; }
-    std::shared_ptr<MemTracker> storage_migration_mem_tracker() {
-        return _storage_migration_mem_tracker;
-    }
-    std::shared_ptr<MemTracker> clone_mem_tracker() { return _clone_mem_tracker; }
-    std::shared_ptr<MemTracker> batch_load_mem_tracker() { return _batch_load_mem_tracker; }
-    std::shared_ptr<MemTracker> consistency_mem_tracker() { return _consistency_mem_tracker; }
+    MemTrackerLimiter* compaction_mem_tracker() { return _compaction_mem_tracker.get(); }
+    MemTracker* segment_meta_mem_tracker() { return _segment_meta_mem_tracker.get(); }
+    MemTrackerLimiter* schema_change_mem_tracker() { return _schema_change_mem_tracker.get(); }
+    MemTrackerLimiter* clone_mem_tracker() { return _clone_mem_tracker.get(); }
+    MemTrackerLimiter* batch_load_mem_tracker() { return _batch_load_mem_tracker.get(); }
+    MemTrackerLimiter* consistency_mem_tracker() { return _consistency_mem_tracker.get(); }
 
     // check cumulative compaction config
     void check_cumulative_compaction_config();
@@ -332,20 +329,19 @@ private:
     std::unordered_map _unused_rowsets;
 
     // Count the memory consumption of all Base and Cumulative tasks.
-    std::shared_ptr<MemTracker> _compaction_mem_tracker;
-    // Count the memory consumption of all Segment read.
-    std::shared_ptr<MemTracker> _tablet_mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _compaction_mem_tracker;
+    // This mem tracker is only for tracking memory use by segment meta data such as footer or index page.
+    // The memory consumed by querying is tracked in segment iterator.
+    std::unique_ptr<MemTracker> _segment_meta_mem_tracker;
     // Count the memory consumption of all SchemaChange tasks.
-    std::shared_ptr<MemTracker> _schema_change_mem_tracker;
-    // Count the memory consumption of all StorageMigration tasks.
-    std::shared_ptr<MemTracker> _storage_migration_mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _schema_change_mem_tracker;
     // Count the memory consumption of all EngineCloneTask.
     // Note: Memory that does not contain make/release snapshots.
-    std::shared_ptr<MemTracker> _clone_mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _clone_mem_tracker;
     // Count the memory consumption of all EngineBatchLoadTask.
-    std::shared_ptr<MemTracker> _batch_load_mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _batch_load_mem_tracker;
     // Count the memory consumption of all EngineChecksumTask.
-    std::shared_ptr<MemTracker> _consistency_mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _consistency_mem_tracker;
 
     CountDownLatch _stop_background_threads_latch;
     scoped_refptr<Thread> _unused_rowset_monitor_thread;
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index bf71ed1e28..eccc08199b 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -1683,14 +1683,6 @@ Status Tablet::create_rowset(RowsetMetaSharedPtr rowset_meta, RowsetSharedPtr* r
     return RowsetFactory::create_rowset(&tablet_schema(), tablet_path(), rowset_meta, rowset);
 }
 
-std::shared_ptr<MemTracker>& Tablet::get_compaction_mem_tracker(CompactionType compaction_type) {
-    if (compaction_type == CompactionType::CUMULATIVE_COMPACTION) {
-        return _cumulative_compaction->get_mem_tracker();
-    } else {
-        return _base_compaction->get_mem_tracker();
-    }
-}
-
 Status Tablet::cooldown() {
     std::unique_lock schema_change_lock(_schema_change_lock, std::try_to_lock);
     if (!schema_change_lock.owns_lock()) {
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 622495a12c..aa65f2110f 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -273,8 +273,6 @@ public:
         return _cumulative_compaction_policy;
     }
 
-    std::shared_ptr<MemTracker>& get_compaction_mem_tracker(CompactionType compaction_type);
-
     inline bool all_beta() const {
         std::shared_lock rdlock(_meta_lock);
         return _tablet_meta->all_beta();
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index 1da11a5675..3ab3c3a99d 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -73,8 +73,7 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(tablet_meta_mem_consumption, MetricUnit::BYTE
                                    mem_consumption, Labels({{"type", "tablet_meta"}}));
 
 TabletManager::TabletManager(int32_t tablet_map_lock_shard_size)
-        : _mem_tracker(MemTracker::create_tracker(-1, "TabletManager", nullptr,
-                                                  MemTrackerLevel::OVERVIEW)),
+        : _mem_tracker(std::make_unique<MemTracker>("TabletManager")),
           _tablets_shards_size(tablet_map_lock_shard_size),
           _tablets_shards_mask(tablet_map_lock_shard_size - 1) {
     CHECK_GT(_tablets_shards_size, 0);
@@ -225,7 +224,7 @@ bool TabletManager::_check_tablet_id_exist_unlocked(TTabletId tablet_id) {
 }
 
 Status TabletManager::create_tablet(const TCreateTabletReq& request, std::vector<DataDir*> stores) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     DorisMetrics::instance()->create_tablet_requests_total->increment(1);
 
     int64_t tablet_id = request.tablet_id;
@@ -433,7 +432,7 @@ Status TabletManager::drop_tablet(TTabletId tablet_id, TReplicaId replica_id, bo
         LOG(INFO) << "tablet " << tablet_id << " is under clone, skip drop task";
         return Status::Aborted("aborted");
     }
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     return _drop_tablet_unlocked(tablet_id, replica_id, keep_files);
 }
 
@@ -486,7 +485,7 @@ Status TabletManager::_drop_tablet_unlocked(TTabletId tablet_id, TReplicaId repl
 
 Status TabletManager::drop_tablets_on_error_root_path(
         const std::vector<TabletInfo>& tablet_info_vec) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     Status res = Status::OK();
     if (tablet_info_vec.empty()) { // This is a high probability event
         return res;
@@ -698,7 +697,7 @@ Status TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_
                                             TSchemaHash schema_hash, const string& meta_binary,
                                             bool update_meta, bool force, bool restore,
                                             bool check_path) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     TabletMetaSharedPtr tablet_meta(new TabletMeta());
     Status status = tablet_meta->deserialize(meta_binary);
     if (!status.ok()) {
@@ -781,7 +780,7 @@ Status TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_
 Status TabletManager::load_tablet_from_dir(DataDir* store, TTabletId tablet_id,
                                            SchemaHash schema_hash, const string& schema_hash_path,
                                            bool force, bool restore) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     LOG(INFO) << "begin to load tablet from dir. "
               << " tablet_id=" << tablet_id << " schema_hash=" << schema_hash
               << " path = " << schema_hash_path << " force = " << force << " restore = " << restore;
@@ -890,7 +889,7 @@ Status TabletManager::build_all_report_tablets_info(std::map
 }
 
 Status TabletManager::start_trash_sweep() {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     {
         std::vector<TabletSharedPtr>
                 all_tablets; // we use this vector to save all tablet ptr for saving lock time.
@@ -1009,7 +1008,7 @@ void TabletManager::unregister_clone_tablet(int64_t tablet_id) {
 void TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId tablet_id,
                                                   SchemaHash schema_hash,
                                                   const string& schema_hash_path) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     // acquire the read lock, so that there is no creating tablet or load tablet from meta tasks
     // create tablet and load tablet task should check whether the dir exists
     tablets_shard& shard = _get_tablets_shard(tablet_id);
@@ -1071,7 +1070,7 @@ void TabletManager::get_partition_related_tablets(int64_t partition_id,
 }
 
 void TabletManager::do_tablet_meta_checkpoint(DataDir* data_dir) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     std::vector<TabletSharedPtr> related_tablets;
     {
         for (auto& tablets_shard : _tablets_shards) {
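Every TabletManager change in this file is the same call-site substitution: the scoped macro now takes a raw MemTracker* rather than a shared_ptr. A hedged sketch of that pattern, assuming runtime/thread_context.h is where SCOPED_CONSUME_MEM_TRACKER is declared (as the includes elsewhere in this patch suggest); the class and function names are illustrative:

    #include "runtime/thread_context.h" // assumed to declare SCOPED_CONSUME_MEM_TRACKER

    doris::Status TabletManagerLike::load_meta() {
        // Charge every allocation made on this thread, for the rest of the scope,
        // to the manager-level tracker.
        SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
        // ... deserialize tablet meta, insert into the shard map ...
        return doris::Status::OK();
    }
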
diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h
index 46aa04b71a..7b17b59ce3 100644
--- a/be/src/olap/tablet_manager.h
+++ b/be/src/olap/tablet_manager.h
@@ -199,7 +199,7 @@ private:
     };
 
     // trace the memory use by meta of tablet
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
 
     const int32_t _tablets_shards_size;
     const int32_t _tablets_shards_mask;
diff --git a/be/src/olap/task/engine_alter_tablet_task.cpp b/be/src/olap/task/engine_alter_tablet_task.cpp
index 55ba6ab6e9..ce34cac4d3 100644
--- a/be/src/olap/task/engine_alter_tablet_task.cpp
+++ b/be/src/olap/task/engine_alter_tablet_task.cpp
@@ -18,23 +18,23 @@
 #include "olap/task/engine_alter_tablet_task.h"
 
 #include "olap/schema_change.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/thread_context.h"
 
 namespace doris {
 
 EngineAlterTabletTask::EngineAlterTabletTask(const TAlterTabletReqV2& request)
         : _alter_tablet_req(request) {
-    _mem_tracker = MemTracker::create_tracker(
+    _mem_tracker = std::make_unique<MemTrackerLimiter>(
             config::memory_limitation_per_thread_for_schema_change_bytes,
             fmt::format("EngineAlterTabletTask#baseTabletId={}:newTabletId={}",
                         std::to_string(_alter_tablet_req.base_tablet_id),
                         std::to_string(_alter_tablet_req.new_tablet_id)),
-            StorageEngine::instance()->schema_change_mem_tracker(), MemTrackerLevel::TASK);
+            StorageEngine::instance()->schema_change_mem_tracker());
 }
 
 Status EngineAlterTabletTask::execute() {
-    SCOPED_ATTACH_TASK_THREAD(ThreadContext::TaskType::STORAGE, _mem_tracker);
+    SCOPED_ATTACH_TASK(_mem_tracker.get(), ThreadContext::TaskType::STORAGE);
     DorisMetrics::instance()->create_rollup_requests_total->increment(1);
 
     Status res = SchemaChangeHandler::process_alter_tablet_v2(_alter_tablet_req);
diff --git a/be/src/olap/task/engine_alter_tablet_task.h b/be/src/olap/task/engine_alter_tablet_task.h
index 7cc97395f1..b6a736b357 100644
--- a/be/src/olap/task/engine_alter_tablet_task.h
+++ b/be/src/olap/task/engine_alter_tablet_task.h
@@ -36,7 +36,7 @@ public:
 private:
     const TAlterTabletReqV2& _alter_tablet_req;
 
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _mem_tracker;
 }; // EngineTask
 
 } // namespace doris
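The engine task classes in this patch (alter tablet above; batch load, checksum, and clone below) all follow one pattern: build a per-task MemTrackerLimiter whose parent is the matching StorageEngine-level tracker, then attach it to the worker thread for the duration of execute(). A condensed sketch with the argument order taken from these call sites (limit, label, parent); the task name and tablet id are hypothetical:

    EngineFooTask::EngineFooTask() {
        _mem_tracker = std::make_unique<doris::MemTrackerLimiter>(
                -1, "EngineFooTask#tabletId=42",
                doris::StorageEngine::instance()->clone_mem_tracker()); // parent limiter
    }

    doris::Status EngineFooTask::execute() {
        // Bind the task tracker to this thread; memory allocated while the scope is
        // alive is counted against _mem_tracker and, through it, the engine tracker.
        SCOPED_ATTACH_TASK(_mem_tracker.get(), doris::ThreadContext::TaskType::STORAGE);
        return _do_work();
    }
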
diff --git a/be/src/olap/task/engine_batch_load_task.cpp b/be/src/olap/task/engine_batch_load_task.cpp
index fdda32f5f4..e98ad02471 100644
--- a/be/src/olap/task/engine_batch_load_task.cpp
+++ b/be/src/olap/task/engine_batch_load_task.cpp
@@ -53,17 +53,17 @@ EngineBatchLoadTask::EngineBatchLoadTask(TPushReq& push_req, std::vector
-    _mem_tracker = MemTracker::create_tracker(
+    _mem_tracker = std::make_unique<MemTrackerLimiter>(
             -1,
             fmt::format("EngineBatchLoadTask#pushType={}:tabletId={}", _push_req.push_type,
                         std::to_string(_push_req.tablet_id)),
-            StorageEngine::instance()->batch_load_mem_tracker(), MemTrackerLevel::TASK);
+            StorageEngine::instance()->batch_load_mem_tracker());
 }
 
 EngineBatchLoadTask::~EngineBatchLoadTask() {}
 
 Status EngineBatchLoadTask::execute() {
-    SCOPED_ATTACH_TASK_THREAD(ThreadContext::TaskType::STORAGE, _mem_tracker);
+    SCOPED_ATTACH_TASK(_mem_tracker.get(), ThreadContext::TaskType::STORAGE);
     Status status = Status::OK();
     if (_push_req.push_type == TPushType::LOAD || _push_req.push_type == TPushType::LOAD_V2) {
         status = _init();
diff --git a/be/src/olap/task/engine_batch_load_task.h b/be/src/olap/task/engine_batch_load_task.h
index 1f3b41a619..f0bf0dba0d 100644
--- a/be/src/olap/task/engine_batch_load_task.h
+++ b/be/src/olap/task/engine_batch_load_task.h
@@ -76,7 +76,7 @@ private:
     Status* _res_status;
     std::string _remote_file_path;
     std::string _local_file_path;
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _mem_tracker;
 }; // class EngineBatchLoadTask
 } // namespace doris
 #endif // DORIS_BE_SRC_OLAP_TASK_ENGINE_BATCH_LOAD_TASK_H
diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp
index 37efd52e1d..f5d5f317f1 100644
--- a/be/src/olap/task/engine_checksum_task.cpp
+++ b/be/src/olap/task/engine_checksum_task.cpp
@@ -26,13 +26,13 @@ namespace doris {
 EngineChecksumTask::EngineChecksumTask(TTabletId tablet_id, TSchemaHash schema_hash,
                                        TVersion version, uint32_t* checksum)
         : _tablet_id(tablet_id), _schema_hash(schema_hash), _version(version), _checksum(checksum) {
-    _mem_tracker = MemTracker::create_tracker(
+    _mem_tracker = std::make_unique<MemTrackerLimiter>(
             -1, "EngineChecksumTask#tabletId=" + std::to_string(tablet_id),
-            StorageEngine::instance()->consistency_mem_tracker(), MemTrackerLevel::TASK);
+            StorageEngine::instance()->consistency_mem_tracker());
 }
 
 Status EngineChecksumTask::execute() {
-    SCOPED_ATTACH_TASK_THREAD(ThreadContext::TaskType::STORAGE, _mem_tracker);
+    SCOPED_ATTACH_TASK(_mem_tracker.get(), ThreadContext::TaskType::STORAGE);
     return _compute_checksum();
 } // execute
 
@@ -88,7 +88,7 @@ Status EngineChecksumTask::_compute_checksum() {
     }
 
     RowCursor row;
-    std::unique_ptr<MemPool> mem_pool(new MemPool("EngineChecksumTask:_compute_checksum"));
+    std::unique_ptr<MemPool> mem_pool(new MemPool());
     std::unique_ptr<ObjectPool> agg_object_pool(new ObjectPool());
     res = row.init(tablet->tablet_schema(), reader_params.return_columns);
     if (!res.ok()) {
diff --git a/be/src/olap/task/engine_checksum_task.h b/be/src/olap/task/engine_checksum_task.h
index b96a4328dc..233c6e84f2 100644
--- a/be/src/olap/task/engine_checksum_task.h
+++ b/be/src/olap/task/engine_checksum_task.h
@@ -44,7 +44,7 @@ private:
     TSchemaHash _schema_hash;
     TVersion _version;
     uint32_t* _checksum;
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _mem_tracker;
 }; // EngineTask
 
 } // namespace doris
diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp
index 07aaf32db0..221be83850 100644
--- a/be/src/olap/task/engine_clone_task.cpp
+++ b/be/src/olap/task/engine_clone_task.cpp
@@ -57,14 +57,14 @@ EngineCloneTask::EngineCloneTask(const TCloneReq& clone_req, const TMasterInfo&
           _res_status(res_status),
           _signature(signature),
           _master_info(master_info) {
-    _mem_tracker = MemTracker::create_tracker(
+    _mem_tracker = std::make_unique<MemTrackerLimiter>(
             -1, "EngineCloneTask#tabletId=" + std::to_string(_clone_req.tablet_id),
-            StorageEngine::instance()->clone_mem_tracker(), MemTrackerLevel::TASK);
+            StorageEngine::instance()->clone_mem_tracker());
 }
 
 Status EngineCloneTask::execute() {
     // register the tablet to avoid it is deleted by gc thread during clone process
-    SCOPED_ATTACH_TASK_THREAD(ThreadContext::TaskType::STORAGE, _mem_tracker);
+    SCOPED_ATTACH_TASK(_mem_tracker.get(), ThreadContext::TaskType::STORAGE);
     StorageEngine::instance()->tablet_manager()->register_clone_tablet(_clone_req.tablet_id);
     Status st = _do_clone();
     StorageEngine::instance()->tablet_manager()->unregister_clone_tablet(_clone_req.tablet_id);
diff --git a/be/src/olap/task/engine_clone_task.h b/be/src/olap/task/engine_clone_task.h
index 604563e03f..ccb8a19581 100644
--- a/be/src/olap/task/engine_clone_task.h
+++ b/be/src/olap/task/engine_clone_task.h
@@ -79,7 +79,7 @@ private:
     const TMasterInfo& _master_info;
     int64_t _copy_size;
     int64_t _copy_time_ms;
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _mem_tracker;
 }; // EngineTask
 
 } // namespace doris
diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt
index 0d27c398a7..50ab78eda6 100644
--- a/be/src/runtime/CMakeLists.txt
+++ b/be/src/runtime/CMakeLists.txt
@@ -47,7 +47,6 @@ set(RUNTIME_FILES
     runtime_filter_mgr.cpp
     string_value.cpp
     thread_context.cpp
-    thread_mem_tracker_mgr.cpp
     thread_resource_mgr.cpp
     threadlocal.cc
     decimalv2_value.cpp
@@ -66,8 +65,6 @@ set(RUNTIME_FILES
     disk_io_mgr_reader_context.cc
     disk_io_mgr_scan_range.cc 
     buffered_block_mgr2.cc
-    mem_tracker.cpp
-    mem_tracker_task_pool.cpp
     spill_sorter.cc
     sorted_run_merger.cc
     data_stream_recvr.cc
@@ -104,6 +101,10 @@ set(RUNTIME_FILES
     mysql_result_writer.cpp
     memory/system_allocator.cpp
     memory/chunk_allocator.cpp
+    memory/mem_tracker_limiter.cpp
+    memory/mem_tracker.cpp
+    memory/mem_tracker_task_pool.cpp
+    memory/thread_mem_tracker_mgr.cpp
     fold_constant_executor.cpp
     cache/result_node.cpp
     cache/result_cache.cpp
diff --git a/be/src/runtime/buffered_block_mgr2.cc b/be/src/runtime/buffered_block_mgr2.cc
index 7b8a87b762..fa95e63c05 100644
--- a/be/src/runtime/buffered_block_mgr2.cc
+++ b/be/src/runtime/buffered_block_mgr2.cc
@@ -22,7 +22,7 @@
 
 #include "exec/exec_node.h"
 #include "runtime/exec_env.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/runtime_state.h"
 #include "runtime/tmp_file_mgr.h"
 #include "util/bit_util.h"
@@ -54,17 +54,13 @@ SpinLock BufferedBlockMgr2::_s_block_mgrs_lock;
 
 class BufferedBlockMgr2::Client {
 public:
-    Client(BufferedBlockMgr2* mgr, int num_reserved_buffers,
-           const std::shared_ptr<MemTracker>& tracker, RuntimeState* state)
+    Client(BufferedBlockMgr2* mgr, int num_reserved_buffers, RuntimeState* state)
             : _mgr(mgr),
               _state(state),
-              _tracker(
-                      MemTracker::create_virtual_tracker(-1, "BufferedBlockMgr2::Client", tracker)),
+              _tracker(std::make_unique<MemTracker>("BufferedBlockMgr2::Client")),
               _num_reserved_buffers(num_reserved_buffers),
               _num_tmp_reserved_buffers(0),
-              _num_pinned_buffers(0) {
-        DCHECK(tracker != nullptr);
-    }
+              _num_pinned_buffers(0) {}
 
     // A null dtor to pass codestyle check
     ~Client() {}
@@ -82,7 +78,7 @@ public:
     // enforced. Even when we give a buffer to a client, the buffer is still owned and
     // counts against the block mgr tracker (i.e. there is a fixed pool of buffers
     // regardless of if they are in the block mgr or the clients).
-    std::shared_ptr<MemTracker> _tracker;
+    std::unique_ptr<MemTracker> _tracker;
 
     // Number of buffers reserved by this client.
     int _num_reserved_buffers;
@@ -217,7 +213,7 @@ BufferedBlockMgr2::BufferedBlockMgr2(RuntimeState* state, TmpFileMgr* tmp_file_m
           _state(state) {}
 
 Status BufferedBlockMgr2::create(RuntimeState* state, RuntimeProfile* profile,
-                                 TmpFileMgr* tmp_file_mgr, int64_t mem_limit, int64_t block_size,
+                                 TmpFileMgr* tmp_file_mgr, int64_t block_size,
                                  std::shared_ptr<BufferedBlockMgr2>* block_mgr) {
     block_mgr->reset();
     {
@@ -240,7 +236,7 @@ Status BufferedBlockMgr2::create(RuntimeState* state, RuntimeProfile* profile,
             // _s_query_to_block_mgrs[state->query_id()] = *block_mgr;
         }
     }
-    (*block_mgr)->init(state->exec_env()->disk_io_mgr(), profile, mem_limit);
+    (*block_mgr)->init(state->exec_env()->disk_io_mgr(), profile);
     return Status::OK();
 }
 
@@ -254,16 +250,17 @@ int64_t BufferedBlockMgr2::available_buffers(Client* client) const {
 int64_t BufferedBlockMgr2::remaining_unreserved_buffers() const {
     int64_t num_buffers =
             _free_io_buffers.size() + _unpinned_blocks.size() + _non_local_outstanding_writes;
-    num_buffers += _mem_tracker->spare_capacity() / max_block_size();
+    num_buffers +=
+            thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->spare_capacity() /
+            max_block_size();
     num_buffers -= _unfullfilled_reserved_buffers;
     return num_buffers;
 }
 
-Status BufferedBlockMgr2::register_client(int num_reserved_buffers,
-                                          const std::shared_ptr<MemTracker>& tracker,
-                                          RuntimeState* state, Client** client) {
+Status BufferedBlockMgr2::register_client(int num_reserved_buffers, RuntimeState* state,
+                                          Client** client) {
     DCHECK_GE(num_reserved_buffers, 0);
-    Client* a_client = new Client(this, num_reserved_buffers, tracker, state);
+    Client* a_client = new Client(this, num_reserved_buffers, state);
     lock_guard<mutex> lock(_lock);
     *client = _obj_pool.add(a_client);
     _unfullfilled_reserved_buffers += num_reserved_buffers;
@@ -303,102 +300,6 @@ bool BufferedBlockMgr2::try_acquire_tmp_reservation(Client* client, int num_buff
     return true;
 }
 
-bool BufferedBlockMgr2::consume_memory(Client* client, int64_t size) {
-    // Later, we use this interface to manage the consumption of memory of hashtable instead of ReservationTracker.
-    // So it is possible to allocate 0, which has no additional impact on the behavior of BufferedBlockMgr.
-    // The process of memory allocation still by BufferPool, Because bufferpool has done a lot of optimization in memory allocation
-    // which is better than using the new operator directly.
-    if (size == 0) return true;
-    // Workaround IMPALA-1619. Return immediately if the allocation size will cause
-    // an arithmetic overflow.
-    if (UNLIKELY(size >= (1LL << 31))) {
-        LOG(WARNING) << "Trying to allocate memory >=2GB (" << size << ")B." << get_stack_trace();
-        return false;
-    }
-    int buffers_needed = BitUtil::ceil(size, max_block_size());
-    unique_lock<mutex> lock(_lock);
-    if (size < max_block_size() && _mem_tracker->try_consume(size)) {
-        // For small allocations (less than a block size), just let the allocation through.
-        client->_tracker->consume(size);
-        return true;
-    }
-
-    if (available_buffers(client) + client->_num_tmp_reserved_buffers < buffers_needed) {
-        return false;
-    }
-    Status st = _mem_tracker->try_consume(size);
-    WARN_IF_ERROR(st, "consume failed");
-    if (st) {
-        // There was still unallocated memory, don't need to recycle allocated blocks.
-        client->_tracker->consume(size);
-        return true;
-    }
-
-    // Bump up client->_num_tmp_reserved_buffers to satisfy this request. We don't want
-    // another client to grab the buffer.
-    int additional_tmp_reservations = 0;
-    if (client->_num_tmp_reserved_buffers < buffers_needed) {
-        additional_tmp_reservations = buffers_needed - client->_num_tmp_reserved_buffers;
-        client->_num_tmp_reserved_buffers += additional_tmp_reservations;
-        _unfullfilled_reserved_buffers += additional_tmp_reservations;
-    }
-
-    // Loop until we have freed enough memory.
-    // We free all the memory at the end. We don't want another component to steal the
-    // memory.
-    int buffers_acquired = 0;
-    do {
-        BufferDescriptor* buffer_desc = nullptr;
-        Status s = find_buffer(lock, &buffer_desc); // This waits on the lock.
-        if (buffer_desc == nullptr) {
-            break;
-        }
-        DCHECK(s.ok());
-        _all_io_buffers.erase(buffer_desc->all_buffers_it);
-        if (buffer_desc->block != nullptr) {
-            buffer_desc->block->_buffer_desc = nullptr;
-        }
-        delete[] buffer_desc->buffer;
-        ++buffers_acquired;
-    } while (buffers_acquired != buffers_needed);
-
-    Status status = Status::OK();
-    if (buffers_acquired == buffers_needed) {
-        status = write_unpinned_blocks();
-    }
-    // If we either couldn't acquire enough buffers or write_unpinned_blocks() failed, undo
-    // the reservation.
-    if (buffers_acquired != buffers_needed || !status.ok()) {
-        if (!status.ok()) {
-            VLOG_QUERY << "Query: " << _query_id << " write unpinned buffers failed.";
-            client->_state->log_error(status);
-        }
-        client->_num_tmp_reserved_buffers -= additional_tmp_reservations;
-        _unfullfilled_reserved_buffers -= additional_tmp_reservations;
-        _mem_tracker->release(buffers_acquired * max_block_size());
-        return false;
-    }
-
-    client->_num_tmp_reserved_buffers -= buffers_acquired;
-    _unfullfilled_reserved_buffers -= buffers_acquired;
-
-    DCHECK_GE(buffers_acquired * max_block_size(), size);
-    _mem_tracker->release(buffers_acquired * max_block_size());
-    st = _mem_tracker->try_consume(size);
-    WARN_IF_ERROR(st, "consume failed");
-    if (!st) {
-        return false;
-    }
-    client->_tracker->consume(size);
-    DCHECK(validate()) << endl << debug_internal();
-    return true;
-}
-
-void BufferedBlockMgr2::release_memory(Client* client, int64_t size) {
-    _mem_tracker->release(size);
-    client->_tracker->release(size);
-}
-
 void BufferedBlockMgr2::cancel() {
     {
         lock_guard<mutex> lock(_lock);
@@ -457,9 +358,12 @@ Status BufferedBlockMgr2::get_new_block(Client* client, Block* unpin_block, Bloc
 
         if (len > 0 && len < _max_block_size) {
             DCHECK(unpin_block == nullptr);
-            Status st = client->_tracker->try_consume(len);
+            Status st =
+                    thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit(
+                            len);
             WARN_IF_ERROR(st, "get_new_block failed");
             if (st) {
+                client->_tracker->consume(len);
                 // TODO: Have a cache of unused blocks of size 'len' (0, _max_block_size)
                 uint8_t* buffer = new uint8_t[len];
                 // Descriptors for non-I/O sized buffers are deleted when the block is deleted.
@@ -591,7 +495,6 @@ BufferedBlockMgr2::~BufferedBlockMgr2() {
         _mem_tracker->release(buffer->len);
         delete[] buffer->buffer;
     }
-    _mem_tracker.reset();
 }
 
 int64_t BufferedBlockMgr2::bytes_allocated() const {
@@ -606,8 +509,8 @@ int BufferedBlockMgr2::num_reserved_buffers_remaining(Client* client) const {
     return std::max(client->_num_reserved_buffers - client->_num_pinned_buffers, 0);
 }
 
-std::shared_ptr<MemTracker> BufferedBlockMgr2::get_tracker(Client* client) const {
-    return client->_tracker;
+MemTracker* BufferedBlockMgr2::get_tracker(Client* client) const {
+    return client->_tracker.get();
 }
 
 // TODO: It would be good if we had a sync primitive that supports is_mine() calls, see
@@ -1083,7 +986,9 @@ Status BufferedBlockMgr2::find_buffer(unique_lock& lock, BufferDescriptor
 
     // First, try to allocate a new buffer.
     if (_free_io_buffers.size() < _block_write_threshold &&
-        _mem_tracker->try_consume(_max_block_size)) {
+        thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit(
+                _max_block_size)) {
+        _mem_tracker->consume(_max_block_size);
         uint8_t* new_buffer = new uint8_t[_max_block_size];
         *buffer_desc = _obj_pool.add(new BufferDescriptor(new_buffer, _max_block_size));
         (*buffer_desc)->all_buffers_it =
@@ -1248,13 +1153,18 @@ string BufferedBlockMgr2::debug_internal() const {
        << "  Num available buffers: " << remaining_unreserved_buffers() << endl
        << "  Total pinned buffers: " << _total_pinned_buffers << endl
        << "  Unfullfilled reserved buffers: " << _unfullfilled_reserved_buffers << endl
-       << "  Remaining memory: " << _mem_tracker->spare_capacity()
-       << " (#blocks=" << (_mem_tracker->spare_capacity() / _max_block_size) << ")" << endl
+       << "  BUffer Block Mgr Used memory: " << _mem_tracker->consumption()
+       << "  Instance remaining memory: "
+       << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->spare_capacity()
+       << " (#blocks="
+       << (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->spare_capacity() /
+           _max_block_size)
+       << ")" << endl
        << "  Block write threshold: " << _block_write_threshold;
     return ss.str();
 }
 
-void BufferedBlockMgr2::init(DiskIoMgr* io_mgr, RuntimeProfile* parent_profile, int64_t mem_limit) {
+void BufferedBlockMgr2::init(DiskIoMgr* io_mgr, RuntimeProfile* parent_profile) {
     unique_lock l(_lock);
     if (_initialized) {
         return;
@@ -1279,7 +1189,7 @@ void BufferedBlockMgr2::init(DiskIoMgr* io_mgr, RuntimeProfile* parent_profile,
     _integrity_check_timer = ADD_TIMER(_profile.get(), "TotalIntegrityCheckTime");
 
     // Create a new mem_tracker and allocate buffers.
-    _mem_tracker = MemTracker::create_virtual_tracker(mem_limit, "BufferedBlockMgr2");
+    _mem_tracker = std::make_unique<MemTracker>("BufferedBlockMgr2");
 
     _initialized = true;
 }
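In BufferedBlockMgr2 the former per-client try_consume() is split into two steps: a limit check against the query-level limiter reachable through the thread context, followed by a plain consume() on the local counting tracker. The essential shape, condensed from the find_buffer() hunk above (error handling elided, not new behavior):

    Status st = thread_context()
                        ->_thread_mem_tracker_mgr->limiter_mem_tracker()
                        ->check_limit(_max_block_size); // the real limit lives here
    if (st.ok()) {
        _mem_tracker->consume(_max_block_size);         // local accounting only
        uint8_t* new_buffer = new uint8_t[_max_block_size];
        // ... register the buffer descriptor and hand it out ...
    }
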
diff --git a/be/src/runtime/buffered_block_mgr2.h b/be/src/runtime/buffered_block_mgr2.h
index c4b08c181b..6c7a63378f 100644
--- a/be/src/runtime/buffered_block_mgr2.h
+++ b/be/src/runtime/buffered_block_mgr2.h
@@ -283,11 +283,9 @@ public:
 
     // Create a block manager with the specified mem_limit. If a block mgr with the
     // same query id has already been created, that block mgr is returned.
-    // - mem_limit: maximum memory that will be used by the block mgr.
     // - buffer_size: maximum size of each buffer.
     static Status create(RuntimeState* state, RuntimeProfile* profile, TmpFileMgr* tmp_file_mgr,
-                         int64_t mem_limit, int64_t buffer_size,
-                         std::shared_ptr<BufferedBlockMgr2>* block_mgr);
+                         int64_t buffer_size, std::shared_ptr<BufferedBlockMgr2>* block_mgr);
 
     ~BufferedBlockMgr2();
 
@@ -301,8 +299,7 @@ public:
     // Buffers used by this client are reflected in tracker.
     // TODO: The fact that we allow oversubscription is problematic.
     // as the code expects the reservations to always be granted (currently not the case).
-    Status register_client(int num_reserved_buffers, const std::shared_ptr<MemTracker>& tracker,
-                           RuntimeState* state, Client** client);
+    Status register_client(int num_reserved_buffers, RuntimeState* state, Client** client);
 
     // Clears all reservations for this client.
     void clear_reservations(Client* client);
@@ -342,20 +339,6 @@ public:
     // Dumps block mgr state. Grabs lock. If client is not nullptr, also dumps its state.
     std::string debug_string(Client* client = nullptr);
 
-    // Consumes 'size' bytes from the buffered block mgr. This is used by callers that want
-    // the memory to come from the block mgr pool (and therefore trigger spilling) but need
-    // the allocation to be more flexible than blocks. Buffer space reserved with
-    // try_acquire_tmp_reservation() may be used to fulfill the request if available. If the
-    // request is unsuccessful, that temporary buffer space is not consumed.
-    // Returns false if there was not enough memory.
-    // TODO: this is added specifically to support the Buckets structure in the hash table
-    // which does not map well to Blocks. Revisit this.
-    bool consume_memory(Client* client, int64_t size);
-
-    // All successful allocates bytes from consume_memory() must have a corresponding
-    // release_memory() call.
-    void release_memory(Client* client, int64_t size);
-
     // The number of buffers available for client. That is, if all other clients were
     // stopped, the number of buffers this client could get.
     int64_t available_buffers(Client* client) const;
@@ -373,7 +356,8 @@ public:
 
     int num_pinned_buffers(Client* client) const;
     int num_reserved_buffers_remaining(Client* client) const;
-    std::shared_ptr<MemTracker> get_tracker(Client* client) const;
+    MemTracker* get_tracker(Client* client) const;
+    MemTracker* mem_tracker() const { return _mem_tracker.get(); }
     int64_t max_block_size() const {
         { return _max_block_size; }
     }
@@ -408,7 +392,7 @@ private:
     BufferedBlockMgr2(RuntimeState* state, TmpFileMgr* tmp_file_mgr, int64_t block_size);
 
     // Initializes the block mgr. Idempotent and thread-safe.
-    void init(DiskIoMgr* io_mgr, RuntimeProfile* profile, int64_t mem_limit);
+    void init(DiskIoMgr* io_mgr, RuntimeProfile* profile);
 
     // Initializes _tmp_files. This is initialized the first time we need to write to disk.
     // Must be called with _lock taken.
@@ -509,7 +493,7 @@ private:
     ObjectPool _obj_pool;
 
     // Track buffers allocated by the block manager.
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
 
     // The temporary file manager used to allocate temporary file space.
     TmpFileMgr* _tmp_file_mgr;
diff --git a/be/src/runtime/bufferpool/buffer_allocator.cc b/be/src/runtime/bufferpool/buffer_allocator.cc
index 34067541e1..0c59dd0d9f 100644
--- a/be/src/runtime/bufferpool/buffer_allocator.cc
+++ b/be/src/runtime/bufferpool/buffer_allocator.cc
@@ -194,8 +194,7 @@ BufferPool::BufferAllocator::BufferAllocator(BufferPool* pool, int64_t min_buffe
           clean_page_bytes_remaining_(clean_page_bytes_limit),
           per_core_arenas_(CpuInfo::get_max_num_cores()),
           max_scavenge_attempts_(MAX_SCAVENGE_ATTEMPTS),
-          _mem_tracker(MemTracker::create_virtual_tracker(-1, "BufferAllocator", nullptr,
-                                                          MemTrackerLevel::OVERVIEW)) {
+          _mem_tracker(std::make_unique<MemTracker>("BufferAllocator")) {
     DCHECK(BitUtil::IsPowerOf2(min_buffer_len_)) << min_buffer_len_;
     DCHECK(BitUtil::IsPowerOf2(max_buffer_len_)) << max_buffer_len_;
     DCHECK_LE(0, min_buffer_len_);
@@ -305,7 +304,7 @@ Status BufferPool::BufferAllocator::AllocateInternal(int64_t len, BufferHandle*
         system_bytes_remaining_.fetch_add(len, std::memory_order_release);
         return status;
     }
-    _mem_tracker->consume_cache(len);
+    _mem_tracker->consume(len);
     return Status::OK();
 }
 
@@ -380,7 +379,7 @@ void BufferPool::BufferAllocator::Free(BufferHandle&& handle) {
     FreeBufferArena* arena = per_core_arenas_[handle.home_core_].get();
     handle.Poison();
     if (!arena->AddFreeBuffer(std::move(handle))) {
-        _mem_tracker->release_cache(handle.len());
+        _mem_tracker->release(handle.len());
     }
 }
 
@@ -432,7 +431,7 @@ int64_t BufferPool::BufferAllocator::FreeToSystem(std::vector&& bu
         buffer.Unpoison();
         system_allocator_->Free(std::move(buffer));
     }
-    _mem_tracker->release_cache(bytes_freed);
+    _mem_tracker->release(bytes_freed);
     return bytes_freed;
 }
 
diff --git a/be/src/runtime/bufferpool/buffer_allocator.h b/be/src/runtime/bufferpool/buffer_allocator.h
index 122e00e57b..cf2a0f741e 100644
--- a/be/src/runtime/bufferpool/buffer_allocator.h
+++ b/be/src/runtime/bufferpool/buffer_allocator.h
@@ -19,7 +19,7 @@
 
 #include "runtime/bufferpool/buffer_pool_internal.h"
 #include "runtime/bufferpool/free_list.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "util/aligned_new.h"
 
 namespace doris {
@@ -236,6 +236,6 @@ private:
     /// but is guaranteed to succeed.
     int max_scavenge_attempts_;
 
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
 };
 } // namespace doris
diff --git a/be/src/runtime/bufferpool/buffer_pool.cc b/be/src/runtime/bufferpool/buffer_pool.cc
index 3ff0a2e10e..eda74e3530 100644
--- a/be/src/runtime/bufferpool/buffer_pool.cc
+++ b/be/src/runtime/bufferpool/buffer_pool.cc
@@ -115,13 +115,12 @@ BufferPool::BufferPool(int64_t min_buffer_len, int64_t buffer_bytes_limit,
 BufferPool::~BufferPool() {}
 
 Status BufferPool::RegisterClient(const string& name, ReservationTracker* parent_reservation,
-                                  const std::shared_ptr<MemTracker>& mem_tracker,
                                   int64_t reservation_limit, RuntimeProfile* profile,
                                   ClientHandle* client) {
     DCHECK(!client->is_registered());
     DCHECK(parent_reservation != nullptr);
     client->impl_ = new Client(this, //file_group,
-                               name, parent_reservation, mem_tracker, reservation_limit, profile);
+                               name, parent_reservation, reservation_limit, profile);
     return Status::OK();
 }
 
@@ -347,7 +346,7 @@ bool BufferPool::ClientHandle::has_unpinned_pages() const {
 
 BufferPool::SubReservation::SubReservation(ClientHandle* client) {
     tracker_.reset(new ReservationTracker);
-    tracker_->InitChildTracker(nullptr, client->impl_->reservation(), nullptr,
+    tracker_->InitChildTracker(nullptr, client->impl_->reservation(),
                                numeric_limits<int64_t>::max());
 }
 
@@ -368,7 +367,6 @@ void BufferPool::SubReservation::Close() {
 
 BufferPool::Client::Client(BufferPool* pool, //TmpFileMgr::FileGroup* file_group,
                            const string& name, ReservationTracker* parent_reservation,
-                           const std::shared_ptr<MemTracker>& mem_tracker,
                            int64_t reservation_limit, RuntimeProfile* profile)
         : pool_(pool),
           //file_group_(file_group),
@@ -378,7 +376,7 @@ BufferPool::Client::Client(BufferPool* pool, //TmpFileMgr::FileGroup* file_group
           buffers_allocated_bytes_(0) {
     // Set up a child profile with buffer pool info.
     RuntimeProfile* child_profile = profile->create_child("Buffer pool", true, true);
-    reservation_.InitChildTracker(child_profile, parent_reservation, nullptr, reservation_limit);
+    reservation_.InitChildTracker(child_profile, parent_reservation, reservation_limit);
     counters_.alloc_time = ADD_TIMER(child_profile, "AllocTime");
     counters_.cumulative_allocations =
             ADD_COUNTER(child_profile, "CumulativeAllocations", TUnit::UNIT);
diff --git a/be/src/runtime/bufferpool/buffer_pool.h b/be/src/runtime/bufferpool/buffer_pool.h
index dd13fde8ae..9a378934a2 100644
--- a/be/src/runtime/bufferpool/buffer_pool.h
+++ b/be/src/runtime/bufferpool/buffer_pool.h
@@ -172,8 +172,8 @@ public:
     /// 'reservation_limit' and associated with MemTracker 'mem_tracker'. The initial
     /// reservation is 0 bytes.
     Status RegisterClient(const std::string& name, ReservationTracker* parent_reservation,
-                          const std::shared_ptr<MemTracker>& mem_tracker, int64_t reservation_limit,
-                          RuntimeProfile* profile, ClientHandle* client) WARN_UNUSED_RESULT;
+                          int64_t reservation_limit, RuntimeProfile* profile,
+                          ClientHandle* client) WARN_UNUSED_RESULT;
 
     /// Deregister 'client' if it is registered. All pages must be destroyed and buffers
     /// must be freed for the client before calling this. Releases any reservation that
diff --git a/be/src/runtime/bufferpool/buffer_pool_internal.h b/be/src/runtime/bufferpool/buffer_pool_internal.h
index 705c7f2f1d..08549cc947 100644
--- a/be/src/runtime/bufferpool/buffer_pool_internal.h
+++ b/be/src/runtime/bufferpool/buffer_pool_internal.h
@@ -133,8 +133,7 @@ class BufferPool::Client {
 public:
     Client(BufferPool* pool, //TmpFileMgr::FileGroup* file_group,
            const std::string& name, ReservationTracker* parent_reservation,
-           const std::shared_ptr<MemTracker>& mem_tracker, int64_t reservation_limit,
-           RuntimeProfile* profile);
+           int64_t reservation_limit, RuntimeProfile* profile);
 
     ~Client() {
         DCHECK_EQ(0, num_pages_);
diff --git a/be/src/runtime/bufferpool/reservation_tracker.cc b/be/src/runtime/bufferpool/reservation_tracker.cc
index 405e75eadb..6bcd0bf38a 100644
--- a/be/src/runtime/bufferpool/reservation_tracker.cc
+++ b/be/src/runtime/bufferpool/reservation_tracker.cc
@@ -23,7 +23,8 @@
 #include "common/object_pool.h"
 #include "gutil/strings/substitute.h"
 #include "olap/utils.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker_limiter.h"
+#include "runtime/thread_context.h"
 #include "util/dummy_runtime_profile.h"
 #include "util/runtime_profile.h"
 
@@ -53,7 +54,7 @@ void ReservationTracker::InitRootTracker(RuntimeProfile* profile, int64_t reserv
 }
 
 void ReservationTracker::InitChildTracker(RuntimeProfile* profile, ReservationTracker* parent,
-                                          MemTracker* mem_tracker, int64_t reservation_limit) {
+                                          int64_t reservation_limit) {
     DCHECK(parent != nullptr);
     DCHECK_GE(reservation_limit, 0);
 
@@ -72,10 +73,7 @@ void ReservationTracker::InitChildTracker(RuntimeProfile* profile, ReservationTr
         MemTracker* parent_mem_tracker = GetParentMemTracker();
         if (parent_mem_tracker != nullptr) {
             // Make sure the parent links of the MemTrackers correspond to our parent links.
-            DCHECK_EQ(parent_mem_tracker, mem_tracker_->parent().get());
-            // Make sure we don't have a lower limit than the ancestor, since we don't enforce
-            // limits at lower links.
-            DCHECK_EQ(mem_tracker_->get_lowest_limit(), parent_mem_tracker->get_lowest_limit());
+            DCHECK_EQ(parent_mem_tracker, mem_tracker_->parent());
         } else {
             // Make sure we didn't leave a gap in the links. E.g. this tracker's grandparent
             // shouldn't have a MemTracker.
@@ -185,14 +183,16 @@ bool ReservationTracker::TryConsumeFromMemTracker(int64_t reservation_increase)
     if (GetParentMemTracker() == nullptr) {
         // At the topmost link, which may be a MemTracker with a limit, we need to use
         // TryConsume() to check the limit.
-        Status st = mem_tracker_->try_consume(reservation_increase);
+        Status st = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit(
+                reservation_increase);
         WARN_IF_ERROR(st, "TryConsumeFromMemTracker failed");
+        mem_tracker_->consume(reservation_increase);
         return st.ok();
     } else {
         // For lower links, there shouldn't be a limit to enforce, so we just need to
         // update the consumption of the linked MemTracker since the reservation is
         // already reflected in its parent.
-        mem_tracker_->consume_local(reservation_increase, GetParentMemTracker());
+        mem_tracker_->consume(reservation_increase);
         return true;
     }
 }
@@ -200,11 +200,7 @@ bool ReservationTracker::TryConsumeFromMemTracker(int64_t reservation_increase)
 void ReservationTracker::ReleaseToMemTracker(int64_t reservation_decrease) {
     DCHECK_GE(reservation_decrease, 0);
     if (mem_tracker_ == nullptr) return;
-    if (GetParentMemTracker() == nullptr) {
-        mem_tracker_->release(reservation_decrease);
-    } else {
-        mem_tracker_->release_local(reservation_decrease, GetParentMemTracker());
-    }
+    mem_tracker_->release(reservation_decrease);
 }
 
 void ReservationTracker::DecreaseReservation(int64_t bytes, bool is_child_reservation) {
@@ -277,9 +273,6 @@ bool ReservationTracker::TransferReservationTo(ReservationTracker* other, int64_
     // so this is all atomic.
     for (ReservationTracker* tracker : other_path_to_common) {
         tracker->UpdateReservation(bytes);
-        // We don't handle MemTrackers with limit in this function - this should always
-        // succeed.
-        DCHECK(tracker->mem_tracker_ == nullptr || !tracker->mem_tracker_->has_limit());
         bool success = tracker->TryConsumeFromMemTracker(bytes);
         DCHECK(success);
         if (tracker != other_path_to_common[0]) tracker->child_reservations_ += bytes;
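TryConsumeFromMemTracker() keeps its old two-level structure, but the limit check moves from the linked MemTracker to the thread's limiter tracker: only the topmost link checks, lower links merely record consumption. Restated compactly from the hunk above (not new behavior):

    if (GetParentMemTracker() == nullptr) {
        // Topmost link: check the limit on the thread-local limiter, then record.
        Status st = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()
                            ->check_limit(reservation_increase);
        mem_tracker_->consume(reservation_increase);
        return st.ok();
    }
    // Lower links: nothing to enforce, just update the linked tracker.
    mem_tracker_->consume(reservation_increase);
    return true;
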
diff --git a/be/src/runtime/bufferpool/reservation_tracker.h b/be/src/runtime/bufferpool/reservation_tracker.h
index 0f01d55d02..80408aa6eb 100644
--- a/be/src/runtime/bufferpool/reservation_tracker.h
+++ b/be/src/runtime/bufferpool/reservation_tracker.h
@@ -101,7 +101,7 @@ public:
     /// 'reservation_limit' is the maximum reservation for this tracker in bytes.
     /// if 'profile' is not nullptr, the counters in 'counters_' are added to 'profile'.
     void InitChildTracker(RuntimeProfile* profile, ReservationTracker* parent,
-                          MemTracker* mem_tracker, int64_t reservation_limit);
+                          int64_t reservation_limit);
 
     /// If the tracker is initialized, deregister the ReservationTracker from its parent,
     /// relinquishing all this tracker's reservation. All of the reservation must be unused
diff --git a/be/src/runtime/bufferpool/system_allocator.cc b/be/src/runtime/bufferpool/system_allocator.cc
index 7cc59e67ab..9a576d9488 100644
--- a/be/src/runtime/bufferpool/system_allocator.cc
+++ b/be/src/runtime/bufferpool/system_allocator.cc
@@ -77,11 +77,11 @@ Status SystemAllocator::AllocateViaMMap(int64_t len, uint8_t** buffer_mem) {
         // Map an extra huge page so we can fix up the alignment if needed.
         map_len += HUGE_PAGE_SIZE;
     }
-    CONSUME_THREAD_LOCAL_MEM_TRACKER(map_len);
+    CONSUME_THREAD_MEM_TRACKER(map_len);
     uint8_t* mem = reinterpret_cast<uint8_t*>(
             mmap(nullptr, map_len, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
     if (mem == MAP_FAILED) {
-        RELEASE_THREAD_LOCAL_MEM_TRACKER(map_len);
+        RELEASE_THREAD_MEM_TRACKER(map_len);
         return Status::BufferAllocFailed("mmap failed");
     }
 
@@ -93,12 +93,12 @@ Status SystemAllocator::AllocateViaMMap(int64_t len, uint8_t** buffer_mem) {
         if (misalignment != 0) {
             uintptr_t fixup = HUGE_PAGE_SIZE - misalignment;
             munmap(mem, fixup);
-            RELEASE_THREAD_LOCAL_MEM_TRACKER(fixup);
+            RELEASE_THREAD_MEM_TRACKER(fixup);
             mem += fixup;
             map_len -= fixup;
         }
         munmap(mem + len, map_len - len);
-        RELEASE_THREAD_LOCAL_MEM_TRACKER(map_len - len);
+        RELEASE_THREAD_MEM_TRACKER(map_len - len);
         DCHECK_EQ(reinterpret_cast<uintptr_t>(mem) % HUGE_PAGE_SIZE, 0) << mem;
         // Mark the buffer as a candidate for promotion to huge pages. The Linux Transparent
         // Huge Pages implementation will try to back the memory with a huge page if it is
@@ -147,7 +147,7 @@ Status SystemAllocator::AllocateViaMalloc(int64_t len, uint8_t** buffer_mem) {
 void SystemAllocator::Free(BufferPool::BufferHandle&& buffer) {
     if (config::mmap_buffers) {
         int rc = munmap(buffer.data(), buffer.len());
-        RELEASE_THREAD_LOCAL_MEM_TRACKER(buffer.len());
+        RELEASE_THREAD_MEM_TRACKER(buffer.len());
         DCHECK_EQ(rc, 0) << "Unexpected munmap() error: " << errno;
     } else {
         bool use_huge_pages = buffer.len() % HUGE_PAGE_SIZE == 0 && config::madvise_huge_pages;
diff --git a/be/src/runtime/data_stream_recvr.cc b/be/src/runtime/data_stream_recvr.cc
index 5fb2981993..a89c39cea9 100644
--- a/be/src/runtime/data_stream_recvr.cc
+++ b/be/src/runtime/data_stream_recvr.cc
@@ -205,7 +205,7 @@ void DataStreamRecvr::SenderQueue::add_batch(const PRowBatch& pb_batch, int be_n
                                              ::google::protobuf::Closure** done) {
     // Avoid deadlock when calling SenderQueue::cancel() in tcmalloc hook,
     // limit memory via DataStreamRecvr::exceeds_limit.
-    STOP_CHECK_LIMIT_THREAD_LOCAL_MEM_TRACKER();
+    STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
     lock_guard<mutex> l(_lock);
     if (_is_cancelled) {
         return;
@@ -277,7 +277,7 @@ void DataStreamRecvr::SenderQueue::add_batch(const PRowBatch& pb_batch, int be_n
 void DataStreamRecvr::SenderQueue::add_batch(RowBatch* batch, bool use_move) {
     // Avoid deadlock when calling SenderQueue::cancel() in tcmalloc hook,
     // limit memory via DataStreamRecvr::exceeds_limit.
-    STOP_CHECK_LIMIT_THREAD_LOCAL_MEM_TRACKER();
+    STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
     unique_lock<mutex> l(_lock);
     if (_is_cancelled) {
         return;
@@ -371,7 +371,7 @@ void DataStreamRecvr::SenderQueue::close() {
 
 Status DataStreamRecvr::create_merger(const TupleRowComparator& less_than) {
     DCHECK(_is_merging);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     vector child_input_batch_suppliers;
     // Create the merger that will a single stream of sorted rows.
     _merger.reset(new SortedRunMerger(less_than, &_row_desc, _profile, false));
@@ -387,7 +387,7 @@ Status DataStreamRecvr::create_merger(const TupleRowComparator& less_than) {
 Status DataStreamRecvr::create_parallel_merger(const TupleRowComparator& less_than,
                                                uint32_t batch_size) {
     DCHECK(_is_merging);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     vector child_input_batch_suppliers;
 
     // Create the merger that will a single stream of sorted rows.
@@ -433,7 +433,7 @@ void DataStreamRecvr::transfer_all_resources(RowBatch* transfer_batch) {
     // _child_mergers is not empty, means use parallel merge need transfer resource from
     // _sender queue.
     // the need transfer resources from child_merger input_row_batch
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     if (!_child_mergers.empty()) {
         _merger->transfer_all_resources(transfer_batch);
     } else {
@@ -459,8 +459,7 @@ DataStreamRecvr::DataStreamRecvr(
           _num_buffered_bytes(0),
           _profile(profile),
           _sub_plan_query_statistics_recvr(sub_plan_query_statistics_recvr) {
-    _mem_tracker = MemTracker::create_tracker(-1, "DataStreamRecvr", nullptr,
-                                              MemTrackerLevel::VERBOSE, _profile);
+    _mem_tracker = std::make_unique<MemTracker>("DataStreamRecvr", nullptr, _profile);
 
     // Create one queue per sender if is_merging is true.
     int num_queues = is_merging ? num_senders : 1;
@@ -482,20 +481,20 @@ DataStreamRecvr::DataStreamRecvr(
 
 Status DataStreamRecvr::get_next(RowBatch* output_batch, bool* eos) {
     DCHECK(_merger.get() != nullptr);
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     return _merger->get_next(output_batch, eos);
 }
 
 void DataStreamRecvr::add_batch(const PRowBatch& batch, int sender_id, int be_number,
                                 int64_t packet_seq, ::google::protobuf::Closure** done) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     int use_sender_id = _is_merging ? sender_id : 0;
     // Add all batches to the same queue if _is_merging is false.
     _sender_queues[use_sender_id]->add_batch(batch, be_number, packet_seq, done);
 }
 
 void DataStreamRecvr::add_batch(RowBatch* batch, int sender_id, bool use_move) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     int use_sender_id = _is_merging ? sender_id : 0;
     _sender_queues[use_sender_id]->add_batch(batch, use_move);
 }
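Both add_batch() overloads above suspend limit checking before taking _lock, so a limit hit inside the tcmalloc hook cannot re-enter SenderQueue::cancel() and deadlock on the same lock; consumption is still recorded. A sketch of the pattern, assuming the macro is a scoped guard like its predecessor (class and method names illustrative):

    void SenderQueueLike::add_batch(doris::RowBatch* batch) {
        STOP_CHECK_THREAD_MEM_TRACKER_LIMIT(); // scoped: limit checks resume at scope exit
        std::lock_guard<std::mutex> l(_lock);
        // ... enqueue the batch; memory is still counted, only the limit check is skipped ...
    }
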
diff --git a/be/src/runtime/data_stream_recvr.h b/be/src/runtime/data_stream_recvr.h
index 70a769dae0..efb036b5dd 100644
--- a/be/src/runtime/data_stream_recvr.h
+++ b/be/src/runtime/data_stream_recvr.h
@@ -102,7 +102,6 @@ public:
     const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; }
     PlanNodeId dest_node_id() const { return _dest_node_id; }
     const RowDescriptor& row_desc() const { return _row_desc; }
-    const std::shared_ptr<MemTracker>& mem_tracker() const { return _mem_tracker; }
 
     void add_sub_plan_statistics(const PQueryStatistics& statistics, int sender_id) {
         _sub_plan_query_statistics_recvr->insert(statistics, sender_id);
@@ -157,7 +156,7 @@ private:
     std::atomic<int64_t> _num_buffered_bytes;
 
     // Memtracker for batches in the sender queue(s).
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
 
     // One or more queues of row batches received from senders. If _is_merging is true,
     // there is one SenderQueue for each sender. Otherwise, row batches from all senders
diff --git a/be/src/runtime/data_stream_sender.cpp b/be/src/runtime/data_stream_sender.cpp
index ca27660bfb..2731587e45 100644
--- a/be/src/runtime/data_stream_sender.cpp
+++ b/be/src/runtime/data_stream_sender.cpp
@@ -36,7 +36,7 @@
 #include "runtime/descriptors.h"
 #include "runtime/dpp_sink_internal.h"
 #include "runtime/exec_env.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/raw_value.h"
 #include "runtime/row_batch.h"
 #include "runtime/runtime_state.h"
@@ -402,10 +402,9 @@ Status DataStreamSender::prepare(RuntimeState* state) {
           << "])";
     _profile = _pool->add(new RuntimeProfile(title.str()));
     SCOPED_TIMER(_profile->total_time_counter());
-    _mem_tracker = MemTracker::create_tracker(
-            -1, "DataStreamSender:" + print_id(state->fragment_instance_id()), nullptr,
-            MemTrackerLevel::VERBOSE, _profile);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    _mem_tracker = std::make_unique<MemTracker>(
+            "DataStreamSender:" + print_id(state->fragment_instance_id()), nullptr, _profile);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
 
     if (_part_type == TPartitionType::UNPARTITIONED || _part_type == TPartitionType::RANDOM) {
         std::random_device rd;
@@ -413,11 +412,11 @@ Status DataStreamSender::prepare(RuntimeState* state) {
         shuffle(_channels.begin(), _channels.end(), g);
     } else if (_part_type == TPartitionType::HASH_PARTITIONED ||
                _part_type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED) {
-        RETURN_IF_ERROR(Expr::prepare(_partition_expr_ctxs, state, _row_desc, _expr_mem_tracker));
+        RETURN_IF_ERROR(Expr::prepare(_partition_expr_ctxs, state, _row_desc));
     } else {
-        RETURN_IF_ERROR(Expr::prepare(_partition_expr_ctxs, state, _row_desc, _expr_mem_tracker));
+        RETURN_IF_ERROR(Expr::prepare(_partition_expr_ctxs, state, _row_desc));
         for (auto iter : _partition_infos) {
-            RETURN_IF_ERROR(iter->prepare(state, _row_desc, _expr_mem_tracker));
+            RETURN_IF_ERROR(iter->prepare(state, _row_desc));
         }
     }
 
@@ -446,7 +445,7 @@ DataStreamSender::~DataStreamSender() {
 
 Status DataStreamSender::open(RuntimeState* state) {
     DCHECK(state != nullptr);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     RETURN_IF_ERROR(Expr::open(_partition_expr_ctxs, state));
     for (auto iter : _partition_infos) {
         RETURN_IF_ERROR(iter->open(state));
@@ -456,7 +455,7 @@ Status DataStreamSender::open(RuntimeState* state) {
 
 Status DataStreamSender::send(RuntimeState* state, RowBatch* batch) {
     SCOPED_TIMER(_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
 
     // Unpartition or _channel size
     if (_part_type == TPartitionType::UNPARTITIONED || _channels.size() == 1) {
diff --git a/be/src/runtime/data_stream_sender.h b/be/src/runtime/data_stream_sender.h
index 3e743bc007..8d00cdbbe3 100644
--- a/be/src/runtime/data_stream_sender.h
+++ b/be/src/runtime/data_stream_sender.h
@@ -224,7 +224,7 @@ protected:
 
     RuntimeProfile* _profile; // Allocated from _pool
     PRowBatch* _cur_pb_batch;
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
     ObjectPool* _pool;
     // Sender instance id, unique within a fragment.
     int _sender_id;
diff --git a/be/src/runtime/disk_io_mgr.cc b/be/src/runtime/disk_io_mgr.cc
index c0bbea36e0..f2c68b1e99 100644
--- a/be/src/runtime/disk_io_mgr.cc
+++ b/be/src/runtime/disk_io_mgr.cc
@@ -219,8 +219,6 @@ void DiskIoMgr::BufferDescriptor::reset(RequestContext* reader, ScanRange* range
     _len = 0;
     _eosr = false;
     _status = Status::OK();
-    // Consume in the tls mem tracker when the buffer is allocated.
-    _buffer_mem_tracker = tls_ctx()->_thread_mem_tracker_mgr->mem_tracker().get();
 }
 
 void DiskIoMgr::BufferDescriptor::return_buffer() {
@@ -228,26 +226,6 @@ void DiskIoMgr::BufferDescriptor::return_buffer() {
     _io_mgr->return_buffer(this);
 }
 
-void DiskIoMgr::BufferDescriptor::update_mem_tracker(MemTracker* tracker) {
-    // Cached buffers don't count towards mem usage.
-    if (_scan_range->_cached_buffer != nullptr) {
-        return;
-    }
-    if (_buffer_mem_tracker == tracker) {
-        return;
-    }
-    // Only when the current tracker of desc and the parameter tracker are not null,
-    // the memory ownership will be transferred.
-    DCHECK(_buffer_mem_tracker && tracker);
-    _buffer_mem_tracker->transfer_to(tracker, _buffer_len);
-    _buffer_mem_tracker = std::move(tracker);
-}
-
-void DiskIoMgr::BufferDescriptor::set_mem_tracker(MemTracker* tracker) {
-    DCHECK(!_buffer_mem_tracker);
-    _buffer_mem_tracker = std::move(tracker);
-}
-
 DiskIoMgr::WriteRange::WriteRange(const string& file, int64_t file_offset, int disk_id,
                                   WriteDoneCallback callback) {
     _file = file;
@@ -284,8 +262,6 @@ DiskIoMgr::DiskIoMgr()
 //         std::min((uint64_t)config::max_cached_file_handles, FileSystemUtil::max_num_file_handles()),
 //         &HdfsCachedFileHandle::release) {
 {
-    _mem_tracker = MemTracker::create_tracker(-1, "DiskIO", nullptr, MemTrackerLevel::OVERVIEW);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
     int64_t max_buffer_size_scaled = bit_ceil(_max_buffer_size, _min_buffer_size);
     _free_buffers.resize(bit_log2(max_buffer_size_scaled) + 1);
     int num_local_disks = (config::num_disks == 0 ? DiskInfo::num_disks() : config::num_disks);
@@ -306,8 +282,6 @@ DiskIoMgr::DiskIoMgr(int num_local_disks, int threads_per_disk, int min_buffer_size,
 // _file_handle_cache(::min(config::max_cached_file_handles,
 //             FileSystemUtil::max_num_file_handles()), &HdfsCachedFileHandle::release) {
 {
-    _mem_tracker = MemTracker::create_tracker(-1, "DiskIO", nullptr, MemTrackerLevel::OVERVIEW);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
     int64_t max_buffer_size_scaled = bit_ceil(_max_buffer_size, _min_buffer_size);
     _free_buffers.resize(bit_log2(max_buffer_size_scaled) + 1);
     if (num_local_disks == 0) {
@@ -373,13 +347,12 @@ DiskIoMgr::~DiskIoMgr() {
 }
 
 Status DiskIoMgr::init(const int64_t mem_limit) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
-    _mem_tracker->set_limit(mem_limit);
+    _mem_tracker = std::make_unique<MemTrackerLimiter>(mem_limit, "DiskIO");
     // If we hit the process limit, see if we can reclaim some memory by removing
     // previously allocated (but unused) io buffers.
     // TODO(zxy) After clearing the free buffer, how much impact will it have on subsequent
     // queries may need to be verified.
-    MemTracker::get_process_tracker()->add_gc_function(
+    ExecEnv::GetInstance()->process_mem_tracker()->add_gc_function(
             std::bind(&DiskIoMgr::gc_io_buffers, this, std::placeholders::_1));
 
     for (int i = 0; i < _disk_queues.size(); ++i) {
@@ -401,7 +374,7 @@ Status DiskIoMgr::init(const int64_t mem_limit) {
             // _disk_thread_group.AddThread(new Thread("disk-io-mgr", ss.str(),
             //             &DiskIoMgr::work_loop, this, _disk_queues[i]));
             _disk_thread_group.add_thread(new std::thread(
-                    std::bind(&DiskIoMgr::work_loop, this, _disk_queues[i], _mem_tracker)));
+                    std::bind(&DiskIoMgr::work_loop, this, _disk_queues[i], _mem_tracker.get())));
         }
     }
     _request_context_cache.reset(new RequestContextCache(this));
@@ -418,11 +391,10 @@ Status DiskIoMgr::init(const int64_t mem_limit) {
     return Status::OK();
 }
 
-Status DiskIoMgr::register_context(RequestContext** request_context,
-                                   std::shared_ptr<MemTracker> mem_tracker) {
+Status DiskIoMgr::register_context(RequestContext** request_context) {
     DCHECK(_request_context_cache) << "Must call init() first.";
     *request_context = _request_context_cache->get_new_context();
-    (*request_context)->reset(std::move(mem_tracker));
+    (*request_context)->reset();
     return Status::OK();
 }
 
@@ -460,7 +432,6 @@ void DiskIoMgr::unregister_context(RequestContext* reader) {
 // is on.
 // If wait_for_disks_completion is true, wait for the number of active disks to become 0.
 void DiskIoMgr::cancel_context(RequestContext* context, bool wait_for_disks_completion) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
     context->cancel(Status::Cancelled("Cancelled"));
 
     if (wait_for_disks_completion) {
@@ -541,7 +512,6 @@ Status DiskIoMgr::add_scan_ranges(RequestContext* reader, const vector<ScanRange*>& ranges,
@@ -664,7 +632,6 @@ Status DiskIoMgr::read(RequestContext* reader, ScanRange* range, BufferDescriptor** buffer) {
 
 void DiskIoMgr::return_buffer(BufferDescriptor* buffer_desc) {
     DCHECK(buffer_desc != nullptr);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
     if (!buffer_desc->_status.ok()) {
         DCHECK(buffer_desc->_buffer == nullptr);
     }
@@ -713,9 +680,6 @@ DiskIoMgr::BufferDescriptor* DiskIoMgr::get_buffer_desc(RequestContext* reader,
         }
     }
     buffer_desc->reset(reader, range, buffer, buffer_size);
-    // The buffer is consumed in the tls mem tracker, and we want to be recorded in the reader->_mem_tracker,
-    // so if the two trackers are different, transfer memory ownership.
-    buffer_desc->update_mem_tracker(reader->_mem_tracker.get());
     return buffer_desc;
 }
 
@@ -735,8 +699,7 @@ char* DiskIoMgr::get_free_buffer(int64_t* buffer_size) {
         buffer = new char[*buffer_size];
     } else {
         // This means the buffer's memory ownership is transferred from DiskIoMgr to tls tracker.
-        _mem_tracker->transfer_to(tls_ctx()->_thread_mem_tracker_mgr->mem_tracker().get(),
-                                  *buffer_size);
+        THREAD_MEM_TRACKER_TRANSFER_FROM(*buffer_size, _mem_tracker.get());
         buffer = _free_buffers[idx].front();
         _free_buffers[idx].pop_front();
     }
@@ -764,16 +727,14 @@ void DiskIoMgr::gc_io_buffers(int64_t bytes_to_free) {
     // The deleted buffer is released in the tls mem tracker, the deleted buffer belongs to DiskIoMgr,
     // so the freed memory should be recorded in the DiskIoMgr mem tracker. So if the tls mem tracker
     // and the DiskIoMgr tracker are different, transfer memory ownership.
-    _mem_tracker->transfer_to(tls_ctx()->_thread_mem_tracker_mgr->mem_tracker().get(), bytes_freed);
+    THREAD_MEM_TRACKER_TRANSFER_FROM(bytes_freed, _mem_tracker.get());
 }
 
 void DiskIoMgr::return_free_buffer(BufferDescriptor* desc) {
-    return_free_buffer(desc->_buffer, desc->_buffer_len, desc->buffer_mem_tracker());
-    // The buffer in the delete above has been released in the desc mem tracker, reset it to nullptr here.
-    desc->set_mem_tracker(nullptr);
+    return_free_buffer(desc->_buffer, desc->_buffer_len);
 }
 
-void DiskIoMgr::return_free_buffer(char* buffer, int64_t buffer_size, MemTracker* tracker) {
+void DiskIoMgr::return_free_buffer(char* buffer, int64_t buffer_size) {
     DCHECK(buffer != nullptr);
     int idx = free_buffers_idx(buffer_size);
     DCHECK_EQ(bit_ceil(buffer_size, _min_buffer_size) & ~(1 << idx), 0)
@@ -782,15 +743,11 @@ void DiskIoMgr::return_free_buffer(char* buffer, int64_t buffer_size, MemTracker* tracker) {
     unique_lock<mutex> lock(_free_buffers_lock);
     if (!config::disable_mem_pools && _free_buffers[idx].size() < config::max_free_io_buffers) {
         // The buffer's memory ownership is transferred from desc->buffer_mem_tracker to DiskIoMgr tracker.
-        tracker->transfer_to(_mem_tracker.get(), buffer_size);
+        THREAD_MEM_TRACKER_TRANSFER_TO(buffer_size, _mem_tracker.get());
         _free_buffers[idx].push_back(buffer);
     } else {
         --_num_allocated_buffers;
         delete[] buffer;
-        // The deleted buffer is released in the tls mem tracker. When the buffer was allocated,
-        // it was consumed in BufferDescriptor->buffer_mem_tracker, so if the tls mem tracker and
-        // the tracker in the parameters are different, transfer memory ownership.
-        tracker->transfer_to(tls_ctx()->_thread_mem_tracker_mgr->mem_tracker().get(), buffer_size);
     }
 }
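The two macros used above replace the old MemTracker::transfer_to calls: THREAD_MEM_TRACKER_TRANSFER_TO(bytes, dst) moves accounting for the bytes from the current thread's tracker to dst, and THREAD_MEM_TRACKER_TRANSFER_FROM(bytes, src) moves it the other way, so a pooled buffer is always owned by exactly one tracker. A hedged sketch of a free list using that convention (cache_buffer and reuse_buffer are hypothetical helpers; the macros are the ones from runtime/thread_context.h used in this patch):

    #include <vector>
    #include "runtime/memory/mem_tracker_limiter.h"
    #include "runtime/thread_context.h"

    // The caller allocated 'buf' under its own thread-local tracker; hand the bytes to the pool.
    void cache_buffer(std::vector<char*>& free_list, char* buf, int64_t size,
                      MemTrackerLimiter* pool_tracker) {
        THREAD_MEM_TRACKER_TRANSFER_TO(size, pool_tracker); // pool now accounts for the bytes
        free_list.push_back(buf);
    }

    // Hand a cached buffer back to the calling thread; the thread-local tracker takes over.
    char* reuse_buffer(std::vector<char*>& free_list, int64_t size,
                       MemTrackerLimiter* pool_tracker) {
        THREAD_MEM_TRACKER_TRANSFER_FROM(size, pool_tracker);
        char* buf = free_list.back();
        free_list.pop_back();
        return buf;
    }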
 
@@ -842,15 +799,6 @@ bool DiskIoMgr::get_next_request_range(DiskQueue* disk_queue, RequestRange** ran
         // same reader here (the reader is removed from the queue).  There can be
         // other disk threads operating on this reader in other functions though.
 
-        // We just picked a reader, check the mem limits.
-        // TODO: we can do a lot better here.  The reader can likely make progress
-        // with fewer io buffers.
-        if ((*request_context)->_mem_tracker != nullptr
-                    ? (*request_context)->_mem_tracker->any_limit_exceeded()
-                    : false) {
-            (*request_context)->cancel(Status::MemoryLimitExceeded("Memory limit exceeded"));
-        }
-
         unique_lock<mutex> request_lock((*request_context)->_lock);
         VLOG_FILE << "Disk (id=" << disk_id << ") reading for "
                   << (*request_context)->debug_string();
@@ -999,7 +947,7 @@ void DiskIoMgr::handle_read_finished(DiskQueue* disk_queue, RequestContext* reader,
     state.decrement_request_thread();
 }
 
-void DiskIoMgr::work_loop(DiskQueue* disk_queue, const std::shared_ptr<MemTracker>& mem_tracker) {
+void DiskIoMgr::work_loop(DiskQueue* disk_queue, MemTrackerLimiter* mem_tracker) {
     // The thread waits until there is work or the entire system is being shut down.
     // If there is work, performs the read or write requested and re-enqueues the
     // requesting context.
@@ -1011,7 +959,9 @@ void DiskIoMgr::work_loop(DiskQueue* disk_queue, const std::shared_ptr<MemTracker>& mem_tracker) {
     int64_t bytes_remaining = range->_len - range->_bytes_read;
     DCHECK_GT(bytes_remaining, 0);
     int64_t buffer_size = std::min(bytes_remaining, static_cast<int64_t>(_max_buffer_size));
-    bool enough_memory = true;
-    if (reader->_mem_tracker != nullptr) {
-        enough_memory = reader->_mem_tracker->spare_capacity() > LOW_MEMORY;
-        if (!enough_memory) {
-            // Low memory, GC and try again.
-            gc_io_buffers();
-            enough_memory = reader->_mem_tracker->spare_capacity() > LOW_MEMORY;
-        }
+    bool enough_memory = _mem_tracker->spare_capacity() > LOW_MEMORY;
+    if (!enough_memory) {
+        // Low memory, GC and try again.
+        gc_io_buffers();
+        enough_memory = _mem_tracker->spare_capacity() > LOW_MEMORY;
     }
 
     if (!enough_memory) {
@@ -1171,7 +1118,6 @@ int DiskIoMgr::free_buffers_idx(int64_t buffer_size) {
 Status DiskIoMgr::add_write_range(RequestContext* writer, WriteRange* write_range) {
     DCHECK_LE(write_range->len(), _max_buffer_size);
     unique_lock writer_lock(writer->_lock);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
 
     if (writer->_state == RequestContext::Cancelled) {
         DCHECK(!writer->_status.ok());
diff --git a/be/src/runtime/disk_io_mgr.h b/be/src/runtime/disk_io_mgr.h
index 1b47d6f139..3279ac2559 100644
--- a/be/src/runtime/disk_io_mgr.h
+++ b/be/src/runtime/disk_io_mgr.h
@@ -30,7 +30,7 @@
 #include "common/config.h"
 #include "common/object_pool.h"
 #include "common/status.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker_limiter.h"
 #include "util/error_util.h"
 #include "util/internal_queue.h"
 #include "util/metrics.h"
@@ -238,19 +238,10 @@ public:
         int64_t buffer_len() { return _buffer_len; }
         int64_t len() { return _len; }
         bool eosr() { return _eosr; }
-        MemTracker* buffer_mem_tracker() { return _buffer_mem_tracker; }
 
         // Returns the offset within the scan range that this buffer starts at
         int64_t scan_range_offset() const { return _scan_range_offset; }
 
-        // Updates this buffer to be owned by the new tracker.
-        // Transfer memory ownership between two trackers.
-        void update_mem_tracker(MemTracker* tracker);
-
-        // To set a tracker, make sure that in an external location,
-        // the desc buffer's memory must have transferred ownership,
-        void set_mem_tracker(MemTracker* tracker);
-
         // Returns the buffer to the IoMgr. This must be called for every buffer
         // returned by get_next()/read() that did not return an error. This is non-blocking.
         // After calling this, the buffer descriptor is invalid and cannot be accessed.
@@ -268,9 +259,6 @@ public:
         // Reader that this buffer is for
         RequestContext* _reader;
 
-        // The current tracker this buffer is associated with.
-        MemTracker* _buffer_mem_tracker;
-
         // Scan range that this buffer is for.
         ScanRange* _scan_range;
 
@@ -559,9 +547,7 @@ public:
     //    used for this reader will be tracked by this. If the limit is exceeded
     //    the reader will be cancelled and MEM_LIMIT_EXCEEDED will be returned via
     //    get_next().
-    Status register_context(
-            RequestContext** request_context,
-            std::shared_ptr<MemTracker> reader_mem_tracker = std::shared_ptr<MemTracker>());
+    Status register_context(RequestContext** request_context);
 
     // Unregisters context from the disk IoMgr. This must be called for every
     // register_context() regardless of cancellation and must be called in the
@@ -634,6 +620,7 @@ public:
     int64_t bytes_read_dn_cache(RequestContext* reader) const;
     int num_remote_ranges(RequestContext* reader) const;
     int64_t unexpected_remote_bytes(RequestContext* reader) const;
+    MemTrackerLimiter* mem_tracker() const { return _mem_tracker.get(); }
 
     // Returns the read throughput across all readers.
     // TODO: should this be a sliding window?  This should report metrics for the
@@ -698,7 +685,7 @@ private:
     // Pool to allocate BufferDescriptors.
     ObjectPool _pool;
 
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _mem_tracker;
 
     // Number of worker(read) threads per disk. Also the max depth of queued
     // work to the disk.
@@ -800,16 +787,15 @@ private:
     // Returns a buffer to the free list. buffer_size / _min_buffer_size should be a power
     // of 2, and buffer_size should be <= _max_buffer_size. These constraints will be met
     // if buffer was acquired via get_free_buffer() (which it should have been).
-    void return_free_buffer(char* buffer, int64_t buffer_size, MemTracker* tracker);
+    void return_free_buffer(char* buffer, int64_t buffer_size);
 
-    // Returns the buffer in desc (cannot be nullptr), sets buffer to nullptr and clears the
-    // mem tracker.
+    // Returns the buffer in desc (cannot be nullptr), sets buffer to nullptr
     void return_free_buffer(BufferDescriptor* desc);
 
     // Disk worker thread loop. This function retrieves the next range to process on
     // the disk queue and invokes read_range() or Write() depending on the type of Range().
     // There can be multiple threads per disk running this loop.
-    void work_loop(DiskQueue* queue, const std::shared_ptr<MemTracker>& mem_tracker);
+    void work_loop(DiskQueue* queue, MemTrackerLimiter* mem_tracker);
 
     // This is called from the disk thread to get the next range to process. It will
     // wait until a scan range and buffer are available, or a write range is available.
diff --git a/be/src/runtime/disk_io_mgr_internal.h b/be/src/runtime/disk_io_mgr_internal.h
index ef6f6868e0..46b74aa946 100644
--- a/be/src/runtime/disk_io_mgr_internal.h
+++ b/be/src/runtime/disk_io_mgr_internal.h
@@ -139,7 +139,7 @@ public:
     RequestContext(DiskIoMgr* parent, int num_disks);
 
     // Resets this object.
-    void reset(std::shared_ptr<MemTracker> tracker);
+    void reset();
 
     // Decrements the number of active disks for this reader.  If the disk count
     // goes to 0, the disk complete condition variable is signaled.
@@ -195,9 +195,6 @@ private:
     // Parent object
     DiskIoMgr* _parent;
 
-    // Memory used for this reader.  This is unowned by this object.
-    std::shared_ptr<MemTracker> _mem_tracker;
-
     // Total bytes read for this reader
     RuntimeProfile::Counter* _bytes_read_counter;
 
diff --git a/be/src/runtime/disk_io_mgr_reader_context.cc b/be/src/runtime/disk_io_mgr_reader_context.cc
index b055fed1d8..011b21615f 100644
--- a/be/src/runtime/disk_io_mgr_reader_context.cc
+++ b/be/src/runtime/disk_io_mgr_reader_context.cc
@@ -154,7 +154,7 @@ DiskIoMgr::RequestContext::RequestContext(DiskIoMgr* parent, int num_disks)
           _disk_states(num_disks) {}
 
 // Resets this object.
-void DiskIoMgr::RequestContext::reset(std::shared_ptr<MemTracker> tracker) {
+void DiskIoMgr::RequestContext::reset() {
     DCHECK_EQ(_state, Inactive);
     _status = Status::OK();
 
@@ -164,7 +164,6 @@ void DiskIoMgr::RequestContext::reset(std::shared_ptr<MemTracker> tracker) {
     _disks_accessed_bitmap = nullptr;
 
     _state = Active;
-    _mem_tracker = std::move(tracker);
 
     _num_unstarted_scan_ranges = 0;
     _num_disks_with_ranges = 0;
diff --git a/be/src/runtime/dpp_sink_internal.cpp b/be/src/runtime/dpp_sink_internal.cpp
index dece00cb9b..2f9ef1e9d1 100644
--- a/be/src/runtime/dpp_sink_internal.cpp
+++ b/be/src/runtime/dpp_sink_internal.cpp
@@ -150,10 +150,9 @@ Status PartitionInfo::from_thrift(ObjectPool* pool, const TRangePartition& t_par
     return Status::OK();
 }
 
-Status PartitionInfo::prepare(RuntimeState* state, const RowDescriptor& row_desc,
-                              const std::shared_ptr<MemTracker>& mem_tracker) {
+Status PartitionInfo::prepare(RuntimeState* state, const RowDescriptor& row_desc) {
     if (_distributed_expr_ctxs.size() > 0) {
-        RETURN_IF_ERROR(Expr::prepare(_distributed_expr_ctxs, state, row_desc, mem_tracker));
+        RETURN_IF_ERROR(Expr::prepare(_distributed_expr_ctxs, state, row_desc));
     }
     return Status::OK();
 }
diff --git a/be/src/runtime/dpp_sink_internal.h b/be/src/runtime/dpp_sink_internal.h
index 9ba4d428e3..9e2122c3a5 100644
--- a/be/src/runtime/dpp_sink_internal.h
+++ b/be/src/runtime/dpp_sink_internal.h
@@ -29,7 +29,6 @@
 namespace doris {
 
 class ExprContext;
-class MemTracker;
 class ObjectPool;
 class RuntimeState;
 class RowDescriptor;
@@ -192,8 +191,7 @@ public:
     static Status from_thrift(ObjectPool* pool, const TRangePartition& t_partition,
                               PartitionInfo* partition);
 
-    Status prepare(RuntimeState* state, const RowDescriptor& row_desc,
-                   const std::shared_ptr<MemTracker>& mem_tracker);
+    Status prepare(RuntimeState* state, const RowDescriptor& row_desc);
 
     Status open(RuntimeState* state);
 
diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h
index 7a02dae866..2f08c72365 100644
--- a/be/src/runtime/exec_env.h
+++ b/be/src/runtime/exec_env.h
@@ -19,8 +19,6 @@
 
 #include "common/status.h"
 #include "olap/options.h"
-#include "runtime/mem_tracker.h"
-#include "runtime/mem_tracker_task_pool.h"
 #include "util/threadpool.h"
 
 namespace doris {
@@ -44,7 +42,7 @@ class FragmentMgr;
 class ResultCache;
 class LoadPathMgr;
 class LoadStreamMgr;
-class MemTracker;
+class MemTrackerLimiter;
 class StorageEngine;
 class MemTrackerTaskPool;
 class PriorityThreadPool;
@@ -115,11 +113,11 @@ public:
         return nullptr;
     }
 
-    std::shared_ptr<MemTracker> query_pool_mem_tracker() { return _query_pool_mem_tracker; }
-    std::shared_ptr<MemTracker> load_pool_mem_tracker() { return _load_pool_mem_tracker; }
-    MemTrackerTaskPool* task_pool_mem_tracker_registry() {
-        return _task_pool_mem_tracker_registry.get();
-    }
+    MemTrackerLimiter* process_mem_tracker() { return _process_mem_tracker; }
+    void set_process_mem_tracker(MemTrackerLimiter* tracker) { _process_mem_tracker = tracker; }
+    MemTrackerLimiter* query_pool_mem_tracker() { return _query_pool_mem_tracker; }
+    MemTrackerLimiter* load_pool_mem_tracker() { return _load_pool_mem_tracker; }
+    MemTrackerTaskPool* task_pool_mem_tracker_registry() { return _task_pool_mem_tracker_registry; }
     ThreadResourceMgr* thread_mgr() { return _thread_mgr; }
     PriorityThreadPool* scan_thread_pool() { return _scan_thread_pool; }
     ThreadPool* limited_scan_thread_pool() { return _limited_scan_thread_pool.get(); }
@@ -183,11 +181,14 @@ private:
     ClientCache* _broker_client_cache = nullptr;
     ThreadResourceMgr* _thread_mgr = nullptr;
 
+    // The ancestor for all trackers. Every tracker is visible from the process down.
+    // The process tracker does not enforce a memory limit; it is only used to track the process's virtual memory.
+    MemTrackerLimiter* _process_mem_tracker;
     // The ancestor for all querys tracker.
-    std::shared_ptr<MemTracker> _query_pool_mem_tracker = nullptr;
+    MemTrackerLimiter* _query_pool_mem_tracker;
     // The ancestor for all load tracker.
-    std::shared_ptr<MemTracker> _load_pool_mem_tracker = nullptr;
-    std::unique_ptr<MemTrackerTaskPool> _task_pool_mem_tracker_registry;
+    MemTrackerLimiter* _load_pool_mem_tracker;
+    MemTrackerTaskPool* _task_pool_mem_tracker_registry;
 
     // The following two thread pools are used in different scenarios.
     // _scan_thread_pool is a priority thread pool.
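With these accessors the tracker hierarchy is reached through ExecEnv rather than through MemTracker singletons; for example, DiskIoMgr::init above registers its GC hook on process_mem_tracker(). A small sketch of reading the new accessors (process_mem_headroom is a hypothetical helper; limit() and consumption() are the methods used elsewhere in this patch):

    #include "runtime/exec_env.h"
    #include "runtime/memory/mem_tracker_limiter.h"

    // Rough headroom left under the process-level limit.
    int64_t process_mem_headroom() {
        MemTrackerLimiter* process = doris::ExecEnv::GetInstance()->process_mem_tracker();
        return process->limit() - process->consumption();
    }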
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index e82473b01f..a35ae80ee3 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -39,8 +39,8 @@
 #include "runtime/heartbeat_flags.h"
 #include "runtime/load_channel_mgr.h"
 #include "runtime/load_path_mgr.h"
-#include "runtime/mem_tracker.h"
-#include "runtime/mem_tracker_task_pool.h"
+#include "runtime/memory/mem_tracker.h"
+#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/result_buffer_mgr.h"
 #include "runtime/result_queue_mgr.h"
 #include "runtime/routine_load/routine_load_task_executor.h"
@@ -60,6 +60,11 @@
 #include "util/priority_work_stealing_thread_pool.hpp"
 #include "vec/runtime/vdata_stream_mgr.h"
 
+#if !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && \
+        !defined(THREAD_SANITIZER) && !defined(USE_JEMALLOC)
+#include "runtime/memory/tcmalloc_hook.h"
+#endif
+
 namespace doris {
 
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(scanner_thread_pool_queue_size, MetricUnit::NOUNIT);
@@ -94,7 +99,7 @@ Status ExecEnv::_init(const std::vector<StorePath>& store_paths) {
     _backend_client_cache = new BackendServiceClientCache(config::max_client_cache_size_per_host);
     _frontend_client_cache = new FrontendServiceClientCache(config::max_client_cache_size_per_host);
     _broker_client_cache = new BrokerServiceClientCache(config::max_client_cache_size_per_host);
-    _task_pool_mem_tracker_registry.reset(new MemTrackerTaskPool());
+    _task_pool_mem_tracker_registry = new MemTrackerTaskPool();
     _thread_mgr = new ThreadResourceMgr();
     if (config::doris_enable_scanner_thread_pool_per_disk &&
         config::doris_scanner_thread_pool_thread_num >= store_paths.size() &&
@@ -156,7 +161,8 @@ Status ExecEnv::_init(const std::vector<StorePath>& store_paths) {
     _small_file_mgr->init();
     _init_mem_tracker();
 
-    RETURN_IF_ERROR(_load_channel_mgr->init(MemTracker::get_process_tracker()->limit()));
+    RETURN_IF_ERROR(
+            _load_channel_mgr->init(ExecEnv::GetInstance()->process_mem_tracker()->limit()));
     _heartbeat_flags = new HeartbeatFlags();
     _register_metrics();
     _is_init = true;
@@ -183,12 +189,19 @@ Status ExecEnv::_init_mem_tracker() {
                      << ". Using physical memory instead";
         global_memory_limit_bytes = MemInfo::physical_mem();
     }
-    _query_pool_mem_tracker = MemTracker::create_tracker(
-            -1, "QueryPool", MemTracker::get_process_tracker(), MemTrackerLevel::OVERVIEW);
+    _process_mem_tracker = new MemTrackerLimiter(global_memory_limit_bytes, "Process");
+    thread_context()->_thread_mem_tracker_mgr->init();
+#if defined(USE_MEM_TRACKER) && !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && \
+        !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) && !defined(USE_JEMALLOC)
+    if (doris::config::enable_tcmalloc_hook) {
+        init_hook();
+    }
+#endif
+
+    _query_pool_mem_tracker = new MemTrackerLimiter(-1, "QueryPool", _process_mem_tracker);
     REGISTER_HOOK_METRIC(query_mem_consumption,
                          [this]() { return _query_pool_mem_tracker->consumption(); });
-    _load_pool_mem_tracker = MemTracker::create_tracker(
-            -1, "LoadPool", MemTracker::get_process_tracker(), MemTrackerLevel::OVERVIEW);
+    _load_pool_mem_tracker = new MemTrackerLimiter(-1, "LoadPool", _process_mem_tracker);
     REGISTER_HOOK_METRIC(load_mem_consumption,
                          [this]() { return _load_pool_mem_tracker->consumption(); });
     LOG(INFO) << "Using global memory limit: "
@@ -281,9 +294,6 @@ Status ExecEnv::_init_mem_tracker() {
     LOG(INFO) << "Chunk allocator memory limit: "
               << PrettyPrinter::print(chunk_reserved_bytes_limit, TUnit::BYTES)
               << ", origin config value: " << config::chunk_reserved_bytes_limit;
-
-    // TODO(zc): The current memory usage configuration is a bit confusing,
-    // we need to sort out the use of memory
     return Status::OK();
 }
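The ordering in _init_mem_tracker matters: the process-level MemTrackerLimiter and the thread-local tracker manager must exist before the tcmalloc new/delete hook starts attributing allocations, and the query/load pools are then parented to the process tracker. A condensed, hedged restatement of that sequence using only the calls shown in the hunk above (init_trackers_sketch and mem_limit_bytes are illustrative):

    static void init_trackers_sketch(int64_t mem_limit_bytes) {
        MemTrackerLimiter* process = new MemTrackerLimiter(mem_limit_bytes, "Process");
        thread_context()->_thread_mem_tracker_mgr->init();  // bind thread-local state first
        if (config::enable_tcmalloc_hook) {
            init_hook();                                     // only then hook new/delete
        }
        MemTrackerLimiter* query_pool = new MemTrackerLimiter(-1, "QueryPool", process);
        MemTrackerLimiter* load_pool = new MemTrackerLimiter(-1, "LoadPool", process);
        (void)query_pool;
        (void)load_pool;
    }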
 
@@ -347,6 +357,10 @@ void ExecEnv::_destroy() {
     SAFE_DELETE(_routine_load_task_executor);
     SAFE_DELETE(_external_scan_context_mgr);
     SAFE_DELETE(_heartbeat_flags);
+    SAFE_DELETE(_process_mem_tracker);
+    SAFE_DELETE(_query_pool_mem_tracker);
+    SAFE_DELETE(_load_pool_mem_tracker);
+    SAFE_DELETE(_task_pool_mem_tracker_registry);
 
     DEREGISTER_HOOK_METRIC(query_mem_consumption);
     DEREGISTER_HOOK_METRIC(load_mem_consumption);
diff --git a/be/src/runtime/export_sink.cpp b/be/src/runtime/export_sink.cpp
index 9f72c365de..3389027994 100644
--- a/be/src/runtime/export_sink.cpp
+++ b/be/src/runtime/export_sink.cpp
@@ -71,7 +71,7 @@ Status ExportSink::prepare(RuntimeState* state) {
     SCOPED_TIMER(_profile->total_time_counter());
 
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _expr_mem_tracker));
+    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc));
 
     // TODO(lingbin): add some Counter
     _bytes_written_counter = ADD_COUNTER(profile(), "BytesExported", TUnit::BYTES);
diff --git a/be/src/runtime/fold_constant_executor.cpp b/be/src/runtime/fold_constant_executor.cpp
index b9dffe0acb..2ee1b7cde9 100644
--- a/be/src/runtime/fold_constant_executor.cpp
+++ b/be/src/runtime/fold_constant_executor.cpp
@@ -27,7 +27,7 @@
 #include "gen_cpp/internal_service.pb.h"
 #include "runtime/exec_env.h"
 #include "runtime/large_int_value.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/runtime_state.h"
 #include "runtime/thread_context.h"
 #include "runtime/tuple_row.h"
@@ -51,7 +51,7 @@ Status FoldConstantExecutor::fold_constant_expr(const TFoldConstantParams& param
     // init
     RETURN_IF_ERROR(_init(query_globals));
     // only after init operation, _mem_tracker is ready
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
 
     for (const auto& m : expr_map) {
         PExprResultMap pexpr_result_map;
@@ -103,7 +103,7 @@ Status FoldConstantExecutor::fold_constant_vexpr(const TFoldConstantParams& para
     // init
     RETURN_IF_ERROR(_init(query_globals));
     // only after init operation, _mem_tracker is ready
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
 
     for (const auto& m : expr_map) {
         PExprResultMap pexpr_result_map;
@@ -179,8 +179,8 @@ Status FoldConstantExecutor::_init(const TQueryGlobals& query_globals) {
 
     _runtime_profile = _runtime_state->runtime_profile();
     _runtime_profile->set_name("FoldConstantExpr");
-    _mem_tracker = MemTracker::create_tracker(-1, "FoldConstantExpr",
-                                              _runtime_state->instance_mem_tracker());
+    SCOPED_ATTACH_TASK(_runtime_state.get());
+    _mem_tracker = std::make_unique("FoldConstantExpr");
     _mem_pool.reset(new MemPool(_mem_tracker.get()));
 
     return Status::OK();
@@ -188,7 +188,7 @@ Status FoldConstantExecutor::_init(const TQueryGlobals& query_globals) {
 
 template <typename Context>
 Status FoldConstantExecutor::_prepare_and_open(Context* ctx) {
-    RETURN_IF_ERROR(ctx->prepare(_runtime_state.get(), RowDescriptor(), _mem_tracker));
+    RETURN_IF_ERROR(ctx->prepare(_runtime_state.get(), RowDescriptor()));
     return ctx->open(_runtime_state.get());
 }
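The executor first attaches the thread to the RuntimeState's task and then creates a plain (non-limiting) MemTracker that both the SCOPED_CONSUME_MEM_TRACKER scopes and the MemPool share. A sketch of the same pattern for a hypothetical component (ConstFolder and do_fold are illustrative; the macros, MemTracker, and MemPool are the ones used in this patch):

    #include <memory>
    #include "runtime/mem_pool.h"
    #include "runtime/memory/mem_tracker.h"
    #include "runtime/thread_context.h"

    class ConstFolder {
    public:
        Status init(RuntimeState* state) {
            SCOPED_ATTACH_TASK(state);  // thread now belongs to this query's task
            _tracker = std::make_unique<MemTracker>("ConstFolder");
            _pool = std::make_unique<MemPool>(_tracker.get());
            return Status::OK();
        }
        Status run() {
            SCOPED_CONSUME_MEM_TRACKER(_tracker.get());  // charge all allocations below
            return do_fold(_pool.get());
        }

    private:
        Status do_fold(MemPool* pool);
        std::unique_ptr<MemTracker> _tracker;
        std::unique_ptr<MemPool> _pool;
    };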
 
diff --git a/be/src/runtime/fold_constant_executor.h b/be/src/runtime/fold_constant_executor.h
index 4e1e629064..b9e5e501ea 100644
--- a/be/src/runtime/fold_constant_executor.h
+++ b/be/src/runtime/fold_constant_executor.h
@@ -53,7 +53,7 @@ private:
     std::string _get_result(void* src, size_t size, PrimitiveType slot_type);
 
     std::unique_ptr<RuntimeState> _runtime_state;
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
     RuntimeProfile* _runtime_profile = nullptr;
     std::unique_ptr<MemPool> _mem_pool;
     ObjectPool _pool;
diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp
index 9e1918d5f1..c5fcfa97c4 100644
--- a/be/src/runtime/fragment_mgr.cpp
+++ b/be/src/runtime/fragment_mgr.cpp
@@ -255,8 +255,7 @@ Status FragmentExecState::execute() {
 Status FragmentExecState::cancel_before_execute() {
     // set status as 'abort', cuz cancel() won't effect the status arg of DataSink::close().
 #ifndef BE_TEST
-    SCOPED_ATTACH_TASK_THREAD(executor()->runtime_state()->query_type(),
-                              executor()->runtime_state()->instance_mem_tracker());
+    SCOPED_ATTACH_TASK(executor()->runtime_state());
 #endif
     _executor.set_abort();
     _executor.cancel();
@@ -495,8 +494,7 @@ void FragmentMgr::_exec_actual(std::shared_ptr<FragmentExecState> exec_state, FinishCallback cb) {
             .instance_id(exec_state->fragment_instance_id())
             .tag("pthread_id", std::to_string((uintptr_t)pthread_self()));
 #ifndef BE_TEST
-    SCOPED_ATTACH_TASK_THREAD(exec_state->executor()->runtime_state(),
-                              exec_state->executor()->runtime_state()->instance_mem_tracker());
+    SCOPED_ATTACH_TASK(exec_state->executor()->runtime_state());
 #endif
     exec_state->execute();
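SCOPED_ATTACH_TASK(runtime_state) folds the old two-argument SCOPED_ATTACH_TASK_THREAD(query_type, instance_mem_tracker) into one call that derives both from the RuntimeState. A minimal sketch of a fragment worker entry point under that assumption (run_fragment is a hypothetical wrapper around the code above):

    void run_fragment(FragmentExecState* exec_state) {
    #ifndef BE_TEST
        // Memory allocated below is attributed to this fragment instance's task.
        SCOPED_ATTACH_TASK(exec_state->executor()->runtime_state());
    #endif
        exec_state->execute();
    }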
 
diff --git a/be/src/runtime/initial_reservations.cc b/be/src/runtime/initial_reservations.cc
index f4e223fe71..73757eaac4 100644
--- a/be/src/runtime/initial_reservations.cc
+++ b/be/src/runtime/initial_reservations.cc
@@ -26,7 +26,6 @@
 #include "common/logging.h"
 #include "common/object_pool.h"
 #include "runtime/exec_env.h"
-#include "runtime/mem_tracker.h"
 #include "util/pretty_printer.h"
 #include "util/uid_util.h"
 
@@ -36,13 +35,9 @@ namespace doris {
 
 InitialReservations::InitialReservations(ObjectPool* obj_pool,
                                          ReservationTracker* query_reservation,
-                                         std::shared_ptr<MemTracker> query_mem_tracker,
                                          int64_t initial_reservation_total_claims)
-        : initial_reservation_mem_tracker_(
-                  MemTracker::create_tracker(-1, "InitialReservations", query_mem_tracker)),
-          remaining_initial_reservation_claims_(initial_reservation_total_claims) {
+        : remaining_initial_reservation_claims_(initial_reservation_total_claims) {
     initial_reservations_.InitChildTracker(nullptr, query_reservation,
-                                           initial_reservation_mem_tracker_.get(),
                                           numeric_limits<int64_t>::max());
 }
 
diff --git a/be/src/runtime/initial_reservations.h b/be/src/runtime/initial_reservations.h
index c1661d5b53..9ffb3ab367 100644
--- a/be/src/runtime/initial_reservations.h
+++ b/be/src/runtime/initial_reservations.h
@@ -44,7 +44,6 @@ public:
     /// claimed over the lifetime of the query. The total bytes claimed via Claim()
     /// cannot exceed this. Allocated objects are stored in 'obj_pool'.
     InitialReservations(ObjectPool* obj_pool, ReservationTracker* query_reservation,
-                        std::shared_ptr<MemTracker> query_mem_tracker,
                         int64_t initial_reservation_total_claims);
 
     /// Initialize the query's pool of initial reservations by acquiring the minimum
@@ -72,8 +71,6 @@ private:
     // Return() returns reservations to.
     ReservationTracker initial_reservations_;
 
-    std::shared_ptr<MemTracker> const initial_reservation_mem_tracker_;
-
     /// The total bytes of additional reservations that we expect to be claimed.
     /// initial_reservations_->GetReservation() <= remaining_initial_reservation_claims_.
     int64_t remaining_initial_reservation_claims_;
diff --git a/be/src/runtime/load_channel.cpp b/be/src/runtime/load_channel.cpp
index d2223554eb..727206e4f0 100644
--- a/be/src/runtime/load_channel.cpp
+++ b/be/src/runtime/load_channel.cpp
@@ -19,17 +19,17 @@
 
 #include "olap/lru_cache.h"
 #include "runtime/exec_env.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/tablets_channel.h"
 #include "runtime/thread_context.h"
 
 namespace doris {
 
-LoadChannel::LoadChannel(const UniqueId& load_id, std::shared_ptr<MemTracker>& mem_tracker,
+LoadChannel::LoadChannel(const UniqueId& load_id, std::unique_ptr<MemTracker> mem_tracker,
                          int64_t timeout_s, bool is_high_priority, const std::string& sender_ip,
                          bool is_vec)
         : _load_id(load_id),
-          _mem_tracker(mem_tracker),
+          _mem_tracker(std::move(mem_tracker)),
           _timeout_s(timeout_s),
           _is_high_priority(is_high_priority),
           _sender_ip(sender_ip),
@@ -45,9 +45,12 @@ LoadChannel::~LoadChannel() {
               << ", info=" << _mem_tracker->debug_string() << ", load_id=" << _load_id
               << ", is high priority=" << _is_high_priority << ", sender_ip=" << _sender_ip
               << ", is_vec=" << _is_vec;
+    // The load channel tracker cannot be completely accurate, so offset its remaining consumption against the load channel mgr tracker.
+    _mem_tracker->parent()->consumption_revise(-_mem_tracker->consumption());
 }
 
 Status LoadChannel::open(const PTabletWriterOpenRequest& params) {
+    SCOPED_ATTACH_TASK(_mem_tracker.get(), ThreadContext::TaskType::LOAD);
     int64_t index_id = params.index_id();
     std::shared_ptr<TabletsChannel> channel;
     {
@@ -134,6 +137,7 @@ bool LoadChannel::is_finished() {
 
 Status LoadChannel::cancel() {
     std::lock_guard<std::mutex> l(_lock);
+    SCOPED_ATTACH_TASK(_mem_tracker.get(), ThreadContext::TaskType::LOAD);
     for (auto& it : _tablets_channels) {
         it.second->cancel();
     }
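Each public entry point of LoadChannel (open, add_batch, cancel) now attaches the calling thread, typically a brpc worker, to the channel's own tracker with the LOAD task type, so writes are charged to the load they belong to rather than to whichever thread happened to serve the RPC. A hedged sketch of that entry-point pattern (handle_write_rpc is a hypothetical function; the macro and task type are the ones used in this file):

    Status handle_write_rpc(MemTracker* channel_tracker) {
        // Attach this worker thread to the load task backed by channel_tracker.
        SCOPED_ATTACH_TASK(channel_tracker, ThreadContext::TaskType::LOAD);
        // ... deserialize the request and write into the tablets channels ...
        return Status::OK();
    }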
diff --git a/be/src/runtime/load_channel.h b/be/src/runtime/load_channel.h
index 91005a6b89..299272ae96 100644
--- a/be/src/runtime/load_channel.h
+++ b/be/src/runtime/load_channel.h
@@ -26,7 +26,7 @@
 #include "gen_cpp/PaloInternalService_types.h"
 #include "gen_cpp/Types_types.h"
 #include "gen_cpp/internal_service.pb.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/tablets_channel.h"
 #include "runtime/thread_context.h"
 #include "util/uid_util.h"
@@ -39,7 +39,7 @@ class Cache;
 // corresponding to a certain load job
 class LoadChannel {
 public:
-    LoadChannel(const UniqueId& load_id, std::shared_ptr<MemTracker>& mem_tracker,
+    LoadChannel(const UniqueId& load_id, std::unique_ptr<MemTracker> mem_tracker,
                 int64_t timeout_s, bool is_high_priority, const std::string& sender_ip,
                 bool is_vec);
     ~LoadChannel();
@@ -99,7 +99,7 @@ private:
 
     UniqueId _load_id;
     // Tracks the total memory consumed by current load job on this BE
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
 
     // lock protect the tablets channel map
     std::mutex _lock;
@@ -129,6 +129,7 @@ private:
 template <typename TabletWriterAddRequest, typename TabletWriterAddResult>
 Status LoadChannel::add_batch(const TabletWriterAddRequest& request,
                               TabletWriterAddResult* response) {
+    SCOPED_ATTACH_TASK(_mem_tracker.get(), ThreadContext::TaskType::LOAD);
     int64_t index_id = request.index_id();
     // 1. get tablets channel
     std::shared_ptr<TabletsChannel> channel;
diff --git a/be/src/runtime/load_channel_mgr.cpp b/be/src/runtime/load_channel_mgr.cpp
index fa30fca822..7429c09c6e 100644
--- a/be/src/runtime/load_channel_mgr.cpp
+++ b/be/src/runtime/load_channel_mgr.cpp
@@ -19,7 +19,7 @@
 
 #include "gutil/strings/substitute.h"
 #include "runtime/load_channel.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/thread_context.h"
 #include "service/backend_options.h"
 #include "util/doris_metrics.h"
@@ -84,9 +84,7 @@ LoadChannelMgr::~LoadChannelMgr() {
 
 Status LoadChannelMgr::init(int64_t process_mem_limit) {
     int64_t load_mgr_mem_limit = calc_process_max_load_memory(process_mem_limit);
-    _mem_tracker = MemTracker::create_virtual_tracker(load_mgr_mem_limit, "LoadChannelMgr",
-                                                      MemTracker::get_process_tracker(),
-                                                      MemTrackerLevel::OVERVIEW);
+    _mem_tracker = std::make_unique<MemTracker>(load_mgr_mem_limit, "LoadChannelMgr");
     REGISTER_HOOK_METRIC(load_channel_mem_consumption,
                          [this]() { return _mem_tracker->consumption(); });
     _last_success_channel = new_lru_cache("LastestSuccessChannelCache", 1024);
@@ -112,22 +110,13 @@ Status LoadChannelMgr::open(const PTabletWriterOpenRequest& params) {
             int64_t load_mem_limit = params.has_load_mem_limit() ? params.load_mem_limit() : -1;
             int64_t channel_mem_limit =
                     calc_channel_max_load_memory(load_mem_limit, _mem_tracker->limit());
-            auto channel_mem_tracker =
-                    MemTracker::create_tracker(channel_mem_limit,
-                                               fmt::format("LoadChannel#senderIp={}#loadID={}",
-                                                           params.sender_ip(), load_id.to_string()),
-                                               _mem_tracker);
-            // TODO
-            // auto channel_mem_tracker_job = std::make_shared<MemTracker>(
-            //         -1,
-            //         fmt::format("LoadChannel#senderIp={}#loadID={}", params.sender_ip(),
-            //                     load_id.to_string()),
-            //         ExecEnv::GetInstance()
-            //                 ->task_pool_mem_tracker_registry()
-            //                 ->register_load_mem_tracker(load_id.to_string(), load_mem_limit),
-            //         MemTrackerLevel::TASK);
-            channel.reset(new LoadChannel(load_id, channel_mem_tracker, channel_timeout_s,
-                                          is_high_priority, params.sender_ip(),
+            auto channel_mem_tracker = std::make_unique<MemTracker>(
+                    channel_mem_limit,
+                    fmt::format("LoadChannel#senderIp={}#loadID={}", params.sender_ip(),
+                                load_id.to_string()),
+                    _mem_tracker.get());
+            channel.reset(new LoadChannel(load_id, std::move(channel_mem_tracker),
+                                          channel_timeout_s, is_high_priority, params.sender_ip(),
                                           params.is_vectorized()));
             _load_channels.insert({load_id, channel});
         }
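The manager creates one child MemTracker per channel, parented to its own LoadChannelMgr tracker, and then moves sole ownership into the LoadChannel; nothing else keeps a reference, which is what allows the shared_ptr-to-unique_ptr switch in load_channel.h. A sketch with illustrative values (the limit, label, sender ip, and timeout are made up; mgr_tracker and load_id are placeholders; the constructor shape (limit, label, parent) is the one used above):

    auto channel_tracker = std::make_unique<MemTracker>(
            512L * 1024 * 1024,                 // example per-channel limit only
            "LoadChannel#senderIp=10.0.0.1#loadID=example",
            mgr_tracker);                       // parent: the LoadChannelMgr tracker
    auto channel = std::make_shared<LoadChannel>(load_id, std::move(channel_tracker),
                                                 /*timeout_s=*/600,
                                                 /*is_high_priority=*/false,
                                                 "10.0.0.1", /*is_vec=*/false);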
diff --git a/be/src/runtime/load_channel_mgr.h b/be/src/runtime/load_channel_mgr.h
index 77a046e8a8..0e46d8bf52 100644
--- a/be/src/runtime/load_channel_mgr.h
+++ b/be/src/runtime/load_channel_mgr.h
@@ -58,8 +58,6 @@ public:
     // cancel all tablet stream for 'load_id' load
     Status cancel(const PTabletWriterCancelRequest& request);
 
-    std::shared_ptr<MemTracker> mem_tracker() { return _mem_tracker; }
-
 private:
     template <typename Request>
     Status _get_load_channel(std::shared_ptr<LoadChannel>& channel, bool& is_eof,
@@ -80,8 +78,7 @@ protected:
     Cache* _last_success_channel = nullptr;
 
     // check the total load channel mem consumption of this Backend
-    // TODO no used, refactor soon
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
 
     CountDownLatch _stop_background_threads_latch;
     // thread to clean timeout load channels
diff --git a/be/src/runtime/mem_pool.cpp b/be/src/runtime/mem_pool.cpp
index ae70dc4df6..8a01c8afdf 100644
--- a/be/src/runtime/mem_pool.cpp
+++ b/be/src/runtime/mem_pool.cpp
@@ -25,8 +25,8 @@
 #include 
 #include 
 
-#include "runtime/mem_tracker.h"
 #include "runtime/memory/chunk_allocator.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/thread_context.h"
 #include "util/bit_util.h"
 #include "util/doris_metrics.h"
@@ -49,23 +49,13 @@ MemPool::MemPool(MemTracker* mem_tracker)
           peak_allocated_bytes_(0),
           _mem_tracker(mem_tracker) {}
 
-MemPool::MemPool(const std::string& label)
-        : current_chunk_idx_(-1),
-          next_chunk_size_(INITIAL_CHUNK_SIZE),
-          total_allocated_bytes_(0),
-          total_reserved_bytes_(0),
-          peak_allocated_bytes_(0) {
-    _mem_tracker_own = MemTracker::create_tracker(-1, label + ":MemPool");
-    _mem_tracker = _mem_tracker_own.get();
-}
-
 MemPool::MemPool()
         : current_chunk_idx_(-1),
           next_chunk_size_(INITIAL_CHUNK_SIZE),
           total_allocated_bytes_(0),
           total_reserved_bytes_(0),
           peak_allocated_bytes_(0),
-          _mem_tracker(tls_ctx()->_thread_mem_tracker_mgr->mem_tracker().get()) {}
+          _mem_tracker(nullptr) {}
 
 MemPool::ChunkInfo::ChunkInfo(const Chunk& chunk_) : chunk(chunk_), allocated_bytes(0) {
     DorisMetrics::instance()->memory_pool_bytes_total->increment(chunk.size);
@@ -75,8 +65,9 @@ MemPool::~MemPool() {
     int64_t total_bytes_released = 0;
     for (auto& chunk : chunks_) {
         total_bytes_released += chunk.chunk.size;
-        ChunkAllocator::instance()->free(chunk.chunk, _mem_tracker);
+        ChunkAllocator::instance()->free(chunk.chunk);
     }
+    if (_mem_tracker) _mem_tracker->release(total_bytes_released);
     DorisMetrics::instance()->memory_pool_bytes_total->increment(-total_bytes_released);
 }
 
@@ -94,8 +85,9 @@ void MemPool::free_all() {
     int64_t total_bytes_released = 0;
     for (auto& chunk : chunks_) {
         total_bytes_released += chunk.chunk.size;
-        ChunkAllocator::instance()->free(chunk.chunk, _mem_tracker);
+        ChunkAllocator::instance()->free(chunk.chunk);
     }
+    if (_mem_tracker) _mem_tracker->release(total_bytes_released);
     chunks_.clear();
     next_chunk_size_ = INITIAL_CHUNK_SIZE;
     current_chunk_idx_ = -1;
@@ -143,11 +135,17 @@ Status MemPool::find_chunk(size_t min_size, bool check_limits) {
     }
 
     chunk_size = BitUtil::RoundUpToPowerOfTwo(chunk_size);
+    if (check_limits &&
+        !thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit(
+                chunk_size)) {
+        return Status::MemoryAllocFailed("MemPool find new chunk {} bytes failed, exceed limit",
+                                         chunk_size);
+    }
 
     // Allocate a new chunk. Return early if allocate fails.
     Chunk chunk;
-    RETURN_IF_ERROR(
-            ChunkAllocator::instance()->allocate(chunk_size, &chunk, _mem_tracker, check_limits));
+    RETURN_IF_ERROR(ChunkAllocator::instance()->allocate(chunk_size, &chunk));
+    if (_mem_tracker) _mem_tracker->consume(chunk_size);
     ASAN_POISON_MEMORY_REGION(chunk.data, chunk_size);
     // Put it before the first free chunk. If no free chunks, it goes at the end.
     if (first_free_idx == static_cast<int>(chunks_.size())) {
@@ -192,7 +190,12 @@ void MemPool::acquire_data(MemPool* src, bool keep_current) {
 
     // Skip unnecessary atomic ops if the mem_trackers are the same.
     if (src->_mem_tracker != _mem_tracker) {
-        src->_mem_tracker->transfer_to(_mem_tracker, total_transferred_bytes);
+        if (src->_mem_tracker) {
+            src->_mem_tracker->release(total_transferred_bytes);
+        }
+        if (_mem_tracker) {
+            _mem_tracker->consume(total_transferred_bytes);
+        }
     }
 
     // insert new chunks after current_chunk_idx_
@@ -222,7 +225,12 @@ void MemPool::acquire_data(MemPool* src, bool keep_current) {
 void MemPool::exchange_data(MemPool* other) {
     int64_t delta_size = other->total_reserved_bytes_ - total_reserved_bytes_;
     if (other->_mem_tracker != _mem_tracker) {
-        other->_mem_tracker->transfer_to(_mem_tracker, delta_size);
+        if (other->_mem_tracker) {
+            other->_mem_tracker->release(delta_size);
+        }
+        if (_mem_tracker) {
+            _mem_tracker->consume(delta_size);
+        }
     }
 
     std::swap(current_chunk_idx_, other->current_chunk_idx_);
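Because the tracker is now optional, MemPool guards every consume/release with a null check, and moving chunks between pools releases from the source tracker and consumes on the destination instead of calling transfer_to. The accounting reduces to a helper like the following hedged sketch (move_tracked_bytes is hypothetical; consume/release are the MemTracker methods used above):

    // Mirrors the accounting in acquire_data()/exchange_data() above.
    void move_tracked_bytes(MemTracker* from, MemTracker* to, int64_t bytes) {
        if (from == to) return;                     // same tracker: nothing to account
        if (from != nullptr) from->release(bytes);  // old owner drops the bytes
        if (to != nullptr) to->consume(bytes);      // new owner picks them up
    }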
diff --git a/be/src/runtime/mem_pool.h b/be/src/runtime/mem_pool.h
index 0523fd736e..8fccacc987 100644
--- a/be/src/runtime/mem_pool.h
+++ b/be/src/runtime/mem_pool.h
@@ -93,7 +93,6 @@ class MemPool {
 public:
     // 'tracker' tracks the amount of memory allocated by this pool. Must not be nullptr.
     MemPool(MemTracker* mem_tracker);
-    MemPool(const std::string& label);
     MemPool();
 
     /// Frees all chunks of memory and subtracts the total allocated bytes
@@ -318,8 +317,6 @@ private:
     /// The current and peak memory footprint of this pool. This is different from
     /// total allocated_bytes_ since it includes bytes in chunks that are not used.
     MemTracker* _mem_tracker;
-    // TODO(zxy) temp variable, In the future, mem trackers should all use raw pointers.
-    std::shared_ptr<MemTracker> _mem_tracker_own;
 };
 
 // Stamp out templated implementations here so they're included in IR module
diff --git a/be/src/runtime/mem_tracker.cpp b/be/src/runtime/mem_tracker.cpp
deleted file mode 100644
index ffa7ae188b..0000000000
--- a/be/src/runtime/mem_tracker.cpp
+++ /dev/null
@@ -1,354 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/mem-tracker.cpp
-// and modified by Doris
-
-#include "runtime/mem_tracker.h"
-
-#include 
-
-#include 
-
-#include "exec/exec_node.h"
-#include "gutil/once.h"
-#include "runtime/exec_env.h"
-#include "runtime/runtime_state.h"
-#include "service/backend_options.h"
-#include "util/pretty_printer.h"
-#include "util/string_util.h"
-#include "util/uid_util.h"
-
-namespace doris {
-
-const std::string MemTracker::COUNTER_NAME = "PeakMemoryUsage";
-
-// The ancestor for all trackers. Every tracker is visible from the process down.
-// All manually created trackers should specify the process tracker as the parent.
-// Not limit total memory by process tracker, and it's just used to track virtual memory of process.
-static std::shared_ptr<MemTracker> process_tracker;
-static MemTracker* raw_process_tracker;
-static GoogleOnceType process_tracker_once = GOOGLE_ONCE_INIT;
-
-void MemTracker::create_process_tracker() {
-    process_tracker.reset(
-            new MemTracker(-1, "Process", nullptr, MemTrackerLevel::OVERVIEW, nullptr));
-    process_tracker->init();
-    raw_process_tracker = process_tracker.get();
-}
-
-std::shared_ptr<MemTracker> MemTracker::get_process_tracker() {
-    GoogleOnceInit(&process_tracker_once, &MemTracker::create_process_tracker);
-    return process_tracker;
-}
-
-MemTracker* MemTracker::get_raw_process_tracker() {
-    GoogleOnceInit(&process_tracker_once, &MemTracker::create_process_tracker);
-    return raw_process_tracker;
-}
-
-static TrackersMap _temporary_mem_trackers;
-
-std::shared_ptr<MemTracker> MemTracker::get_temporary_mem_tracker(const std::string& label) {
-    // First time this label registered, make a new object, otherwise do nothing.
-    // Avoid using locks to resolve erase conflicts.
-    _temporary_mem_trackers.try_emplace_l(
-            label, [](std::shared_ptr<MemTracker>) {},
-            MemTracker::create_tracker(-1, fmt::format("[Temporary]-{}", label), nullptr,
-                                       MemTrackerLevel::OVERVIEW));
-    return _temporary_mem_trackers[label];
-}
-
-void MemTracker::list_process_trackers(std::vector<std::shared_ptr<MemTracker>>* trackers) {
-    trackers->clear();
-    std::deque<std::shared_ptr<MemTracker>> to_process;
-    to_process.push_front(get_process_tracker());
-    while (!to_process.empty()) {
-        std::shared_ptr<MemTracker> t = to_process.back();
-        to_process.pop_back();
-
-        trackers->push_back(t);
-        std::list<std::weak_ptr<MemTracker>> children;
-        {
-            lock_guard l(t->_child_trackers_lock);
-            children = t->_child_trackers;
-        }
-        for (const auto& child_weak : children) {
-            std::shared_ptr child = child_weak.lock();
-            if (child && static_cast(child->_level) <=
-                                 config::mem_tracker_level) {
-                to_process.emplace_back(std::move(child));
-            }
-        }
-    }
-}
-
-std::shared_ptr<MemTracker> MemTracker::create_tracker(int64_t byte_limit, const std::string& label,
-                                                       const std::shared_ptr<MemTracker>& parent,
-                                                       MemTrackerLevel level,
-                                                       RuntimeProfile* profile) {
-    std::shared_ptr<MemTracker> tracker =
-            MemTracker::create_tracker_impl(byte_limit, label, parent, level, profile);
-    tracker->init();
-    return tracker;
-}
-
-std::shared_ptr<MemTracker> MemTracker::create_virtual_tracker(
-        int64_t byte_limit, const std::string& label, const std::shared_ptr<MemTracker>& parent,
-        MemTrackerLevel level) {
-    std::shared_ptr<MemTracker> tracker = MemTracker::create_tracker_impl(
-            byte_limit, "[Virtual]-" + label, parent, level, nullptr);
-    tracker->init_virtual();
-    return tracker;
-}
-
-std::shared_ptr<MemTracker> MemTracker::create_tracker_impl(
-        int64_t byte_limit, const std::string& label, const std::shared_ptr<MemTracker>& parent,
-        MemTrackerLevel level, RuntimeProfile* profile) {
-    std::shared_ptr<MemTracker> reset_parent =
-            parent ? parent : tls_ctx()->_thread_mem_tracker_mgr->mem_tracker();
-    DCHECK(reset_parent);
-    std::string reset_label;
-    MemTracker* task_parent_tracker = reset_parent->parent_task_mem_tracker();
-    if (task_parent_tracker) {
-        reset_label = fmt::format("{}#{}", label, split(task_parent_tracker->label(), "#")[1]);
-    } else {
-        reset_label = label;
-    }
-    if (byte_limit == -1) byte_limit = reset_parent->limit();
-
-    std::shared_ptr<MemTracker> tracker(
-            new MemTracker(byte_limit, reset_label, reset_parent,
-                           level > reset_parent->_level ? level : reset_parent->_level, profile));
-    // Do not check limit exceed when add_child_tracker, otherwise it will cause deadlock when log_usage is called.
-    STOP_CHECK_LIMIT_THREAD_LOCAL_MEM_TRACKER();
-    reset_parent->add_child_tracker(tracker);
-    return tracker;
-}
-
-MemTracker::MemTracker(int64_t byte_limit, const std::string& label)
-        : MemTracker(byte_limit, label, std::shared_ptr<MemTracker>(), MemTrackerLevel::VERBOSE,
-                     nullptr) {}
-
-MemTracker::MemTracker(int64_t byte_limit, const std::string& label,
-                       const std::shared_ptr<MemTracker>& parent, MemTrackerLevel level,
-                       RuntimeProfile* profile)
-        : _limit(byte_limit),
-          _label(label),
-          // Not 100% sure the id is unique. This is generated because it is faster than converting to int after hash.
-          _id((GetCurrentTimeMicros() % 1000000) * 100 + _label.length()),
-          _parent(parent),
-          _level(level) {
-    if (profile == nullptr) {
-        _consumption = std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES);
-    } else {
-        _consumption = profile->AddSharedHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES);
-    }
-}
-
-void MemTracker::init() {
-    DCHECK_GE(_limit, -1);
-    MemTracker* tracker = this;
-    while (tracker != nullptr) {
-        _all_trackers.push_back(tracker);
-        if (tracker->has_limit()) _limit_trackers.push_back(tracker);
-        // This means that it terminates when recursively consume/release from the current tracker up to the virtual tracker.
-        if (tracker->_virtual == true) {
-            break;
-        }
-        tracker = tracker->_parent.get();
-    }
-    DCHECK_GT(_all_trackers.size(), 0);
-    DCHECK_EQ(_all_trackers[0], this);
-}
-
-void MemTracker::init_virtual() {
-    DCHECK_GE(_limit, -1);
-    _all_trackers.push_back(this);
-    if (this->has_limit()) _limit_trackers.push_back(this);
-    _virtual = true;
-}
-
-MemTracker::~MemTracker() {
-    consume(_untracked_mem.exchange(0)); // before memory_leak_check
-    // TCMalloc hook will be triggered during destructor memtracker, may cause crash.
-    if (_label == "Process") doris::thread_local_ctx._init = false;
-    if (!_virtual && config::memory_leak_detection) MemTracker::memory_leak_check(this);
-    if (!_virtual && parent()) {
-        // Do not call release on the parent tracker to avoid repeated releases.
-        // Ensure that all consume/release are triggered by TCMalloc new/delete hook.
-        lock_guard l(_parent->_child_trackers_lock);
-        if (_child_tracker_it != _parent->_child_trackers.end()) {
-            _parent->_child_trackers.erase(_child_tracker_it);
-            _child_tracker_it = _parent->_child_trackers.end();
-        }
-    }
-    DCHECK_EQ(_untracked_mem, 0);
-}
-
-void MemTracker::transfer_to_relative(MemTracker* dst, int64_t bytes) {
-    if (id() == dst->id()) return;
-    DCHECK_EQ(_all_trackers.back(), dst->_all_trackers.back()) << "Must have same ancestor";
-    DCHECK(!dst->has_limit());
-    // Find the common ancestor and update trackers between 'this'/'dst' and
-    // the common ancestor. This logic handles all cases, including the
-    // two trackers being the same or being ancestors of each other because
-    // 'all_trackers_' includes the current tracker.
-    int ancestor_idx = _all_trackers.size() - 1;
-    int dst_ancestor_idx = dst->_all_trackers.size() - 1;
-    while (ancestor_idx > 0 && dst_ancestor_idx > 0 &&
-           _all_trackers[ancestor_idx - 1] == dst->_all_trackers[dst_ancestor_idx - 1]) {
-        DCHECK(!dst->_all_trackers[dst_ancestor_idx - 1]->has_limit());
-        --ancestor_idx;
-        --dst_ancestor_idx;
-    }
-    MemTracker* common_ancestor = _all_trackers[ancestor_idx];
-    release_local(bytes, common_ancestor);
-    dst->consume_local(bytes, common_ancestor);
-}
-
-// Calling this on the query tracker results in output like:
-//
-//  Query(4a4c81fedaed337d:4acadfda00000000) Limit=10.00 GB Total=508.28 MB Peak=508.45 MB
-//    Fragment 4a4c81fedaed337d:4acadfda00000000: Total=8.00 KB Peak=8.00 KB
-//      EXCHANGE_NODE (id=4): Total=0 Peak=0
-//      DataStreamRecvr: Total=0 Peak=0
-//    Block Manager: Limit=6.68 GB Total=394.00 MB Peak=394.00 MB
-//    Fragment 4a4c81fedaed337d:4acadfda00000006: Total=233.72 MB Peak=242.24 MB
-//      AGGREGATION_NODE (id=1): Total=139.21 MB Peak=139.84 MB
-//      HDFS_SCAN_NODE (id=0): Total=93.94 MB Peak=102.24 MB
-//      DataStreamSender (dst_id=2): Total=45.99 KB Peak=85.99 KB
-//    Fragment 4a4c81fedaed337d:4acadfda00000003: Total=274.55 MB Peak=274.62 MB
-//      AGGREGATION_NODE (id=3): Total=274.50 MB Peak=274.50 MB
-//      EXCHANGE_NODE (id=2): Total=0 Peak=0
-//      DataStreamRecvr: Total=45.91 KB Peak=684.07 KB
-//      DataStreamSender (dst_id=4): Total=680.00 B Peak=680.00 B
-//
-// If 'reservation_metrics_' are set, we get a more granular breakdown:
-//   TrackerName: Limit=5.00 MB Reservation=5.00 MB OtherMemory=1.04 MB
-//                Total=6.04 MB Peak=6.45 MB
-//
-std::string MemTracker::log_usage(int max_recursive_depth, int64_t* logged_consumption) {
-    // Make sure the consumption is up to date.
-    int64_t curr_consumption = consumption();
-    int64_t peak_consumption = _consumption->value();
-    if (logged_consumption != nullptr) *logged_consumption = curr_consumption;
-
-    if (_level > MemTrackerLevel::INSTANCE && curr_consumption == 0) return "";
-
-    std::string detail =
-            "MemTracker log_usage Label: {}, Limit: {}, Total: {}, Peak: {}, Exceeded: {}";
-    detail = fmt::format(detail, _label, PrettyPrinter::print(_limit, TUnit::BYTES),
-                         PrettyPrinter::print(curr_consumption, TUnit::BYTES),
-                         PrettyPrinter::print(peak_consumption, TUnit::BYTES),
-                         limit_exceeded() ? "true" : "false");
-
-    // This call does not need the children, so return early.
-    if (max_recursive_depth == 0) return detail;
-
-    // Recurse and get information about the children
-    int64_t child_consumption;
-    std::string child_trackers_usage;
-    std::list<std::weak_ptr<MemTracker>> children;
-    {
-        lock_guard l(_child_trackers_lock);
-        children = _child_trackers;
-    }
-    child_trackers_usage = log_usage(max_recursive_depth - 1, children, &child_consumption);
-    if (!child_trackers_usage.empty()) detail += "\n" + child_trackers_usage;
-    return detail;
-}
-
-std::string MemTracker::log_usage(int max_recursive_depth,
-                                  const std::list<std::weak_ptr<MemTracker>>& trackers,
-                                  int64_t* logged_consumption) {
-    *logged_consumption = 0;
-    std::vector<std::string> usage_strings;
-    for (const auto& tracker_weak : trackers) {
-        std::shared_ptr<MemTracker> tracker = tracker_weak.lock();
-        if (tracker) {
-            int64_t tracker_consumption;
-            std::string usage_string =
-                    tracker->log_usage(max_recursive_depth, &tracker_consumption);
-            if (!usage_string.empty()) usage_strings.push_back(usage_string);
-            *logged_consumption += tracker_consumption;
-        }
-    }
-    return join(usage_strings, "\n");
-}
-
-Status MemTracker::mem_limit_exceeded(RuntimeState* state, const std::string& details,
-                                      int64_t failed_allocation_size, Status failed_alloc) {
-    STOP_CHECK_LIMIT_THREAD_LOCAL_MEM_TRACKER();
-    MemTracker* process_tracker = MemTracker::get_raw_process_tracker();
-    std::string detail =
-            "Memory exceed limit. fragment={}, details={}, on backend={}. Memory left in process "
-            "limit={}.";
-    detail = fmt::format(detail, state != nullptr ? print_id(state->fragment_instance_id()) : "",
-                         details, BackendOptions::get_localhost(),
-                         PrettyPrinter::print(process_tracker->spare_capacity(), TUnit::BYTES));
-    if (!failed_alloc) {
-        detail += " failed alloc=<{}>. current tracker={}.";
-        detail = fmt::format(detail, failed_alloc.to_string(), _label);
-    } else {
-        detail += " current tracker .";
-        detail = fmt::format(detail, _label, _consumption->current_value(), _limit,
-                             PrettyPrinter::print(failed_allocation_size, TUnit::BYTES));
-    }
-    detail += " If this is a query, can change the limit by session variable exec_mem_limit.";
-    Status status = Status::MemoryLimitExceeded(detail);
-    if (state != nullptr) state->log_error(detail);
-
-    // only print the tracker log_usage in be log.
-    if (process_tracker->spare_capacity() < failed_allocation_size) {
-        // Dumping the process MemTracker is expensive. Limiting the recursive depth to two
-        // levels limits the level of detail to a one-line summary for each query MemTracker.
-        detail += "\n" + process_tracker->log_usage(2);
-    }
-    if (parent_task_mem_tracker() != nullptr) {
-        detail += "\n" + parent_task_mem_tracker()->log_usage();
-    }
-    LOG(WARNING) << detail;
-
-    return status;
-}
-
-bool MemTracker::gc_memory(int64_t max_consumption) {
-    if (max_consumption < 0) return true;
-    lock_guard l(_gc_lock);
-    int64_t pre_gc_consumption = consumption();
-    // Check if someone gc'd before us
-    if (pre_gc_consumption < max_consumption) return false;
-
-    int64_t curr_consumption = pre_gc_consumption;
-    // Free some extra memory to avoid frequent GC; EXTRA_BYTES_TO_FREE is an empirical value and may be tuned later.
-    const int64_t EXTRA_BYTES_TO_FREE = 4L * 1024L * 1024L * 1024L;
-    // Try to free up some memory
-    for (int i = 0; i < _gc_functions.size(); ++i) {
-        // Try to free up the amount we are over plus some extra so that we don't have to
-        // immediately GC again. Don't free all the memory since that can be unnecessarily
-        // expensive.
-        int64_t bytes_to_free = curr_consumption - max_consumption + EXTRA_BYTES_TO_FREE;
-        _gc_functions[i](bytes_to_free);
-        curr_consumption = consumption();
-        if (max_consumption - curr_consumption <= EXTRA_BYTES_TO_FREE) break;
-    }
-
-    return curr_consumption > max_consumption;
-}
-
-} // namespace doris
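For orientation, here is a minimal sketch of how the GC path above is exercised through the old API: a GcFunction is registered on a limited tracker, and a failed try_add inside try_consume falls through to gc_memory(), which calls the registered functions in order. `SomeCache` and `release_bytes()` are hypothetical stand-ins, not part of this patch.

```cpp
// Hedged sketch of the old GC flow; SomeCache/release_bytes are hypothetical.
auto tracker = MemTracker::create_tracker(64L * 1024 * 1024, "ExampleLimited");
tracker->add_gc_function([](int64_t bytes_to_free) {
    // Invoked by gc_memory() with the number of bytes it wants back.
    SomeCache::instance()->release_bytes(bytes_to_free);
});
Status st = tracker->try_consume(128L * 1024 * 1024); // over limit: GC first, then fail if still over
if (!st.ok()) {
    // The caller must not proceed with the allocation.
}
```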
diff --git a/be/src/runtime/mem_tracker.h b/be/src/runtime/mem_tracker.h
deleted file mode 100644
index 5b51ae30e2..0000000000
--- a/be/src/runtime/mem_tracker.h
+++ /dev/null
@@ -1,554 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/mem-tracker.h
-// and modified by Doris
-
-#pragma once
-
-#include 
-
-#include 
-#include 
-#include 
-
-#include "common/config.h"
-#include "common/status.h"
-#include "util/mem_info.h"
-#include "util/perf_counters.h"
-#include "util/runtime_profile.h"
-#include "util/spinlock.h"
-
-namespace doris {
-
-// The Level is used to decide whether to show the tracker on the web page.
-// Each MemTracker has a Level less than or equal to its parent's, and it is only set explicitly.
-// TASK contains query, import, compaction, etc.
-enum class MemTrackerLevel { OVERVIEW = 0, TASK, INSTANCE, VERBOSE };
-
-class MemTracker;
-class RuntimeState;
-
-using TrackersMap = phmap::parallel_flat_hash_map<
-        std::string, std::shared_ptr<MemTracker>, phmap::priv::hash_default_hash<std::string>,
-        phmap::priv::hash_default_eq<std::string>,
-        std::allocator<std::pair<const std::string, std::shared_ptr<MemTracker>>>, 12, std::mutex>;
-
-/// A MemTracker tracks memory consumption; it contains an optional limit
-/// and can be arranged into a tree structure such that the consumption tracked
-/// by a MemTracker is also tracked by its ancestors.
-///
-/// We use a five-level hierarchy of mem trackers: process, pool, query, and fragment
-/// instance form the first four levels; specific parts of the fragment (exec nodes,
-/// sinks, etc.) add a fifth level when they are initialized, for example a user
-/// function mem tracker.
-///
-/// By default, memory consumption is tracked via calls to Consume()/Release(), either to
-/// the tracker itself or to one of its descendents. Alternatively, a consumption metric
-/// can be specified, and then the metric's value is used as the consumption rather than
-/// the tally maintained by Consume() and Release(). A tcmalloc metric is used to track
-/// process memory consumption, since the process memory usage may be higher than the
-/// computed total memory (tcmalloc does not release deallocated memory immediately).
-/// Other consumption metrics are used in trackers below the process level to account
-/// for memory (such as free buffer pool buffers) that is not tracked by Consume() and
-/// Release().
-///
-/// GcFunctions can be attached to a MemTracker in order to free up memory if the limit is
-/// reached. If limit_exceeded() is called and the limit is exceeded, it will first call
-/// the GcFunctions to try to free memory and recheck the limit. For example, the process
-/// tracker has a GcFunction that releases any unused memory still held by tcmalloc, so
-/// this will be called before the process limit is reported as exceeded. GcFunctions are
-/// called in the order they are added, so expensive functions should be added last.
-/// GcFunctions are called with a global lock held, so should be non-blocking and not
-/// call back into MemTrackers, except to release memory.
-//
-/// This class is thread-safe.
-class MemTracker {
-public:
-    // Creates and adds the tracker to the tree
-    static std::shared_ptr<MemTracker> create_tracker(
-            int64_t byte_limit = -1, const std::string& label = std::string(),
-            const std::shared_ptr<MemTracker>& parent = std::shared_ptr<MemTracker>(),
-            MemTrackerLevel level = MemTrackerLevel::VERBOSE, RuntimeProfile* profile = nullptr);
-
-    // Consume/release will not sync to the parent. Usually used to manually record the specified
-    // memory. It is independent of the recording of the TCMalloc hook in the thread local tracker,
-    // so the same block of memory is recorded independently in these two trackers.
-    // TODO(zxy) At present, the purpose of most virtual trackers is only to preserve the previous
-    // logic of manually recording memory, which may be used later. Each virtual tracker should be
-    // reviewed case by case to decide whether it is still necessary.
-    static std::shared_ptr<MemTracker> create_virtual_tracker(
-            int64_t byte_limit = -1, const std::string& label = std::string(),
-            const std::shared_ptr<MemTracker>& parent = std::shared_ptr<MemTracker>(),
-            MemTrackerLevel level = MemTrackerLevel::VERBOSE);
-
-    // This is used for creating an orphan mem tracker, or for unit tests.
-    // If a mem tracker has a parent, it should be created by `create_tracker()`.
-    MemTracker(int64_t byte_limit = -1, const std::string& label = std::string());
-
-    ~MemTracker();
-
-    // Returns a list of all the valid trackers.
-    static void list_process_trackers(std::vector<std::shared_ptr<MemTracker>>* trackers);
-
-    // Gets a shared_ptr to the "process" tracker, creating it if necessary.
-    static std::shared_ptr<MemTracker> get_process_tracker();
-    static MemTracker* get_raw_process_tracker();
-    // Get a temporary tracker with a specified label; the tracker is created the first time the
-    // label is requested. Temporary trackers are not automatically destructed, which is usually
-    // used for debugging.
-    static std::shared_ptr<MemTracker> get_temporary_mem_tracker(const std::string& label);
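A usage sketch of the factory methods above (byte limits and labels are made up for illustration, not taken from this patch): a query-level tracker is parented to the process tracker, and an instance-level child is parented to the query tracker.

```cpp
// Hedged sketch; limits and labels are illustrative only.
std::shared_ptr<MemTracker> query_tracker = MemTracker::create_tracker(
        2L * 1024 * 1024 * 1024, "Query#queryId=example", MemTracker::get_process_tracker(),
        MemTrackerLevel::TASK);
std::shared_ptr<MemTracker> instance_tracker =
        MemTracker::create_tracker(-1, "FragmentInstance", query_tracker);
instance_tracker->consume(4096); // also counted against query_tracker and the process tracker
instance_tracker->release(4096);
```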
-
-    Status check_sys_mem_info(int64_t bytes) {
-        // Limit process memory usage using the actual physical memory of the process in `/proc/self/status`.
-        // This is independent of the consumption value of the mem tracker, which counts the virtual memory
-        // of the process malloc.
-        // For speed, MemInfo::initialized() is expected to be true.
-        if (PerfCounters::get_vm_rss() + bytes >= MemInfo::mem_limit()) {
-            return Status::MemoryLimitExceeded(
-                    "{}: TryConsume failed, bytes={} process whole consumption={}  mem limit={}",
-                    _label, bytes, MemInfo::current_mem(), MemInfo::mem_limit());
-        }
-        return Status::OK();
-    }
-
-    // Increases consumption of this tracker and its ancestors by 'bytes'.
-    void consume(int64_t bytes) {
-#ifdef USE_MEM_TRACKER
-        if (bytes <= 0) {
-            release(-bytes);
-            return;
-        }
-        for (auto& tracker : _all_trackers) {
-            tracker->_consumption->add(bytes);
-        }
-#endif
-    }
-
-    // Increases consumption of this tracker and its ancestors by 'bytes' only if
-    // they can all consume 'bytes' without exceeding their limits. If a limit would be
-    // exceeded, no MemTrackers are updated. Returns Status::OK() if the consumption was successfully updated.
-    WARN_UNUSED_RESULT
-    Status try_consume(int64_t bytes) {
-#ifdef USE_MEM_TRACKER
-        if (bytes <= 0) {
-            release(-bytes);
-            return Status::OK();
-        }
-        RETURN_IF_ERROR(check_sys_mem_info(bytes));
-        int i;
-        // Walk the tracker tree top-down.
-        for (i = _all_trackers.size() - 1; i >= 0; --i) {
-            MemTracker* tracker = _all_trackers[i];
-            if (tracker->limit() < 0) {
-                tracker->_consumption->add(bytes); // No limit at this tracker.
-            } else {
-                // If TryConsume fails, we can try to GC, but we may need to try several times if
-                // there are concurrent consumers because we don't take a lock before trying to
-                // update _consumption.
-                while (true) {
-                    if (LIKELY(tracker->_consumption->try_add(bytes, tracker->limit()))) break;
-                    Status st = tracker->try_gc_memory(bytes);
-                    if (!st) {
-                        // Failed for this mem tracker. Roll back the ones that succeeded.
-                        for (int j = _all_trackers.size() - 1; j > i; --j) {
-                            _all_trackers[j]->_consumption->add(-bytes);
-                        }
-                        return st;
-                    }
-                }
-            }
-        }
-        // Everyone succeeded, return.
-        DCHECK_EQ(i, -1);
-#endif
-        return Status::OK();
-    }
-
-    // Decreases consumption of this tracker and its ancestors by 'bytes'.
-    void release(int64_t bytes) {
-#ifdef USE_MEM_TRACKER
-        if (bytes < 0) {
-            consume(-bytes);
-            return;
-        }
-        if (bytes == 0) {
-            return;
-        }
-        for (auto& tracker : _all_trackers) {
-            tracker->_consumption->add(-bytes);
-        }
-#endif
-    }
-
-    static void batch_consume(int64_t bytes,
-                              const std::vector<std::shared_ptr<MemTracker>>& trackers) {
-        for (auto& tracker : trackers) {
-            tracker->consume(bytes);
-        }
-    }
-
-    // When the accumulated untracked memory value exceeds the upper limit,
-    // the current value is returned and set to 0.
-    // Thread safety.
-    int64_t add_untracked_mem(int64_t bytes) {
-        _untracked_mem += bytes;
-        if (std::abs(_untracked_mem) >= config::mem_tracker_consume_min_size_bytes) {
-            return _untracked_mem.exchange(0);
-        }
-        return 0;
-    }
-
-    // In most cases, no need to call flush_untracked_mem on the child tracker,
-    // because when it is destructed, theoretically all its children have been destructed.
-    void flush_untracked_mem() {
-        consume(_untracked_mem.exchange(0));
-        for (const auto& tracker_weak : _child_trackers) {
-            std::shared_ptr<MemTracker> tracker = tracker_weak.lock();
-            if (tracker) tracker->flush_untracked_mem();
-        }
-    }
-
-    void release_cache(int64_t bytes) {
-        int64_t consume_bytes = add_untracked_mem(-bytes);
-        if (consume_bytes != 0) {
-            release(-consume_bytes);
-        }
-    }
-
-    void consume_cache(int64_t bytes) {
-        int64_t consume_bytes = add_untracked_mem(bytes);
-        if (consume_bytes != 0) {
-            consume(consume_bytes);
-        }
-    }
-
-    WARN_UNUSED_RESULT
-    Status try_consume_cache(int64_t bytes) {
-        if (bytes <= 0) {
-            release_cache(-bytes);
-            return Status::OK();
-        }
-        int64_t consume_bytes = add_untracked_mem(bytes);
-        if (consume_bytes != 0) {
-            Status st = try_consume(consume_bytes);
-            if (!st) {
-                _untracked_mem += consume_bytes;
-                return st;
-            }
-        }
-        return Status::OK();
-    }
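To make the batching above concrete, assume config::mem_tracker_consume_min_size_bytes is 4096 (an illustrative setting, not a value set by this patch); small consumes accumulate in _untracked_mem and are flushed once the absolute value crosses the threshold:

```cpp
// Illustrative walk-through, assuming a 4096-byte threshold.
tracker->consume_cache(1000); // _untracked_mem = 1000, no consume() yet
tracker->consume_cache(2000); // _untracked_mem = 3000, still below the threshold
tracker->consume_cache(1500); // |4500| >= 4096 -> consume(4500), _untracked_mem reset to 0
tracker->release_cache(300);  // _untracked_mem = -300, the release is deferred the same way
```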
-
-    // Increases consumption of this tracker and its ancestors up to (but not including) end_tracker.
-    // This is useful if we want to move tracking between trackers that share a common (i.e. end_tracker)
-    // ancestor. This happens when we want to update tracking on a particular mem tracker but the consumption
-    // against the limit recorded in one of its ancestors already happened.
-    void consume_local(int64_t bytes, MemTracker* end_tracker) {
-#ifdef USE_MEM_TRACKER
-        DCHECK(end_tracker);
-        if (bytes == 0) return;
-        for (auto& tracker : _all_trackers) {
-            if (tracker == end_tracker) return;
-            tracker->_consumption->add(bytes);
-        }
-#endif
-    }
-
-    // Decreases consumption of this tracker and its ancestors up to (but not including) end_tracker.
-    void release_local(int64_t bytes, MemTracker* end_tracker) {
-#ifdef USE_MEM_TRACKER
-        DCHECK(end_tracker);
-        if (bytes == 0) return;
-        for (auto& tracker : _all_trackers) {
-            if (tracker == end_tracker) return;
-            tracker->_consumption->add(-bytes);
-        }
-#endif
-    }
-
-    // Transfer 'bytes' of consumption from this tracker to 'dst', updating all ancestors
-    // up to the first shared ancestor. Must not be used if 'dst' has a limit, or an
-    // ancestor with a limit, that is not a common ancestor with this tracker, because
-    // this does not check memory limits.
-    void transfer_to_relative(MemTracker* dst, int64_t bytes);
-
-    WARN_UNUSED_RESULT
-    Status try_transfer_to(MemTracker* dst, int64_t bytes) {
-#ifdef USE_MEM_TRACKER
-        if (id() == dst->id()) return Status::OK();
-        // Must release first, then consume
-        release_cache(bytes);
-        Status st = dst->try_consume_cache(bytes);
-        if (!st) {
-            consume_cache(bytes);
-            return st;
-        }
-#endif
-        return Status::OK();
-    }
-
-    // Forced transfer: 'dst' may exceed its limit, and more ancestor trackers will be updated.
-    void transfer_to(MemTracker* dst, int64_t bytes) {
-#ifdef USE_MEM_TRACKER
-        if (id() == dst->id()) return;
-        release_cache(bytes);
-        dst->consume_cache(bytes);
-#endif
-    }
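A short sketch of the two transfer flavors declared above; `scanner_tracker` and `sink_tracker` are hypothetical trackers introduced only for the example.

```cpp
// try_transfer_to() honors the destination's limits and rolls back on failure;
// transfer_to() moves the bytes unconditionally.
Status st = scanner_tracker->try_transfer_to(sink_tracker.get(), 1024 * 1024);
if (!st.ok()) {
    // The destination (or one of its ancestors) would exceed its limit; ownership stays put.
}
sink_tracker->transfer_to(scanner_tracker.get(), 1024 * 1024); // forced move back
```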
-
-    // Returns the first tracker (this tracker or an ancestor) whose valid limit is exceeded, or nullptr.
-    MemTracker* limit_exceeded_tracker() const {
-        for (const auto& tracker : _limit_trackers) {
-            if (tracker->limit_exceeded()) {
-                return tracker;
-            }
-        }
-        return nullptr;
-    }
-
-    bool any_limit_exceeded() const { return limit_exceeded_tracker() != nullptr; }
-
-    // Returns the maximum consumption that can be made without exceeding the limit on
-    // this tracker or any of its parents. Returns int64_t::max() if there are no
-    // limits and a negative value if any limit is already exceeded.
-    int64_t spare_capacity() const {
-        int64_t result = std::numeric_limits<int64_t>::max();
-        for (const auto& tracker : _limit_trackers) {
-            int64_t mem_left = tracker->limit() - tracker->consumption();
-            result = std::min(result, mem_left);
-        }
-        return result;
-    }
-
-    // Returns the lowest limit for this tracker and its ancestors. Returns -1 if there is no limit.
-    int64_t get_lowest_limit() const {
-        if (_limit_trackers.empty()) return -1;
-        int64_t min_limit = std::numeric_limits<int64_t>::max();
-        for (const auto& tracker : _limit_trackers) {
-            DCHECK(tracker->has_limit());
-            min_limit = std::min(min_limit, tracker->limit());
-        }
-        return min_limit;
-    }
-
-    bool limit_exceeded() const { return _limit >= 0 && _limit < consumption(); }
-    int64_t limit() const { return _limit; }
-    void set_limit(int64_t limit) {
-        DCHECK_GE(limit, -1);
-        DCHECK(!_virtual);
-        _limit = limit;
-        _limit_trackers.push_back(this);
-        for (const auto& tracker_weak : _child_trackers) {
-            std::shared_ptr<MemTracker> tracker = tracker_weak.lock();
-            if (tracker) tracker->_limit_trackers.push_back(this);
-        }
-    }
-    bool has_limit() const { return _limit >= 0; }
-
-    Status check_limit(int64_t bytes) {
-        if (bytes <= 0) return Status::OK();
-        RETURN_IF_ERROR(check_sys_mem_info(bytes));
-        int i;
-        // Walk the tracker tree top-down.
-        for (i = _all_trackers.size() - 1; i >= 0; --i) {
-            MemTracker* tracker = _all_trackers[i];
-            if (tracker->limit() > 0) {
-                while (true) {
-                    if (LIKELY(tracker->_consumption->current_value() + bytes < tracker->limit()))
-                        break;
-                    RETURN_IF_ERROR(tracker->try_gc_memory(bytes));
-                }
-            }
-        }
-        return Status::OK();
-    }
-
-    const std::string& label() const { return _label; }
-
-    // Returns the memory consumed in bytes.
-    int64_t consumption() const { return _consumption->current_value(); }
-    int64_t peak_consumption() const { return _consumption->value(); }
-
-    std::shared_ptr<MemTracker> parent() const { return _parent; }
-
-    typedef std::function<void(int64_t bytes_to_free)> GcFunction;
-    /// Add a function 'f' to be called if the limit is reached, if none of the other
-    /// previously-added GC functions were successful at freeing up enough memory.
-    /// 'f' does not need to be thread-safe as long as it is added to only one MemTracker.
-    /// Note that 'f' must be valid for the lifetime of this MemTracker.
-    void add_gc_function(GcFunction f) { _gc_functions.push_back(f); }
-
-    /// Logs the usage of this tracker and optionally its children (recursively).
-    /// If 'logged_consumption' is non-nullptr, sets the consumption value logged.
-    /// 'max_recursive_depth' specifies the maximum number of levels of children
-    /// to include in the dump. If it is zero, then no children are dumped.
-    /// Limiting the recursive depth reduces the cost of dumping, particularly
-    /// for the process MemTracker.
-    std::string log_usage(int max_recursive_depth = INT_MAX, int64_t* logged_consumption = nullptr);
-
-    /// Log the memory usage when memory limit is exceeded and return a status object with
-    /// details of the allocation which caused the limit to be exceeded.
-    /// If 'failed_allocation_size' is greater than zero, logs the allocation size. If
-    /// 'failed_allocation_size' is zero, nothing about the allocation size is logged.
-    /// If 'state' is non-nullptr, logs the error to 'state'.
-    Status mem_limit_exceeded(RuntimeState* state, const std::string& details = std::string(),
-                              int64_t failed_allocation = -1, Status failed_alloc = Status::OK());
-
-    // Usually, a negative value means that the statistics are not accurate:
-    // 1. The released memory is not consumed.
-    // 2. The same block of memory: tracker A calls consume, and tracker B calls release.
-    // 3. Repeated releases of a MemTracker. If consume is called on the child MemTracker
-    //    after release is called on the parent MemTracker,
-    //    the child ~MemTracker will cause repeated releases.
-    static void memory_leak_check(MemTracker* tracker, bool flush = true) {
-        if (flush) tracker->flush_untracked_mem();
-        DCHECK_EQ(tracker->_consumption->current_value(), 0) << std::endl << tracker->log_usage();
-    }
-
-    // If an ancestor of this tracker is a Task MemTracker, return that tracker. Otherwise return nullptr.
-    MemTracker* parent_task_mem_tracker() {
-        if (this->_level == MemTrackerLevel::TASK) {
-            return this;
-        } else {
-            return parent_task_mem_tracker_no_own().get();
-        }
-    }
-
-    std::shared_ptr<MemTracker> parent_task_mem_tracker_no_own() {
-        std::shared_ptr<MemTracker> tracker = this->_parent;
-        while (tracker != nullptr && tracker->_level != MemTrackerLevel::TASK) {
-            tracker = tracker->_parent;
-        }
-        return tracker;
-    }
-
-    bool has_virtual_ancestor() {
-        MemTracker* tracker = this;
-        while (tracker != nullptr && tracker->_virtual == false) {
-            tracker = tracker->_parent.get();
-        }
-        return tracker != nullptr;
-    }
-
-    int64_t id() { return _id; }
-
-    std::string debug_string() {
-        std::stringstream msg;
-        msg << "limit: " << _limit << "; "
-            << "consumption: " << _consumption->current_value() << "; "
-            << "label: " << _label << "; "
-            << "all tracker size: " << _all_trackers.size() << "; "
-            << "limit trackers size: " << _limit_trackers.size() << "; "
-            << "parent is null: " << ((_parent == nullptr) ? "true" : "false") << "; ";
-        return msg.str();
-    }
-
-    static const std::string COUNTER_NAME;
-
-private:
-    static std::shared_ptr<MemTracker> create_tracker_impl(
-            int64_t byte_limit, const std::string& label, const std::shared_ptr<MemTracker>& parent,
-            MemTrackerLevel level, RuntimeProfile* profile);
-
-    /// 'byte_limit' < 0 means no limit
-    /// 'label' is the label used in the usage string (log_usage())
-    MemTracker(int64_t byte_limit, const std::string& label,
-               const std::shared_ptr<MemTracker>& parent, MemTrackerLevel, RuntimeProfile* profile);
-
-private:
-    // If consumption is higher than max_consumption, attempts to free memory by calling
-    // any added GC functions.  Returns true if max_consumption is still exceeded. Takes gc_lock.
-    // Note: If the cache of segment/chunk is released due to insufficient query memory at a certain moment,
-    // the performance of subsequent queries may be degraded, so the use of gc function should be careful enough.
-    bool gc_memory(int64_t max_consumption);
-
-    inline Status try_gc_memory(int64_t bytes) {
-        if (UNLIKELY(gc_memory(_limit - bytes))) {
-            return Status::MemoryLimitExceeded(
-                    "label={} TryConsume failed size={}, used={}, limit={}", label(), bytes,
-                    _consumption->current_value(), _limit);
-        }
-        VLOG_NOTICE << "GC succeeded, TryConsume bytes=" << bytes
-                    << " consumption=" << _consumption->current_value() << " limit=" << _limit;
-        return Status::OK();
-    }
-
-    // Walks the MemTracker hierarchy and populates _all_trackers and
-    // limit_trackers_
-    void init();
-    void init_virtual();
-
-    // Adds tracker to _child_trackers
-    void add_child_tracker(const std::shared_ptr<MemTracker>& tracker) {
-        std::lock_guard<SpinLock> l(_child_trackers_lock);
-        tracker->_child_tracker_it = _child_trackers.insert(_child_trackers.end(), tracker);
-    }
-
-    /// Log consumption of all the trackers provided. Returns the sum of consumption in
-    /// 'logged_consumption'. 'max_recursive_depth' specifies the maximum number of levels
-    /// of children to include in the dump. If it is zero, then no children are dumped.
-    static std::string log_usage(int max_recursive_depth,
-                                 const std::list<std::weak_ptr<MemTracker>>& trackers,
-                                 int64_t* logged_consumption);
-
-    // Creates the process tracker.
-    static void create_process_tracker();
-
-    // Limit on memory consumption, in bytes. If limit_ == -1, there is no consumption limit.
-    int64_t _limit;
-
-    std::string _label;
-
-    int64_t _id;
-
-    std::shared_ptr<MemTracker> _parent; // The parent of this tracker.
-
-    MemTrackerLevel _level;
-
-    bool _virtual = false;
-
-    std::shared_ptr<RuntimeProfile::HighWaterMarkCounter> _consumption; // in bytes
-
-    // Consume size smaller than mem_tracker_consume_min_size_bytes will continue to accumulate
-    // to avoid frequent calls to consume/release of MemTracker.
-    std::atomic<int64_t> _untracked_mem = 0;
-
-    std::vector<MemTracker*> _all_trackers;   // this tracker plus all of its ancestors
-    std::vector<MemTracker*> _limit_trackers; // _all_trackers with valid limits
-
-    // All the child trackers of this tracker. Used for error reporting and
-    // listing only (i.e. updating the consumption of a parent tracker does not
-    // update that of its children).
-    SpinLock _child_trackers_lock;
-    std::list<std::weak_ptr<MemTracker>> _child_trackers;
-    // Iterator into parent_->child_trackers_ for this object. Stored to have O(1) remove.
-    std::list<std::weak_ptr<MemTracker>>::iterator _child_tracker_it;
-
-    // Lock to protect gc_memory(). This prevents many GCs from occurring at once.
-    std::mutex _gc_lock;
-    // Functions to call after the limit is reached to free memory.
-    std::vector<GcFunction> _gc_functions;
-};
-
-#define RETURN_LIMIT_EXCEEDED(tracker, ...) return tracker->mem_limit_exceeded(__VA_ARGS__);
-#define RETURN_IF_LIMIT_EXCEEDED(tracker, state, msg) \
-    if (tracker->any_limit_exceeded()) RETURN_LIMIT_EXCEEDED(tracker, state, msg);
-#define RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, msg)        \
-    if (state->instance_mem_tracker()->any_limit_exceeded()) \
-        RETURN_LIMIT_EXCEEDED(state->instance_mem_tracker(), state, msg);
-
-} // namespace doris
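As a usage sketch of the macros above (the surrounding `ExampleNode`, its `_mem_tracker` member, and `state` are assumptions introduced only for illustration):

```cpp
// Hedged sketch: typical limit checks at the top of an exec node method.
Status ExampleNode::get_next(RuntimeState* state) {
    RETURN_IF_LIMIT_EXCEEDED(_mem_tracker, state, "ExampleNode exceeded the memory limit");
    RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Fragment instance exceeded the memory limit");
    // ... produce a batch ...
    return Status::OK();
}
```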
diff --git a/be/src/runtime/mem_tracker_task_pool.cpp b/be/src/runtime/mem_tracker_task_pool.cpp
deleted file mode 100644
index 84f1a951e0..0000000000
--- a/be/src/runtime/mem_tracker_task_pool.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "runtime/mem_tracker_task_pool.h"
-
-#include "common/config.h"
-#include "runtime/exec_env.h"
-#include "util/pretty_printer.h"
-
-namespace doris {
-
-std::shared_ptr<MemTracker> MemTrackerTaskPool::register_task_mem_tracker_impl(
-        const std::string& task_id, int64_t mem_limit, const std::string& label,
-        std::shared_ptr<MemTracker> parent) {
-    DCHECK(!task_id.empty());
-    // First time this task_id registered, make a new object, otherwise do nothing.
-    // Combine create_tracker and emplace into one operation to avoid the use of locks
-    // Name for task MemTrackers. '$0' is replaced with the task id.
-    _task_mem_trackers.try_emplace_l(
-            task_id, [](std::shared_ptr<MemTracker>) {},
-            MemTracker::create_tracker(mem_limit, label, parent, MemTrackerLevel::TASK));
-    return get_task_mem_tracker(task_id);
-}
-
-std::shared_ptr<MemTracker> MemTrackerTaskPool::register_query_mem_tracker(
-        const std::string& query_id, int64_t mem_limit) {
-    VLOG_FILE << "Register Query memory tracker, query id: " << query_id
-              << " limit: " << PrettyPrinter::print(mem_limit, TUnit::BYTES);
-    return register_task_mem_tracker_impl(query_id, mem_limit,
-                                          fmt::format("Query#queryId={}", query_id),
-                                          ExecEnv::GetInstance()->query_pool_mem_tracker());
-}
-
-std::shared_ptr<MemTracker> MemTrackerTaskPool::register_load_mem_tracker(
-        const std::string& load_id, int64_t mem_limit) {
-    // In load, the query id of the fragment is executed, which is the same as the load id of the load channel.
-    VLOG_FILE << "Register Load memory tracker, load id: " << load_id
-              << " limit: " << PrettyPrinter::print(mem_limit, TUnit::BYTES);
-    return register_task_mem_tracker_impl(load_id, mem_limit,
-                                          fmt::format("Load#loadId={}", load_id),
-                                          ExecEnv::GetInstance()->load_pool_mem_tracker());
-}
-
-std::shared_ptr<MemTracker> MemTrackerTaskPool::get_task_mem_tracker(const std::string& task_id) {
-    DCHECK(!task_id.empty());
-    std::shared_ptr<MemTracker> tracker = nullptr;
-    // Avoid using locks to resolve erase conflicts
-    _task_mem_trackers.if_contains(task_id,
-                                   [&tracker](std::shared_ptr<MemTracker> v) { tracker = v; });
-    return tracker;
-}
-
-void MemTrackerTaskPool::logout_task_mem_tracker() {
-    std::vector<std::string> expired_tasks;
-    for (auto it = _task_mem_trackers.begin(); it != _task_mem_trackers.end(); it++) {
-        if (!it->second) {
-            // During concurrent queries, after phmap _task_mem_trackers.erase,
-            // there have been cases where the key still exists in _task_mem_trackers.
-            // https://github.com/apache/incubator-doris/issues/10006
-            expired_tasks.emplace_back(it->first);
-        } else if (it->second.use_count() == 1) {
-            // No RuntimeState uses this task MemTracker, it is only referenced by this map, delete it
-            if (config::memory_leak_detection && it->second->consumption() != 0) {
-                // If consumption is not equal to 0 before query mem tracker is destructed,
-                // there are two possibilities in theory.
-                // 1. A memory leak occurs.
-                // 2. Some of the memory consumed/released on the query mem tracker is actually released/consume on
-                // other trackers such as the process mem tracker, and there is no manual transfer between the two trackers.
-                //
-                // The second case should be eliminated in theory, but it has not been done so far, so the query memory leak
-                // cannot be located, and the value of the query pool mem tracker statistics will be inaccurate.
-                //
-                // In order to ensure that the query pool mem tracker is the sum of all currently running query mem trackers,
-                // the effect of the ended query mem tracker on the query pool mem tracker should be cleared,
-                // that is, by consuming the negative of its current consumption on the parent.
-                LOG(WARNING) << "Task memory tracker memory leak:" << it->second->debug_string();
-            }
-            it->second->parent()->consume_local(-it->second->consumption(),
-                                                MemTracker::get_process_tracker().get());
-            expired_tasks.emplace_back(it->first);
-        } else {
-            // Log limit exceeded query tracker.
-            if (it->second->limit_exceeded()) {
-                it->second->mem_limit_exceeded(
-                        nullptr,
-                        fmt::format("Task mem limit exceeded but no cancel, queryId:{}", it->first),
-                        0, Status::OK());
-            }
-        }
-    }
-    for (auto tid : expired_tasks) {
-        // This means that after all RuntimeState is destructed,
-        // there are still task mem trackers that are get or register.
-        // The only known case: after a load task ends all fragments on a BE, `tablet_writer_open` is still
-        // called to create a channel, and the load task tracker will be re-registered in the channel open.
-        // https://github.com/apache/incubator-doris/issues/9905
-        if (!_task_mem_trackers[tid]) {
-            _task_mem_trackers.erase(tid);
-            VLOG_FILE << "Deregister null task mem tracker, task id: " << tid;
-        } else if (_task_mem_trackers[tid].use_count() == 1) {
-            _task_mem_trackers.erase(tid);
-            VLOG_FILE << "Deregister not used task mem tracker, task id: " << tid;
-        }
-    }
-}
-
-// TODO(zxy) More observable methods
-// /// Logs the usage of 'limit' number of queries based on maximum total memory
-// /// consumption.
-// std::string MemTracker::LogTopNQueries(int limit) {
-//     if (limit == 0) return "";
-//     priority_queue<std::pair<int64_t, std::string>, std::vector<std::pair<int64_t, std::string>>,
-//                    std::greater<std::pair<int64_t, std::string>>>
-//             min_pq;
-//     GetTopNQueries(min_pq, limit);
-//     std::vector<std::string> usage_strings(min_pq.size());
-//     while (!min_pq.empty()) {
-//         usage_strings.push_back(min_pq.top().second);
-//         min_pq.pop();
-//     }
-//     std::reverse(usage_strings.begin(), usage_strings.end());
-//     return join(usage_strings, "\n");
-// }
-
-// /// Helper function for LogTopNQueries that iterates through the MemTracker hierarchy
-// /// and populates 'min_pq' with 'limit' number of elements (that contain state related
-// /// to query MemTrackers) based on maximum total memory consumption.
-// void MemTracker::GetTopNQueries(
-//         priority_queue<std::pair<int64_t, std::string>, std::vector<std::pair<int64_t, std::string>>,
-//                        greater<std::pair<int64_t, std::string>>>& min_pq,
-//         int limit) {
-//     list<weak_ptr<MemTracker>> children;
-//     {
-//         lock_guard<SpinLock> l(child_trackers_lock_);
-//         children = child_trackers_;
-//     }
-//     for (const auto& child_weak : children) {
-//         shared_ptr<MemTracker> child = child_weak.lock();
-//         if (child) {
-//             child->GetTopNQueries(min_pq, limit);
-//         }
-//     }
-// }
-
-} // namespace doris
diff --git a/be/src/runtime/mem_tracker_task_pool.h b/be/src/runtime/mem_tracker_task_pool.h
deleted file mode 100644
index e3baec8d51..0000000000
--- a/be/src/runtime/mem_tracker_task_pool.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "runtime/mem_tracker.h"
-
-namespace doris {
-
-// Global task pool for query MemTrackers. Owned by ExecEnv.
-class MemTrackerTaskPool {
-public:
-    // Construct a MemTracker object for 'task_id' with 'mem_limit' as the memory limit.
-    // The MemTracker is a child of the pool MemTracker, Calling this with the same
-    // 'task_id' will return the same MemTracker object. This is used to track the local
-    // memory usage of all tasks executing. The first time this is called for a task,
-    // a new MemTracker object is created with the pool tracker as its parent.
-    // Newly created trackers will always have a limit of -1.
-    std::shared_ptr<MemTracker> register_task_mem_tracker_impl(const std::string& task_id,
-                                                               int64_t mem_limit,
-                                                               const std::string& label,
-                                                               std::shared_ptr<MemTracker> parent);
-    std::shared_ptr<MemTracker> register_query_mem_tracker(const std::string& query_id,
-                                                           int64_t mem_limit);
-    std::shared_ptr<MemTracker> register_load_mem_tracker(const std::string& load_id,
-                                                          int64_t mem_limit);
-
-    std::shared_ptr<MemTracker> get_task_mem_tracker(const std::string& task_id);
-
-    // Remove the mem tracker that has ended the query.
-    void logout_task_mem_tracker();
-
-private:
-    // All per-task MemTracker objects.
-    // The life cycle of task memtracker in the process is the same as task runtime state,
-    // MemTrackers will be removed from this map after query finish or cancel.
-    TrackersMap _task_mem_trackers;
-};
-
-} // namespace doris
\ No newline at end of file
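For context, a sketch of the task-pool lifecycle being removed here. The pool is owned by ExecEnv; the accessor name below is an assumption, and the query id string is illustrative.

```cpp
// Hedged sketch; task_pool_mem_tracker_registry() is an assumed ExecEnv accessor.
MemTrackerTaskPool* pool = ExecEnv::GetInstance()->task_pool_mem_tracker_registry();
std::shared_ptr<MemTracker> query_tracker =
        pool->register_query_mem_tracker("4a4c81fedaed337d:4acadfda00000000", 2L * 1024 * 1024 * 1024);
// Subsequent lookups with the same id return the same tracker.
DCHECK(pool->get_task_mem_tracker("4a4c81fedaed337d:4acadfda00000000") == query_tracker);
// Called periodically; erases trackers whose only remaining reference is the pool itself.
pool->logout_task_mem_tracker();
```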
diff --git a/be/src/runtime/memory/chunk_allocator.cpp b/be/src/runtime/memory/chunk_allocator.cpp
index a07a967ed4..e69b4c2e82 100644
--- a/be/src/runtime/memory/chunk_allocator.cpp
+++ b/be/src/runtime/memory/chunk_allocator.cpp
@@ -22,8 +22,8 @@
 #include 
 #include 
 
-#include "runtime/mem_tracker.h"
 #include "runtime/memory/chunk.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/memory/system_allocator.h"
 #include "runtime/thread_context.h"
 #include "util/bit_util.h"
@@ -101,7 +101,6 @@ public:
         // Poison this chunk to make asan can detect invalid access
         ASAN_POISON_MEMORY_REGION(ptr, size);
         std::lock_guard l(_lock);
-        // TODO(zxy) The memory of vector resize is not recorded in chunk allocator mem tracker
         _chunk_lists[idx].push_back(ptr);
     }
 
@@ -120,9 +119,7 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit)
           _steal_arena_limit(reserve_limit * 0.1),
           _reserved_bytes(0),
           _arenas(CpuInfo::get_max_num_cores()) {
-    _mem_tracker =
-            MemTracker::create_tracker(-1, "ChunkAllocator", nullptr, MemTrackerLevel::OVERVIEW);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_END_CLEAR(_mem_tracker);
+    _mem_tracker = std::make_unique(-1, "ChunkAllocator");
     for (int i = 0; i < _arenas.size(); ++i) {
         _arenas[i].reset(new ChunkArena());
     }
@@ -138,18 +135,8 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit)
     INT_GAUGE_METRIC_REGISTER(_chunk_allocator_metric_entity, chunk_pool_reserved_bytes);
 }
 
-Status ChunkAllocator::allocate(size_t size, Chunk* chunk, MemTracker* tracker, bool check_limits) {
-    MemTracker* reset_tracker =
-            tracker ? tracker : tls_ctx()->_thread_mem_tracker_mgr->mem_tracker().get();
-    // In advance, transfer the memory ownership of allocate from ChunkAllocator::tracker to the parameter tracker.
-    // Next, if the allocate is successful, it will exit normally;
-    // if the allocate fails, return this part of the memory to the parameter tracker.
-    if (check_limits) {
-        RETURN_IF_ERROR(_mem_tracker->try_transfer_to(reset_tracker, size));
-    } else {
-        _mem_tracker->transfer_to(reset_tracker, size);
-    }
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+Status ChunkAllocator::allocate(size_t size, Chunk* chunk) {
+    DCHECK(BitUtil::RoundUpToPowerOfTwo(size) == size);
 
     // fast path: allocate from current core arena
     int core_id = CpuInfo::get_current_core();
@@ -160,6 +147,8 @@ Status ChunkAllocator::allocate(size_t size, Chunk* chunk, MemTracker* tracker,
         DCHECK_GE(_reserved_bytes, 0);
         _reserved_bytes.fetch_sub(size);
         chunk_pool_local_core_alloc_count->increment(1);
+        // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker.
+        THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get());
         return Status::OK();
     }
     // Second path: try to allocate from other core's arena
@@ -175,6 +164,8 @@ Status ChunkAllocator::allocate(size_t size, Chunk* chunk, MemTracker* tracker,
                 chunk_pool_other_core_alloc_count->increment(1);
                 // reset chunk's core_id to other
                 chunk->core_id = core_id % _arenas.size();
+                // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker.
+                THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get());
                 return Status::OK();
             }
         }
@@ -189,23 +180,13 @@ Status ChunkAllocator::allocate(size_t size, Chunk* chunk, MemTracker* tracker,
     chunk_pool_system_alloc_count->increment(1);
     chunk_pool_system_alloc_cost_ns->increment(cost_ns);
     if (chunk->data == nullptr) {
-        // allocate fails, return this part of the memory to the parameter tracker.
-        reset_tracker->transfer_to(_mem_tracker.get(), size);
         return Status::MemoryAllocFailed("ChunkAllocator failed to allocate chunk {} bytes", size);
     }
     return Status::OK();
 }
 
-void ChunkAllocator::free(const Chunk& chunk, MemTracker* tracker) {
-    // The chunk's memory ownership is transferred from tls tracker to ChunkAllocator.
+void ChunkAllocator::free(const Chunk& chunk) {
     DCHECK(chunk.core_id != -1);
-    if (tracker) {
-        tracker->transfer_to(_mem_tracker.get(), chunk.size);
-    } else {
-        tls_ctx()->_thread_mem_tracker_mgr->mem_tracker()->transfer_to(_mem_tracker.get(),
-                                                                       chunk.size);
-    }
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
     int64_t old_reserved_bytes = _reserved_bytes;
     int64_t new_reserved_bytes = 0;
     do {
@@ -232,20 +213,21 @@ void ChunkAllocator::free(const Chunk& chunk, MemTracker* tracker) {
     if (_reserved_bytes % 100 == 32) {
         chunk_pool_reserved_bytes->set_value(_reserved_bytes);
     }
+    // The chunk's memory ownership is transferred from tls tracker to ChunkAllocator.
+    THREAD_MEM_TRACKER_TRANSFER_TO(chunk.size, _mem_tracker.get());
     _arenas[chunk.core_id]->push_free_chunk(chunk.data, chunk.size);
 }
 
-Status ChunkAllocator::allocate_align(size_t size, Chunk* chunk, MemTracker* tracker,
-                                      bool check_limits) {
-    return allocate(BitUtil::RoundUpToPowerOfTwo(size), chunk, tracker, check_limits);
+Status ChunkAllocator::allocate_align(size_t size, Chunk* chunk) {
+    return allocate(BitUtil::RoundUpToPowerOfTwo(size), chunk);
 }
 
-void ChunkAllocator::free(uint8_t* data, size_t size, MemTracker* tracker) {
+void ChunkAllocator::free(uint8_t* data, size_t size) {
     Chunk chunk;
     chunk.data = data;
     chunk.size = size;
     chunk.core_id = CpuInfo::get_current_core();
-    free(chunk, tracker);
+    free(chunk);
 }
 
 } // namespace doris
diff --git a/be/src/runtime/memory/chunk_allocator.h b/be/src/runtime/memory/chunk_allocator.h
index d425b69e4a..fef62e6db8 100644
--- a/be/src/runtime/memory/chunk_allocator.h
+++ b/be/src/runtime/memory/chunk_allocator.h
@@ -28,7 +28,7 @@ namespace doris {
 struct Chunk;
 class ChunkArena;
 class MetricEntity;
-class MemTracker;
+class MemTrackerLimiter;
 class Status;
 
 // Used to allocate memory with power-of-two length.
@@ -62,23 +62,25 @@ public:
 
     ChunkAllocator(size_t reserve_limit);
 
-    // Allocate a Chunk with a power-of-two length "size".
-    // Return true if success and allocated chunk is saved in "chunk".
-    // Otherwise return false.
-    Status allocate(size_t size, Chunk* chunk, MemTracker* tracker = nullptr,
-                    bool check_limits = false);
-
-    Status allocate_align(size_t size, Chunk* chunk, MemTracker* tracker = nullptr,
-                          bool check_limits = false);
+    // Round size up to a power-of-two length, then allocate a chunk.
+    Status allocate_align(size_t size, Chunk* chunk);
 
     // Free chunk allocated from this allocator
-    void free(const Chunk& chunk, MemTracker* tracker = nullptr);
+    void free(const Chunk& chunk);
 
     // Transfer the memory ownership to the chunk allocator.
     // If the chunk allocator is full, then free to the system.
     // Note: make sure that the length of 'data' is equal to size,
     // otherwise the capacity of chunk allocator will be wrong.
-    void free(uint8_t* data, size_t size, MemTracker* tracker = nullptr);
+    void free(uint8_t* data, size_t size);
+
+private:
+    friend class MemPool;
+
+    // Allocate a Chunk with a power-of-two length "size".
+    // Return true if success and allocated chunk is saved in "chunk".
+    // Otherwise return false.
+    Status allocate(size_t size, Chunk* Chunk);
 
 private:
     static ChunkAllocator* _s_instance;
@@ -93,7 +95,7 @@ private:
 
     std::shared_ptr _chunk_allocator_metric_entity;
 
-    std::shared_ptr _mem_tracker;
+    std::unique_ptr _mem_tracker;
 };
 
 } // namespace doris
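The refactored interface drops the tracker parameter entirely; memory ownership now moves between the allocator's tracker and the thread-local tracker inside allocate()/free(). A minimal usage sketch follows (the `instance()` accessor and `request_size` are assumptions for illustration):

```cpp
// Hedged sketch of the new, tracker-free call sites.
Chunk chunk;
size_t request_size = 1 << 20; // 1 MB; allocate_align rounds up to a power of two
Status st = ChunkAllocator::instance()->allocate_align(request_size, &chunk);
if (st.ok()) {
    // ... use chunk.data / chunk.size ...
    ChunkAllocator::instance()->free(chunk); // ownership returns to the allocator's tracker
}
```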
diff --git a/be/src/runtime/memory/mem_tracker.cpp b/be/src/runtime/memory/mem_tracker.cpp
new file mode 100644
index 0000000000..dec04a20f5
--- /dev/null
+++ b/be/src/runtime/memory/mem_tracker.cpp
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/mem-tracker.cpp
+// and modified by Doris
+
+#include "runtime/memory/mem_tracker.h"
+
+#include <fmt/format.h>
+#include <parallel_hashmap/phmap.h>
+
+#include "runtime/memory/mem_tracker_limiter.h"
+#include "runtime/thread_context.h"
+#include "util/pretty_printer.h"
+#include "util/time.h"
+
+namespace doris {
+
+const std::string MemTracker::COUNTER_NAME = "PeakMemoryUsage";
+
+using StaticTrackersMap = phmap::parallel_flat_hash_map<
+        std::string, MemTracker*, phmap::priv::hash_default_hash<std::string>,
+        phmap::priv::hash_default_eq<std::string>,
+        std::allocator<std::pair<const std::string, MemTracker*>>, 12, std::mutex>;
+
+static StaticTrackersMap _static_mem_trackers;
+
+MemTracker::MemTracker(const std::string& label, MemTrackerLimiter* parent, RuntimeProfile* profile,
+                       bool is_limiter) {
+    // Do not check whether the limit is exceeded in add_child_tracker, otherwise it will cause a deadlock when log_usage is called.
+    STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
+    _parent = parent ? parent : thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker();
+    DCHECK(_parent || label == "Process");
+    if (_parent && _parent->label().find("queryId=") != _parent->label().npos) {
+        // Add the queryId suffix to the tracker below the query.
+        _label = fmt::format("{}#{}", label,
+                             _parent->label().substr(_parent->label().find("queryId="), -1));
+    } else {
+        _label = label;
+    }
+    if (profile == nullptr) {
+        _consumption = std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES);
+    } else {
+        // By default, memory consumption is tracked via calls to consume()/release(), either to
+        // the tracker itself or to one of its descendents. Alternatively, a consumption metric
+        // can be specified, and then the metric's value is used as the consumption rather than
+        // the tally maintained by consume() and release(). A tcmalloc metric is used to track
+        // process memory consumption, since the process memory usage may be higher than the
+        // computed total memory (tcmalloc does not release deallocated memory immediately).
+        // Other consumption metrics are used in trackers below the process level to account
+        // for memory (such as free buffer pool buffers) that is not tracked by consume() and
+        // release().
+        _consumption = profile->AddSharedHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES);
+    }
+    _is_limiter = is_limiter;
+    if (_parent && !_is_limiter) _parent->add_child(this);
+}
+
+MemTracker::~MemTracker() {
+    if (_parent && !_is_limiter) _parent->remove_child(this);
+}
+
+// Count the memory in the scope to a temporary tracker with the specified label name.
+// This is very useful when debugging: by adding temporary trackers layer by layer, you can locate
+// the position where tracker statistics become inaccurate, as well as find memory hotspots.
+MemTracker* MemTracker::get_static_mem_tracker(const std::string& label) {
+    // First time this label registered, make a new object, otherwise do nothing.
+    // Avoid using locks to resolve erase conflicts.
+    _static_mem_trackers.lazy_emplace_l(
+            label, [&](MemTracker*) {},
+            [&](const auto& ctor) {
+                ctor(label, new MemTracker(fmt::format("[Static]-{}", label)));
+            });
+    return _static_mem_trackers[label];
+}
+
+MemTracker::Snapshot MemTracker::make_snapshot(size_t level) const {
+    Snapshot snapshot;
+    snapshot.label = _label;
+    if (_parent != nullptr) {
+        snapshot.parent = _parent->label();
+    }
+    snapshot.level = level;
+    snapshot.limit = -1;
+    snapshot.cur_consumption = _consumption->current_value();
+    snapshot.peak_consumption = _consumption->value();
+    snapshot.child_count = 0;
+    return snapshot;
+}
+
+std::string MemTracker::log_usage() {
+    // Make sure the consumption is up to date.
+    int64_t curr_consumption = consumption();
+    int64_t peak_consumption = _consumption->value();
+    if (curr_consumption == 0) return "";
+    std::string detail = "MemTracker Label={}, Total={}, Peak={}";
+    detail = fmt::format(detail, _label, PrettyPrinter::print(curr_consumption, TUnit::BYTES),
+                         PrettyPrinter::print(peak_consumption, TUnit::BYTES));
+    return detail;
+}
+
+} // namespace doris
\ No newline at end of file
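A debugging sketch for the helper above; the label and `bytes_allocated` are hypothetical, and the tracker intentionally outlives the scope since static trackers are never destructed.

```cpp
// Hedged sketch: manually attribute a suspect code path to a named static tracker.
MemTracker* t = MemTracker::get_static_mem_tracker("SegmentLoad");
t->consume(bytes_allocated); // record around the allocation being investigated
LOG(INFO) << t->log_usage(); // e.g. "MemTracker Label=[Static]-SegmentLoad, Total=..., Peak=..."
t->release(bytes_allocated); // keep the tracker balanced once done
```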
diff --git a/be/src/runtime/memory/mem_tracker.h b/be/src/runtime/memory/mem_tracker.h
new file mode 100644
index 0000000000..9f00c23499
--- /dev/null
+++ b/be/src/runtime/memory/mem_tracker.h
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/mem-tracker.h
+// and modified by Doris
+#pragma once
+
+#include "util/runtime_profile.h"
+
+namespace doris {
+
+class MemTrackerLimiter;
+
+// Used to track memory usage.
+//
+// MemTracker can be consumed manually by consume()/release(), or put into SCOPED_CONSUME_MEM_TRACKER,
+// which will automatically track all memory usage of the code segment where it is located.
+//
+// A MemTracker's parent can only be a MemTrackerLimiter, and the parent is used only to print
+// tree-like statistics. Consuming a MemTracker does not consume its parent synchronously.
+// Usually, it is not necessary to specify the parent; by default, the MemTrackerLimiter in the
+// thread context, which is set when the thread starts, is used as the parent.
+//
+// This class is thread-safe.
+class MemTracker {
+public:
+    struct Snapshot {
+        std::string label;
+        std::string parent = "";
+        size_t level = 0;
+        int64_t limit = 0;
+        int64_t cur_consumption = 0;
+        int64_t peak_consumption = 0;
+        size_t child_count = 0;
+    };
+
+    // Creates and adds the tracker to the tree.
+    MemTracker(const std::string& label = std::string(), MemTrackerLimiter* parent = nullptr,
+               RuntimeProfile* profile = nullptr, bool is_limiter = false);
+
+    ~MemTracker();
+
+    // Get a temporary tracker with a specified label; the tracker is created the first time the
+    // label is requested. Temporary trackers are not automatically destructed, which is usually used for debugging.
+    static MemTracker* get_static_mem_tracker(const std::string& label);
+
+public:
+    const std::string& label() const { return _label; }
+    MemTrackerLimiter* parent() const { return _parent; }
+    // Returns the memory consumed in bytes.
+    int64_t consumption() const { return _consumption->current_value(); }
+    int64_t peak_consumption() const { return _consumption->value(); }
+
+public:
+    void consume(int64_t bytes);
+    void release(int64_t bytes) { consume(-bytes); }
+    // Transfer 'bytes' of consumption from this tracker to 'dst'.
+    void transfer_to(MemTracker* dst, int64_t bytes);
+
+public:
+    bool limit_exceeded(int64_t limit) const { return limit >= 0 && limit < consumption(); }
+    bool check_limit(int64_t limit, int64_t bytes) const {
+        return limit >= 0 && limit < consumption() + bytes;
+    }
+
+    // Usually, a negative value means that the statistics are not accurate:
+    // 1. The released memory is not consumed.
+    // 2. The same block of memory: tracker A calls consume, and tracker B calls release.
+    // 3. Repeated releases of a MemTracker. If consume is called on the child MemTracker
+    //    after release is called on the parent MemTracker,
+    //    the child ~MemTracker will cause repeated releases.
+    void memory_leak_check() { DCHECK_EQ(consumption(), 0) << std::endl << log_usage(); }
+
+    Snapshot make_snapshot(size_t level) const;
+
+    std::string log_usage();
+
+    std::string debug_string() {
+        std::stringstream msg;
+        msg << "label: " << _label << "; "
+            << "consumption: " << consumption() << "; "
+            << "peak_consumption: " << peak_consumption() << "; ";
+        return msg.str();
+    }
+
+    // Iterator into parent_->_child_trackers for this object. Stored to have O(1) remove.
+    std::list<MemTracker*>::iterator _child_tracker_it;
+
+    static const std::string COUNTER_NAME;
+
+protected:
+    // label used in the usage string (log_usage())
+    std::string _label;
+
+    std::shared_ptr<RuntimeProfile::HighWaterMarkCounter> _consumption; // in bytes
+
+    MemTrackerLimiter* _parent; // The parent of this tracker.
+
+    bool _is_limiter;
+};
+
+inline void MemTracker::consume(int64_t bytes) {
+    if (bytes == 0) {
+        return;
+    } else {
+        _consumption->add(bytes);
+    }
+}
+
+inline void MemTracker::transfer_to(MemTracker* dst, int64_t bytes) {
+    release(bytes);
+    dst->consume(bytes);
+}
+
+} // namespace doris
\ No newline at end of file
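To round out the new interface, a minimal sketch of manual tracking with the class above; numbers and labels are illustrative, and the parent defaults to the MemTrackerLimiter held in the current thread context.

```cpp
// Hedged sketch of the refactored, non-limiting tracker.
MemTracker buffer_tracker("ExampleBuffer");
buffer_tracker.consume(16 * 1024);
MemTracker spill_tracker("ExampleSpill");
buffer_tracker.transfer_to(&spill_tracker, 4 * 1024); // buffer: 12 KB, spill: 4 KB
MemTracker::Snapshot snap = spill_tracker.make_snapshot(/*level=*/1);
// snap.cur_consumption == 4096; snap.peak_consumption >= 4096
buffer_tracker.release(12 * 1024);
spill_tracker.release(4 * 1024);
buffer_tracker.memory_leak_check(); // DCHECKs consumption() == 0
```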
diff --git a/be/src/runtime/memory/mem_tracker_base.cpp b/be/src/runtime/memory/mem_tracker_base.cpp
deleted file mode 100644
index bb407e2bf8..0000000000
--- a/be/src/runtime/memory/mem_tracker_base.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/mem-tracker.cpp
-// and modified by Doris
-
-#include "runtime/memory/mem_tracker_base.h"
-
-#include "util/time.h"
-
-namespace doris {
-
-const std::string MemTrackerBase::COUNTER_NAME = "PeakMemoryUsage";
-
-MemTrackerBase::MemTrackerBase(const std::string& label, MemTrackerLimiter* parent,
-                               RuntimeProfile* profile)
-        : _label(label),
-          // Not 100% sure the id is unique. This is generated because it is faster than converting to int after hash.
-          _id((GetCurrentTimeMicros() % 1000000) * 100 + _label.length()),
-          _parent(parent) {
-    if (profile == nullptr) {
-        _consumption = std::make_shared(TUnit::BYTES);
-    } else {
-        // By default, memory consumption is tracked via calls to consume()/release(), either to
-        // the tracker itself or to one of its descendents. Alternatively, a consumption metric
-        // can be specified, and then the metric's value is used as the consumption rather than
-        // the tally maintained by consume() and release(). A tcmalloc metric is used to track
-        // process memory consumption, since the process memory usage may be higher than the
-        // computed total memory (tcmalloc does not release deallocated memory immediately).
-        // Other consumption metrics are used in trackers below the process level to account
-        // for memory (such as free buffer pool buffers) that is not tracked by consume() and
-        // release().
-        _consumption = profile->AddSharedHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES);
-    }
-}
-
-MemTrackerBase::MemTrackerBase(const std::string& label)
-        : MemTrackerBase(label, nullptr, nullptr) {}
-} // namespace doris
diff --git a/be/src/runtime/memory/mem_tracker_base.h b/be/src/runtime/memory/mem_tracker_base.h
deleted file mode 100644
index 10d554839b..0000000000
--- a/be/src/runtime/memory/mem_tracker_base.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/mem-tracker.h
-// and modified by Doris
-
-#pragma once
-
-#include "util/runtime_profile.h"
-
-namespace doris {
-
-class MemTrackerLimiter;
-
-// A MemTracker tracks memory consumption.
-// This class is thread-safe.
-class MemTrackerBase {
-public:
-    const std::string& label() const { return _label; }
-
-    // Returns the memory consumed in bytes.
-    int64_t consumption() const { return _consumption->current_value(); }
-    int64_t peak_consumption() const { return _consumption->value(); }
-
-    MemTrackerBase(const std::string& label, MemTrackerLimiter* parent, RuntimeProfile* profile);
-
-    // this is used for creating an orphan mem tracker, or for unit test.
-    // If a mem tracker has parent, it should be created by `create_tracker()`
-    MemTrackerBase(const std::string& label = std::string());
-
-    MemTrackerLimiter* parent() const { return _parent; }
-    int64_t id() { return _id; }
-    bool is_limited() { return _is_limited; } // MemTrackerLimiter
-    bool is_observed() { return _is_observed; }
-    void set_is_limited() { _is_limited = true; } // MemTrackerObserve
-    void set_is_observed() { _is_observed = true; }
-
-    // Usually, a negative values means that the statistics are not accurate,
-    // 1. The released memory is not consumed.
-    // 2. The same block of memory, tracker A calls consume, and tracker B calls release.
-    // 3. Repeated releases of MemTacker. When the consume is called on the child MemTracker,
-    //    after the release is called on the parent MemTracker,
-    //    the child ~MemTracker will cause repeated releases.
-    void memory_leak_check() { DCHECK_EQ(_consumption->current_value(), 0); }
-
-    static const std::string COUNTER_NAME;
-
-protected:
-    // label used in the usage string (log_usage())
-    std::string _label;
-
-    // Automatically generated, unique for each mem tracker.
-    int64_t _id;
-
-    std::shared_ptr _consumption; // in bytes
-
-    bool _is_limited = false; // is MemTrackerLimiter
-
-    bool _is_observed = false; // is MemTrackerObserve
-
-    MemTrackerLimiter* _parent; // The parent of this tracker.
-};
-
-} // namespace doris
diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp
index 6193558ab0..4bdc2a2781 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.cpp
+++ b/be/src/runtime/memory/mem_tracker_limiter.cpp
@@ -20,7 +20,7 @@
 #include 
 
 #include "gutil/once.h"
-#include "runtime/memory/mem_tracker_observe.h"
+#include "runtime/runtime_state.h"
 #include "runtime/thread_context.h"
 #include "service/backend_options.h"
 #include "util/pretty_printer.h"
@@ -28,151 +28,84 @@
 
 namespace doris {
 
-// The ancestor for all trackers. Every tracker is visible from the process down.
-// All manually created trackers should specify the process tracker as the parent.
-static MemTrackerLimiter* process_tracker;
-static GoogleOnceType process_tracker_once = GOOGLE_ONCE_INIT;
-
-MemTrackerLimiter* MemTrackerLimiter::create_tracker(int64_t byte_limit, const std::string& label,
-                                                     MemTrackerLimiter* parent,
-                                                     RuntimeProfile* profile) {
-    // Do not check limit exceed when add_child_tracker, otherwise it will cause deadlock when log_usage is called.
-    STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
-    if (!parent) {
-        parent = MemTrackerLimiter::get_process_tracker();
-    }
-    MemTrackerLimiter* tracker(new MemTrackerLimiter("[Limit]-" + label, parent, profile));
-    parent->add_child_tracker(tracker);
-    tracker->set_is_limited();
-    tracker->init(byte_limit);
-    return tracker;
-}
-
-void MemTrackerLimiter::init(int64_t limit) {
-    DCHECK_GE(limit, -1);
-    _limit = limit;
+MemTrackerLimiter::MemTrackerLimiter(int64_t byte_limit, const std::string& label,
+                                     MemTrackerLimiter* parent, RuntimeProfile* profile)
+        : MemTracker(label, parent, profile, true) {
+    // Walks the MemTrackerLimiter hierarchy and populates _all_ancestors and _limited_ancestors
+    DCHECK_GE(byte_limit, -1);
+    _limit = byte_limit;
     MemTrackerLimiter* tracker = this;
     while (tracker != nullptr) {
-        _ancestor_all_trackers.push_back(tracker);
-        if (tracker->has_limit()) _ancestor_limiter_trackers.push_back(tracker);
+        _all_ancestors.push_back(tracker);
+        if (tracker->has_limit()) _limited_ancestors.push_back(tracker);
         tracker = tracker->_parent;
     }
-    DCHECK_GT(_ancestor_all_trackers.size(), 0);
-    DCHECK_EQ(_ancestor_all_trackers[0], this);
+    DCHECK_GT(_all_ancestors.size(), 0);
+    DCHECK_EQ(_all_ancestors[0], this);
+    if (_parent) _parent->add_child(this);
 }
 
 MemTrackerLimiter::~MemTrackerLimiter() {
     // TCMalloc hook will be triggered during destructor memtracker, may cause crash.
-    if (_label == "Process") doris::thread_local_ctx._init = false;
-    flush_untracked_mem();
-    if (parent()) {
-        // Do not call release on the parent tracker to avoid repeated releases.
-        // Ensure that all consume/release are triggered by TCMalloc new/delete hook.
-        std::lock_guard l(_parent->_child_trackers_lock);
-        if (_child_tracker_it != _parent->_child_limiter_trackers.end()) {
-            _parent->_child_limiter_trackers.erase(_child_tracker_it);
-            _child_tracker_it = _parent->_child_limiter_trackers.end();
-        }
-    }
-    // The child observe tracker life cycle is controlled by its parent limiter tarcker.
-    for (auto tracker : _child_observe_trackers) {
-        delete tracker;
-    }
-    DCHECK_EQ(_untracked_mem, 0);
+    if (_label == "Process") doris::thread_context_ptr._init = false;
+    DCHECK(remain_child_count() == 0 || _label == "Process");
+    if (_parent) _parent->remove_child(this);
 }
 
-void MemTrackerLimiter::add_child_tracker(MemTrackerLimiter* tracker) {
-    std::lock_guard l(_child_trackers_lock);
+void MemTrackerLimiter::add_child(MemTrackerLimiter* tracker) {
+    std::lock_guard<std::mutex> l(_child_tracker_limiter_lock);
     tracker->_child_tracker_it =
-            _child_limiter_trackers.insert(_child_limiter_trackers.end(), tracker);
+            _child_tracker_limiters.insert(_child_tracker_limiters.end(), tracker);
+    _had_child_count++;
 }
 
-void MemTrackerLimiter::add_child_tracker(MemTrackerObserve* tracker) {
-    std::lock_guard l(_child_trackers_lock);
-    tracker->_child_tracker_it =
-            _child_observe_trackers.insert(_child_observe_trackers.end(), tracker);
+void MemTrackerLimiter::add_child(MemTracker* tracker) {
+    std::lock_guard<std::mutex> l(_child_tracker_lock);
+    tracker->_child_tracker_it = _child_trackers.insert(_child_trackers.end(), tracker);
+    _had_child_count++;
 }
 
-void MemTrackerLimiter::remove_child_tracker(MemTrackerLimiter* tracker) {
-    std::lock_guard l(_child_trackers_lock);
-    if (tracker->_child_tracker_it != _child_limiter_trackers.end()) {
-        _child_limiter_trackers.erase(tracker->_child_tracker_it);
-        tracker->_child_tracker_it = _child_limiter_trackers.end();
+void MemTrackerLimiter::remove_child(MemTrackerLimiter* tracker) {
+    std::lock_guard<std::mutex> l(_child_tracker_limiter_lock);
+    if (tracker->_child_tracker_it != _child_tracker_limiters.end()) {
+        _child_tracker_limiters.erase(tracker->_child_tracker_it);
+        tracker->_child_tracker_it = _child_tracker_limiters.end();
     }
 }
 
-void MemTrackerLimiter::remove_child_tracker(MemTrackerObserve* tracker) {
-    std::lock_guard l(_child_trackers_lock);
-    if (tracker->_child_tracker_it != _child_observe_trackers.end()) {
-        _child_observe_trackers.erase(tracker->_child_tracker_it);
-        tracker->_child_tracker_it = _child_observe_trackers.end();
+void MemTrackerLimiter::remove_child(MemTracker* tracker) {
+    std::lock_guard<std::mutex> l(_child_tracker_lock);
+    if (tracker->_child_tracker_it != _child_trackers.end()) {
+        _child_trackers.erase(tracker->_child_tracker_it);
+        tracker->_child_tracker_it = _child_trackers.end();
     }
 }
 
-void MemTrackerLimiter::create_process_tracker() {
-    process_tracker = new MemTrackerLimiter("Process", nullptr, nullptr);
-    process_tracker->init(-1);
-}
-
-MemTrackerLimiter* MemTrackerLimiter::get_process_tracker() {
-    GoogleOnceInit(&process_tracker_once, &MemTrackerLimiter::create_process_tracker);
-    return process_tracker;
-}
-
-void MemTrackerLimiter::list_process_trackers(std::vector* trackers) {
-    trackers->clear();
-    std::deque to_process;
-    to_process.push_front(get_process_tracker());
-    while (!to_process.empty()) {
-        MemTrackerLimiter* t = to_process.back();
-        to_process.pop_back();
-
-        trackers->push_back(t);
-        std::list limiter_children;
-        std::list observe_children;
+void MemTrackerLimiter::make_snapshot(std::vector<MemTracker::Snapshot>* snapshots,
+                                      size_t cur_level, size_t upper_level) const {
+    Snapshot snapshot = MemTracker::make_snapshot(cur_level);
+    snapshot.limit = _limit;
+    snapshot.child_count = remain_child_count();
+    (*snapshots).emplace_back(snapshot);
+    if (cur_level < upper_level) {
         {
-            std::lock_guard l(t->_child_trackers_lock);
-            limiter_children = t->_child_limiter_trackers;
-            observe_children = t->_child_observe_trackers;
+            std::lock_guard<std::mutex> l(_child_tracker_limiter_lock);
+            for (const auto& child : _child_tracker_limiters) {
+                child->make_snapshot(snapshots, cur_level + 1, upper_level);
+            }
         }
-        for (const auto& child : limiter_children) {
-            to_process.emplace_back(std::move(child));
-        }
-        if (config::show_observe_tracker) {
-            for (const auto& child : observe_children) {
-                trackers->push_back(child);
+        {
+            std::lock_guard<std::mutex> l(_child_tracker_lock);
+            for (const auto& child : _child_trackers) {
+                (*snapshots).emplace_back(child->make_snapshot(cur_level + 1));
             }
         }
     }
 }
 
-MemTrackerLimiter* MemTrackerLimiter::common_ancestor(MemTrackerLimiter* dst) {
-    if (id() == dst->id()) return dst;
-    DCHECK_EQ(_ancestor_all_trackers.back(), dst->_ancestor_all_trackers.back())
-            << "Must have same ancestor";
-    int ancestor_idx = _ancestor_all_trackers.size() - 1;
-    int dst_ancestor_idx = dst->_ancestor_all_trackers.size() - 1;
-    while (ancestor_idx > 0 && dst_ancestor_idx > 0 &&
-           _ancestor_all_trackers[ancestor_idx - 1] ==
-                   dst->_ancestor_all_trackers[dst_ancestor_idx - 1]) {
-        --ancestor_idx;
-        --dst_ancestor_idx;
-    }
-    return _ancestor_all_trackers[ancestor_idx];
-}
-
-MemTrackerLimiter* MemTrackerLimiter::limit_exceeded_tracker() const {
-    for (const auto& tracker : _ancestor_limiter_trackers) {
-        if (tracker->limit_exceeded()) {
-            return tracker;
-        }
-    }
-    return nullptr;
-}
-
 int64_t MemTrackerLimiter::spare_capacity() const {
     int64_t result = std::numeric_limits<int64_t>::max();
-    for (const auto& tracker : _ancestor_limiter_trackers) {
+    for (const auto& tracker : _limited_ancestors) {
         int64_t mem_left = tracker->limit() - tracker->consumption();
         result = std::min(result, mem_left);
     }
@@ -180,9 +113,9 @@ int64_t MemTrackerLimiter::spare_capacity() const {
 }
 
 int64_t MemTrackerLimiter::get_lowest_limit() const {
-    if (_ancestor_limiter_trackers.empty()) return -1;
+    if (_limited_ancestors.empty()) return -1;
     int64_t min_limit = std::numeric_limits<int64_t>::max();
-    for (const auto& tracker : _ancestor_limiter_trackers) {
+    for (const auto& tracker : _limited_ancestors) {
         DCHECK(tracker->has_limit());
         min_limit = std::min(min_limit, tracker->limit());
     }
@@ -246,13 +179,12 @@ Status MemTrackerLimiter::try_gc_memory(int64_t bytes) {
 //                Total=6.04 MB Peak=6.45 MB
 //
 std::string MemTrackerLimiter::log_usage(int max_recursive_depth, int64_t* logged_consumption) {
-    // Make sure the consumption is up to date.
     int64_t curr_consumption = consumption();
     int64_t peak_consumption = _consumption->value();
     if (logged_consumption != nullptr) *logged_consumption = curr_consumption;
 
     std::string detail =
-            "MemTracker log_usage Label: {}, Limit: {}, Total: {}, Peak: {}, Exceeded: {}";
+            "MemTrackerLimiter log_usage Label={}, Limit={}, Total={}, Peak={}, Exceeded={}";
     detail = fmt::format(detail, _label, PrettyPrinter::print(_limit, TUnit::BYTES),
                          PrettyPrinter::print(curr_consumption, TUnit::BYTES),
                          PrettyPrinter::print(peak_consumption, TUnit::BYTES),
@@ -264,16 +196,16 @@ std::string MemTrackerLimiter::log_usage(int max_recursive_depth, int64_t* logge
     // Recurse and get information about the children
     int64_t child_consumption;
     std::string child_trackers_usage;
-    std::list limiter_children;
-    std::list observe_children;
     {
-        std::lock_guard l(_child_trackers_lock);
-        limiter_children = _child_limiter_trackers;
-        observe_children = _child_observe_trackers;
+        std::lock_guard l(_child_tracker_limiter_lock);
+        child_trackers_usage =
+                log_usage(max_recursive_depth - 1, _child_tracker_limiters, &child_consumption);
     }
-    child_trackers_usage = log_usage(max_recursive_depth - 1, limiter_children, &child_consumption);
-    for (const auto& child : observe_children) {
-        child_trackers_usage += "\n" + child->log_usage(&child_consumption);
+    {
+        std::lock_guard l(_child_tracker_lock);
+        for (const auto& child : _child_trackers) {
+            child_trackers_usage += "\n" + child->log_usage();
+        }
     }
     if (!child_trackers_usage.empty()) detail += "\n" + child_trackers_usage;
     return detail;
@@ -285,13 +217,10 @@ std::string MemTrackerLimiter::log_usage(int max_recursive_depth,
     *logged_consumption = 0;
     std::vector<std::string> usage_strings;
     for (const auto& tracker : trackers) {
-        if (tracker) {
-            int64_t tracker_consumption;
-            std::string usage_string =
-                    tracker->log_usage(max_recursive_depth, &tracker_consumption);
-            if (!usage_string.empty()) usage_strings.push_back(usage_string);
-            *logged_consumption += tracker_consumption;
-        }
+        int64_t tracker_consumption;
+        std::string usage_string = tracker->log_usage(max_recursive_depth, &tracker_consumption);
+        if (!usage_string.empty()) usage_strings.push_back(usage_string);
+        *logged_consumption += tracker_consumption;
     }
     return join(usage_strings, "\n");
 }
@@ -299,7 +228,7 @@ std::string MemTrackerLimiter::log_usage(int max_recursive_depth,
 Status MemTrackerLimiter::mem_limit_exceeded(RuntimeState* state, const std::string& details,
                                              int64_t failed_allocation_size, Status failed_alloc) {
     STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
-    MemTrackerLimiter* process_tracker = MemTrackerLimiter::get_process_tracker();
+    MemTrackerLimiter* process_tracker = ExecEnv::GetInstance()->process_mem_tracker();
     std::string detail =
             "Memory exceed limit. fragment={}, details={}, on backend={}. Memory left in process "
             "limit={}.";
diff --git a/be/src/runtime/memory/mem_tracker_limiter.h b/be/src/runtime/memory/mem_tracker_limiter.h
index d5d937523f..7dcfd80dc7 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.h
+++ b/be/src/runtime/memory/mem_tracker_limiter.h
@@ -17,110 +17,61 @@
 
 #pragma once
 
+#include 
+
 #include "common/config.h"
-#include "runtime/memory/mem_tracker_base.h"
-#include "runtime/runtime_state.h"
+#include "runtime/exec_env.h"
+#include "runtime/memory/mem_tracker.h"
 #include "util/mem_info.h"
+#include "util/perf_counters.h"
 
 namespace doris {
 
-class MemTrackerObserve;
+class RuntimeState;
 
-// Tracker contains an limit, and can be arranged into a tree structure such that the consumption
-// tracked by a MemTracker is also tracked by its ancestors.
-// Used for:
-// 1. Track and limit the memory usage of process and query.
-//    Automatic memory consume based on system memory allocation (Currently, based on TCMlloc hook).
-// 2. Execution logic that requires memory size to participate in control.
-//    Manual consumption, but will not affect the overall statistics of the process.
+// Track and limit the memory usage of process and query.
+// Contains a limit and is arranged into a tree structure; consumption is also tracked by its ancestors.
 //
-// We use a five-level hierarchy of mem trackers: process, query pool, query, instance,
-// node. Specific parts of the fragment (exec nodes, sinks, etc) will add a
-// fifth level when they are initialized.
+// Automatically tracks every malloc/free of the system memory allocator (currently based on the TCMalloc hook).
+// Put the query MemTrackerLimiter into SCOPED_ATTACH_TASK when the thread starts; all memory used by this thread
+// will then be recorded against that query, otherwise it is recorded in the Process tracker by default.
 //
-// GcFunctions can be attached to a MemTracker in order to free up memory if the limit is
-// reached. If limit_exceeded() is called and the limit is exceeded, it will first call
-// the GcFunctions to try to free memory and recheck the limit. For example, the process
-// tracker has a GcFunction that releases any unused memory still held by tcmalloc, so
-// this will be called before the process limit is reported as exceeded. GcFunctions are
-// called in the order they are added, so expensive functions should be added last.
-// GcFunctions are called with a global lock held, so should be non-blocking and not
-// call back into MemTrackers, except to release memory.
-class MemTrackerLimiter final : public MemTrackerBase {
+// We use a five-level hierarchy of mem trackers: process, query pool, query, instance, node.
+// The first four levels are MemTrackerLimiters with limits, and the fifth level is a MemTracker without a limit.
+// Specific parts of the fragment (exec nodes, sinks, etc.) add the fifth level when they are initialized.
+class MemTrackerLimiter final : public MemTracker {
 public:
-    // Creates and adds the tracker to the tree
-    static MemTrackerLimiter* create_tracker(int64_t byte_limit, const std::string& label,
-                                             MemTrackerLimiter* parent = nullptr,
-                                             RuntimeProfile* profile = nullptr);
-
-    // Walks the MemTrackerLimiter hierarchy and populates _ancestor_all_trackers and limit_trackers_
-    void init(int64_t limit);
+    // Creates and adds the tracker limiter to the tree
+    MemTrackerLimiter(int64_t byte_limit = -1, const std::string& label = std::string(),
+                      MemTrackerLimiter* parent = nullptr, RuntimeProfile* profile = nullptr);
 
     ~MemTrackerLimiter();
 
-    // Adds tracker to _child_trackers
-    void add_child_tracker(MemTrackerLimiter* tracker);
-    void add_child_tracker(MemTrackerObserve* tracker);
-    // Remove tracker from _child_trackers
-    void remove_child_tracker(MemTrackerLimiter* tracker);
-    void remove_child_tracker(MemTrackerObserve* tracker);
+    void add_child(MemTrackerLimiter* tracker);
+    void add_child(MemTracker* tracker);
+    void remove_child(MemTrackerLimiter* tracker);
+    void remove_child(MemTracker* tracker);
 
     // Leaf tracker, without any child
-    bool is_leaf() { _child_limiter_trackers.size() + _child_observe_trackers.size() == 0; }
-
-    // Gets a "process" tracker, creating it if necessary.
-    static MemTrackerLimiter* get_process_tracker();
-
-    // Returns a list of all the valid trackers.
-    static void list_process_trackers(std::vector* trackers);
-
-public:
-    // The following func, for execution logic that requires memory size to participate in control.
-    // this does not change the value of process tracker.
-
-    // only consume self, will not sync to parent. Usually used to manually record the specified memory,
-    // It is independent of the automatically recording of thread local tracker, so the same block of memory
-    // will be recorded in the thread local tracker and the current tracker at the same time.
-    void consume_self(int64_t bytes);
-    void release_self(int64_t bytes) { consume_self(-bytes); }
-
-    // up to (but not including) end_tracker.
-    // This is useful if we want to move tracking between trackers that share a common (i.e. end_tracker)
-    // ancestor. This happens when we want to update tracking on a particular mem tracker but the consumption
-    // against the limit recorded in one of its ancestors already happened.
-    void consume_local(int64_t bytes, MemTrackerLimiter* end_tracker);
-    void release_local(int64_t bytes, MemTrackerLimiter* end_tracker) {
-        consume_local(-bytes, end_tracker);
+    size_t remain_child_count() const {
+        return _child_tracker_limiters.size() + _child_trackers.size();
     }
+    size_t had_child_count() const { return _had_child_count; }
 
-    // Transfer 'bytes' of consumption from this tracker to 'dst'.
-    // Forced transfer, 'dst' may limit exceed, and more ancestor trackers will be updated.
-    void transfer_to(MemTrackerLimiter* dst, int64_t bytes);
-
-    // When the accumulated untracked memory value exceeds the upper limit,
-    // the current value is returned and set to 0.
-    // Thread safety.
-    int64_t add_untracked_mem(int64_t bytes);
-
-    // In most cases, no need to call flush_untracked_mem on the child tracker,
-    // because when it is destructed, theoretically all its children have been destructed.
-    void flush_untracked_mem() { consume(_untracked_mem.exchange(0)); }
-
-    // Find the common ancestor and update trackers between 'this'/'dst' and
-    // the common ancestor. This logic handles all cases, including the
-    // two trackers being the same or being ancestors of each other because
-    // 'all_trackers_' includes the current tracker.
-    MemTrackerLimiter* common_ancestor(MemTrackerLimiter* dst);
+    // Returns a list of all the valid tracker snapshots.
+    void make_snapshot(std::vector<MemTracker::Snapshot>* snapshots, size_t cur_level,
+                       size_t upper_level) const;
 
 public:
-    // The following func, for mem limit.
-
     Status check_sys_mem_info(int64_t bytes) {
-        // TODO add mmap
-        if (MemInfo::initialized() && MemInfo::current_mem() + bytes >= MemInfo::mem_limit()) {
-            return Status::MemoryLimitExceeded(fmt::format(
+        // Limit process memory usage using the actual physical memory of the process from `/proc/self/status`.
+        // This is independent of the consumption value of the mem tracker, which counts the virtual memory
+        // allocated by the process via malloc.
+        // For speed, MemInfo::initialized() is expected to be true.
+        if (PerfCounters::get_vm_rss() + bytes >= MemInfo::mem_limit()) {
+            return Status::MemoryLimitExceeded(
                     "{}: TryConsume failed, bytes={} process whole consumption={}  mem limit={}",
-                    _label, bytes, MemInfo::current_mem(), MemInfo::mem_limit()));
+                    _label, bytes, MemInfo::current_mem(), MemInfo::mem_limit());
         }
         return Status::OK();
     }
@@ -132,26 +83,32 @@ public:
         _limit = limit;
     }
     bool limit_exceeded() const { return _limit >= 0 && _limit < consumption(); }
-    bool any_limit_exceeded() const { return limit_exceeded_tracker() != nullptr; }
 
-    // Returns true if a valid limit of this tracker or one of its ancestors is exceeded.
-    MemTrackerLimiter* limit_exceeded_tracker() const;
+    // Returns true if a valid limit of this tracker limiter or one of its ancestors is exceeded.
+    bool any_limit_exceeded() const {
+        for (const auto& tracker : _limited_ancestors) {
+            if (tracker->limit_exceeded()) {
+                return true;
+            }
+        }
+        return false;
+    }
 
     Status check_limit(int64_t bytes);
 
     // Returns the maximum consumption that can be made without exceeding the limit on
-    // this tracker or any of its parents. Returns int64_t::max() if there are no
+    // this tracker limiter or any of its parents. Returns int64_t::max() if there are no
     // limits and a negative value if any limit is already exceeded.
     int64_t spare_capacity() const;
 
-    // Returns the lowest limit for this tracker and its ancestors. Returns -1 if there is no limit.
+    // Returns the lowest limit for this tracker limiter and its ancestors. Returns -1 if there is no limit.
     int64_t get_lowest_limit() const;
 
     typedef std::function<void()> GcFunction;
-    /// Add a function 'f' to be called if the limit is reached, if none of the other
-    /// previously-added GC functions were successful at freeing up enough memory.
-    /// 'f' does not need to be thread-safe as long as it is added to only one MemTrackerLimiter.
-    /// Note that 'f' must be valid for the lifetime of this MemTrackerLimiter.
+    // Add a function 'f' to be called if the limit is reached, if none of the other
+    // previously-added GC functions were successful at freeing up enough memory.
+    // 'f' does not need to be thread-safe as long as it is added to only one tracker limiter.
+    // Note that 'f' must be valid for the lifetime of this tracker limiter.
     void add_gc_function(GcFunction f) { _gc_functions.push_back(f); }
 
     // If consumption is higher than max_consumption, attempts to free memory by calling
@@ -161,12 +118,15 @@ public:
     bool gc_memory(int64_t max_consumption);
     Status try_gc_memory(int64_t bytes);
 
-    /// Logs the usage of this tracker and optionally its children (recursively).
-    /// If 'logged_consumption' is non-nullptr, sets the consumption value logged.
-    /// 'max_recursive_depth' specifies the maximum number of levels of children
-    /// to include in the dump. If it is zero, then no children are dumped.
-    /// Limiting the recursive depth reduces the cost of dumping, particularly
-    /// for the process MemTracker.
+public:
+    void consumption_revise(int64_t bytes) { _consumption->add(bytes); }
+
+    // Logs the usage of this tracker limiter and optionally its children (recursively).
+    // If 'logged_consumption' is non-nullptr, sets the consumption value logged.
+    // 'max_recursive_depth' specifies the maximum number of levels of children
+    // to include in the dump. If it is zero, then no children are dumped.
+    // Limiting the recursive depth reduces the cost of dumping, particularly
+    // for the process tracker limiter.
     std::string log_usage(int max_recursive_depth = INT_MAX, int64_t* logged_consumption = nullptr);
 
     // Log the memory usage when memory limit is exceeded and return a status object with
@@ -182,9 +142,8 @@ public:
         msg << "limit: " << _limit << "; "
             << "consumption: " << _consumption->current_value() << "; "
             << "label: " << _label << "; "
-            << "all tracker size: " << _ancestor_all_trackers.size() << "; "
-            << "limit trackers size: " << _ancestor_limiter_trackers.size() << "; "
-            << "parent is null: " << ((_parent == nullptr) ? "true" : "false") << "; ";
+            << "all ancestor size: " << _all_ancestors.size() - 1 << "; "
+            << "limited ancestor size: " << _limited_ancestors.size() - 1 << "; ";
         return msg.str();
     }
 
@@ -192,12 +151,6 @@ private:
     // The following func, for automatic memory tracking and limiting based on system memory allocation.
     friend class ThreadMemTrackerMgr;
 
-    MemTrackerLimiter(const std::string& label, MemTrackerLimiter* parent, RuntimeProfile* profile)
-            : MemTrackerBase(label, parent, profile) {}
-
-    // Creates the process tracker.
-    static void create_process_tracker();
-
     // Increases consumption of this tracker and its ancestors by 'bytes'.
     void consume(int64_t bytes);
 
@@ -206,13 +159,13 @@ private:
 
     // Increases consumption of this tracker and its ancestors by 'bytes' only if
     // they can all consume 'bytes' without exceeding limit. If limit would be exceed,
-    // no MemTrackers are updated. Returns true if the consumption was successfully updated.
+    // no MemTrackerLimiters are updated. Returns true if the consumption was successfully updated.
     WARN_UNUSED_RESULT
     Status try_consume(int64_t bytes);
 
-    /// Log consumption of all the trackers provided. Returns the sum of consumption in
-    /// 'logged_consumption'. 'max_recursive_depth' specifies the maximum number of levels
-    /// of children to include in the dump. If it is zero, then no children are dumped.
+    // Log consumption of all the trackers provided. Returns the sum of consumption in
+    // 'logged_consumption'. 'max_recursive_depth' specifies the maximum number of levels
+    // of children to include in the dump. If it is zero, then no children are dumped.
     static std::string log_usage(int max_recursive_depth,
                                  const std::list<MemTrackerLimiter*>& trackers,
                                  int64_t* logged_consumption);
@@ -221,27 +174,37 @@ private:
     // Limit on memory consumption, in bytes. If limit_ == -1, there is no consumption limit. Used in log_usage.
     int64_t _limit;
 
-    // Consume size smaller than mem_tracker_consume_min_size_bytes will continue to accumulate
-    // to avoid frequent calls to consume/release of MemTracker.
-    std::atomic _untracked_mem = 0;
+    // this tracker limiter plus all of its ancestors
+    std::vector<MemTrackerLimiter*> _all_ancestors;
+    // _all_ancestors with valid limits
+    std::vector<MemTrackerLimiter*> _limited_ancestors;
 
-    // All the child trackers of this tracker. Used for error reporting and
-    // listing only (i.e. updating the consumption of a parent tracker does not
+    // Child trackers of this tracker limiter. Used for error reporting and
+    // listing only (i.e. updating the consumption of a parent tracker limiter does not
     // update that of its children).
-    SpinLock _child_trackers_lock;
-    std::list _child_limiter_trackers;
-    std::list _child_observe_trackers;
-    // Iterator into parent_->_child_limiter_trackers for this object. Stored to have O(1) remove.
-    std::list::iterator _child_tracker_it;
+    mutable std::mutex _child_tracker_limiter_lock;
+    std::list<MemTrackerLimiter*> _child_tracker_limiters;
 
-    // this tracker plus all of its ancestors
-    std::vector _ancestor_all_trackers;
-    // _ancestor_all_trackers with valid limits
-    std::vector _ancestor_limiter_trackers;
+    mutable std::mutex _child_tracker_lock;
+    std::list<MemTracker*> _child_trackers;
+
+    // The number of child trackers that have been added.
+    std::atomic_size_t _had_child_count = 0;
+
+    // Iterator into parent_->_child_tracker_limiters for this object. Stored to have O(1) remove.
+    std::list<MemTrackerLimiter*>::iterator _child_tracker_it;
 
     // Lock to protect gc_memory(). This prevents many GCs from occurring at once.
     std::mutex _gc_lock;
     // Functions to call after the limit is reached to free memory.
+    // GcFunctions can be attached to a MemTracker in order to free up memory if the limit is
+    // reached. If limit_exceeded() is called and the limit is exceeded, it will first call
+    // the GcFunctions to try to free memory and recheck the limit. For example, the process
+    // tracker has a GcFunction that releases any unused memory still held by tcmalloc, so
+    // this will be called before the process limit is reported as exceeded. GcFunctions are
+    // called in the order they are added, so expensive functions should be added last.
+    // GcFunctions are called with a global lock held, so should be non-blocking and not
+    // call back into MemTrackers, except to release memory.
     std::vector<GcFunction> _gc_functions;
 };
 
@@ -249,7 +212,7 @@ inline void MemTrackerLimiter::consume(int64_t bytes) {
     if (bytes == 0) {
         return;
     } else {
-        for (auto& tracker : _ancestor_all_trackers) {
+        for (auto& tracker : _all_ancestors) {
             tracker->_consumption->add(bytes);
         }
     }
@@ -263,8 +226,8 @@ inline Status MemTrackerLimiter::try_consume(int64_t bytes) {
     RETURN_IF_ERROR(check_sys_mem_info(bytes));
     int i;
     // Walk the tracker tree top-down.
-    for (i = _ancestor_all_trackers.size() - 1; i >= 0; --i) {
-        MemTrackerLimiter* tracker = _ancestor_all_trackers[i];
+    for (i = _all_ancestors.size() - 1; i >= 0; --i) {
+        MemTrackerLimiter* tracker = _all_ancestors[i];
         if (tracker->limit() < 0) {
             tracker->_consumption->add(bytes); // No limit at this tracker.
         } else {
@@ -276,8 +239,8 @@ inline Status MemTrackerLimiter::try_consume(int64_t bytes) {
                 Status st = tracker->try_gc_memory(bytes);
                 if (!st) {
                     // Failed for this mem tracker. Roll back the ones that succeeded.
-                    for (int j = _ancestor_all_trackers.size() - 1; j > i; --j) {
-                        _ancestor_all_trackers[j]->_consumption->add(-bytes);
+                    for (int j = _all_ancestors.size() - 1; j > i; --j) {
+                        _all_ancestors[j]->_consumption->add(-bytes);
                     }
                     return st;
                 }
@@ -289,44 +252,13 @@ inline Status MemTrackerLimiter::try_consume(int64_t bytes) {
     return Status::OK();
 }
 
-inline void MemTrackerLimiter::consume_self(int64_t bytes) {
-    int64_t consume_bytes = add_untracked_mem(bytes);
-    if (consume_bytes != 0) {
-        _consumption->add(consume_bytes);
-    }
-}
-
-inline void MemTrackerLimiter::consume_local(int64_t bytes, MemTrackerLimiter* end_tracker) {
-    DCHECK(end_tracker);
-    if (bytes == 0) return;
-    for (auto& tracker : _ancestor_all_trackers) {
-        if (tracker == end_tracker) return;
-        tracker->consume_self(bytes);
-    }
-}
-
-inline void MemTrackerLimiter::transfer_to(MemTrackerLimiter* dst, int64_t bytes) {
-    DCHECK(dst->is_limited());
-    if (id() == dst->id()) return;
-    release_local(bytes, MemTrackerLimiter::get_process_tracker());
-    dst->consume_local(bytes, MemTrackerLimiter::get_process_tracker());
-}
-
-inline int64_t MemTrackerLimiter::add_untracked_mem(int64_t bytes) {
-    _untracked_mem += bytes;
-    if (std::abs(_untracked_mem) >= config::mem_tracker_consume_min_size_bytes) {
-        return _untracked_mem.exchange(0);
-    }
-    return 0;
-}
-
 inline Status MemTrackerLimiter::check_limit(int64_t bytes) {
     if (bytes <= 0) return Status::OK();
     RETURN_IF_ERROR(check_sys_mem_info(bytes));
     int i;
     // Walk the tracker tree top-down.
-    for (i = _ancestor_all_trackers.size() - 1; i >= 0; --i) {
-        MemTrackerLimiter* tracker = _ancestor_all_trackers[i];
+    for (i = _all_ancestors.size() - 1; i >= 0; --i) {
+        MemTrackerLimiter* tracker = _all_ancestors[i];
         if (tracker->limit() > 0) {
             while (true) {
                 if (LIKELY(tracker->_consumption->current_value() + bytes < tracker->limit()))
@@ -341,8 +273,5 @@ inline Status MemTrackerLimiter::check_limit(int64_t bytes) {
 #define RETURN_LIMIT_EXCEEDED(tracker, ...) return tracker->mem_limit_exceeded(__VA_ARGS__);
 #define RETURN_IF_LIMIT_EXCEEDED(tracker, state, msg) \
     if (tracker->any_limit_exceeded()) RETURN_LIMIT_EXCEEDED(tracker, state, msg);
-#define RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, msg)        \
-    if (state->instance_mem_tracker()->any_limit_exceeded()) \
-        RETURN_LIMIT_EXCEEDED(state->instance_mem_tracker(), state, msg);
 
 } // namespace doris
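To show how the snapshot API declared above replaces the old list_process_trackers() traversal, here is a hedged sketch of a caller dumping the limiter tree. process_mem_tracker() on ExecEnv appears elsewhere in this patch; the output formatting and the two-level depth are illustrative choices, and the function itself is hypothetical.

#include <ostream>
#include <string>
#include <vector>

#include "runtime/exec_env.h"
#include "runtime/memory/mem_tracker_limiter.h"

// Hypothetical consumer of make_snapshot(); not part of the patch.
void dump_tracker_tree(std::ostream& out) {
    std::vector<doris::MemTracker::Snapshot> snapshots;
    // Level 0 is the process tracker; collect two levels of children below it.
    doris::ExecEnv::GetInstance()->process_mem_tracker()->make_snapshot(&snapshots, 0, 2);
    for (const auto& s : snapshots) {
        out << std::string(s.level * 2, ' ') << s.label
            << " consumption=" << s.cur_consumption
            << " peak=" << s.peak_consumption
            << " limit=" << s.limit << "\n";
    }
}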
diff --git a/be/src/runtime/memory/mem_tracker_observe.cpp b/be/src/runtime/memory/mem_tracker_observe.cpp
deleted file mode 100644
index f696df2f94..0000000000
--- a/be/src/runtime/memory/mem_tracker_observe.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "runtime/memory/mem_tracker_observe.h"
-
-#include 
-#include 
-
-#include "runtime/memory/mem_tracker_limiter.h"
-#include "runtime/thread_context.h"
-#include "util/pretty_printer.h"
-
-namespace doris {
-
-using TemporaryTrackersMap = phmap::parallel_flat_hash_map<
-        std::string, MemTrackerObserve*, phmap::priv::hash_default_hash,
-        phmap::priv::hash_default_eq,
-        std::allocator>, 12, std::mutex>;
-
-static TemporaryTrackersMap _temporary_mem_trackers;
-
-MemTrackerObserve* MemTrackerObserve::create_tracker(const std::string& label,
-                                                     RuntimeProfile* profile) {
-    STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
-    MemTrackerLimiter* parent = tls_ctx()->_thread_mem_tracker_mgr->limiter_mem_tracker();
-    DCHECK(parent);
-    std::string parent_label = parent->label();
-    std::string reset_label;
-    if (parent_label.find_first_of("#") != parent_label.npos) {
-        reset_label = fmt::format("[Observe]-{}#{}", label,
-                                  parent_label.substr(parent_label.find_first_of("#"), -1));
-    } else {
-        reset_label = fmt::format("[Observe]-{}", label);
-    }
-    MemTrackerObserve* tracker(new MemTrackerObserve(reset_label, parent, profile));
-    parent->add_child_tracker(tracker);
-    tracker->set_is_observed();
-    return tracker;
-}
-
-MemTrackerObserve::~MemTrackerObserve() {
-    if (parent()) {
-        parent()->remove_child_tracker(this);
-    }
-}
-
-// Count the memory in the scope to a temporary tracker with the specified label name.
-// This is very useful when debugging. You can find the position where the tracker statistics are
-// inaccurate through the temporary tracker layer by layer. As well as finding memory hotspots.
-// TODO(zxy) track specifies the memory for each line in the code segment, instead of manually adding
-// a switch temporary tracker to each line. Maybe there are open source tools to do this?
-MemTrackerObserve* MemTrackerObserve::get_temporary_mem_tracker(const std::string& label) {
-    // First time this label registered, make a new object, otherwise do nothing.
-    // Avoid using locks to resolve erase conflicts.
-    _temporary_mem_trackers.try_emplace_l(
-            label, [](MemTrackerObserve*) {},
-            MemTrackerObserve::create_tracker(fmt::format("[Temporary]-{}", label)));
-    return _temporary_mem_trackers[label];
-}
-
-std::string MemTrackerObserve::log_usage(int64_t* logged_consumption) {
-    // Make sure the consumption is up to date.
-    int64_t curr_consumption = consumption();
-    int64_t peak_consumption = _consumption->value();
-    if (logged_consumption != nullptr) *logged_consumption = curr_consumption;
-    if (curr_consumption == 0) return "";
-    std::string detail = "MemTracker log_usage Label: {}, Total: {}, Peak: {}";
-    detail = fmt::format(detail, _label, PrettyPrinter::print(curr_consumption, TUnit::BYTES),
-                         PrettyPrinter::print(peak_consumption, TUnit::BYTES));
-    return detail;
-}
-
-} // namespace doris
diff --git a/be/src/runtime/memory/mem_tracker_observe.h b/be/src/runtime/memory/mem_tracker_observe.h
deleted file mode 100644
index 3213319207..0000000000
--- a/be/src/runtime/memory/mem_tracker_observe.h
+++ /dev/null
@@ -1,91 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "runtime/memory/mem_tracker_base.h"
-
-namespace doris {
-
-class MemTrackerLimiter;
-
-// Used to manually track memory usage at specified locations, including all exec node trackers.
-//
-// There is no parent-child relationship between MemTrackerObserves. Both fathers are fragment instance trakcers,
-// but their consumption will not consume fragment instance trakcers synchronously. Therefore, errors in statistics
-// will not affect the memory tracking and restrictions of processes and Query.
-class MemTrackerObserve final : public MemTrackerBase {
-public:
-    // Creates and adds the tracker to the tree
-    static MemTrackerObserve* create_tracker(const std::string& label,
-                                             RuntimeProfile* profile = nullptr);
-
-    ~MemTrackerObserve();
-
-    // Get a temporary tracker with a specified label, and the tracker will be created when the label is first get.
-    // Temporary trackers are not automatically destructed, which is usually used for debugging.
-    static MemTrackerObserve* get_temporary_mem_tracker(const std::string& label);
-
-public:
-    void consume(int64_t bytes);
-
-    void release(int64_t bytes) { consume(-bytes); }
-
-    static void batch_consume(int64_t bytes, const std::vector& trackers) {
-        for (auto& tracker : trackers) {
-            tracker->consume(bytes);
-        }
-    }
-
-    // Transfer 'bytes' of consumption from this tracker to 'dst'.
-    void transfer_to(MemTrackerObserve* dst, int64_t bytes);
-
-    bool limit_exceeded(int64_t limit) const { return limit >= 0 && limit < consumption(); }
-
-    std::string log_usage(int64_t* logged_consumption = nullptr);
-
-    std::string debug_string() {
-        std::stringstream msg;
-        msg << "label: " << _label << "; "
-            << "consumption: " << _consumption->current_value() << "; "
-            << "parent is null: " << ((_parent == nullptr) ? "true" : "false") << "; ";
-        return msg.str();
-    }
-
-    // Iterator into parent_->_child_observe_trackers for this object. Stored to have O(1) remove.
-    std::list::iterator _child_tracker_it;
-
-private:
-    MemTrackerObserve(const std::string& label, MemTrackerLimiter* parent, RuntimeProfile* profile)
-            : MemTrackerBase(label, parent, profile) {}
-};
-
-inline void MemTrackerObserve::consume(int64_t bytes) {
-    if (bytes == 0) {
-        return;
-    } else {
-        _consumption->add(bytes);
-    }
-}
-
-inline void MemTrackerObserve::transfer_to(MemTrackerObserve* dst, int64_t bytes) {
-    if (id() == dst->id()) return;
-    release(bytes);
-    dst->consume(bytes);
-}
-
-} // namespace doris
diff --git a/be/src/runtime/memory/mem_tracker_task_pool.cpp b/be/src/runtime/memory/mem_tracker_task_pool.cpp
index d643acdc4b..8947e019bd 100644
--- a/be/src/runtime/memory/mem_tracker_task_pool.cpp
+++ b/be/src/runtime/memory/mem_tracker_task_pool.cpp
@@ -23,24 +23,33 @@
 
 namespace doris {
 
+// When a MemTracker's consumption is negative, it is considered that a memory leak has occurred,
+// but inaccurate MemTracker accounting can also produce a negative value,
+// so this feature is still in the experimental stage.
+const bool QUERY_MEMORY_LEAK_DETECTION = false;
+
 MemTrackerLimiter* MemTrackerTaskPool::register_task_mem_tracker_impl(const std::string& task_id,
                                                                       int64_t mem_limit,
                                                                       const std::string& label,
                                                                       MemTrackerLimiter* parent) {
     DCHECK(!task_id.empty());
     // First time this task_id registered, make a new object, otherwise do nothing.
-    // Combine create_tracker and emplace into one operation to avoid the use of locks
+    // Combine creating the tracker and emplacing it into one operation to avoid the use of locks
     // Name for task MemTrackers. '$0' is replaced with the task id.
-    _task_mem_trackers.try_emplace_l(
-            task_id, [](MemTrackerLimiter*) {},
-            MemTrackerLimiter::create_tracker(mem_limit, label, parent));
-    return get_task_mem_tracker(task_id);
+    bool new_emplace = _task_mem_trackers.lazy_emplace_l(
+            task_id, [&](MemTrackerLimiter*) {},
+            [&](const auto& ctor) {
+                ctor(task_id, new MemTrackerLimiter(mem_limit, label, parent));
+            });
+    if (new_emplace) {
+        LOG(INFO) << "Register task memory tracker, task id: " << task_id
+                  << " limit: " << PrettyPrinter::print(mem_limit, TUnit::BYTES);
+    }
+    return _task_mem_trackers[task_id];
 }
 
 MemTrackerLimiter* MemTrackerTaskPool::register_query_mem_tracker(const std::string& query_id,
                                                                   int64_t mem_limit) {
-    VLOG_FILE << "Register Query memory tracker, query id: " << query_id
-              << " limit: " << PrettyPrinter::print(mem_limit, TUnit::BYTES);
     return register_task_mem_tracker_impl(query_id, mem_limit,
                                           fmt::format("Query#queryId={}", query_id),
                                           ExecEnv::GetInstance()->query_pool_mem_tracker());
@@ -49,10 +58,8 @@ MemTrackerLimiter* MemTrackerTaskPool::register_query_mem_tracker(const std::str
 MemTrackerLimiter* MemTrackerTaskPool::register_load_mem_tracker(const std::string& load_id,
                                                                  int64_t mem_limit) {
     // In load, the query id of the fragment is executed, which is the same as the load id of the load channel.
-    VLOG_FILE << "Register Load memory tracker, load id: " << load_id
-              << " limit: " << PrettyPrinter::print(mem_limit, TUnit::BYTES);
     return register_task_mem_tracker_impl(load_id, mem_limit,
-                                          fmt::format("Load#loadId={}", load_id),
+                                          fmt::format("Load#queryId={}", load_id),
                                           ExecEnv::GetInstance()->load_pool_mem_tracker());
 }
 
@@ -70,10 +77,10 @@ void MemTrackerTaskPool::logout_task_mem_tracker() {
         if (!it->second) {
             // https://github.com/apache/incubator-doris/issues/10006
             expired_tasks.emplace_back(it->first);
-        } else if (it->second->is_leaf() == true && it->second->peak_consumption() > 0) {
+        } else if (it->second->remain_child_count() == 0 && it->second->had_child_count() != 0) {
             // No RuntimeState uses this task MemTracker, it is only referenced by this map,
             // and tracker was not created soon, delete it.
-            if (config::memory_leak_detection && it->second->consumption() != 0) {
+            if (QUERY_MEMORY_LEAK_DETECTION && it->second->consumption() != 0) {
                 // If consumption is not equal to 0 before query mem tracker is destructed,
                 // there are two possibilities in theory.
                 // 1. A memory leak occurs.
@@ -87,8 +94,7 @@ void MemTrackerTaskPool::logout_task_mem_tracker() {
             // In order to ensure that the query pool mem tracker is the sum of all currently running query mem trackers,
             // the effect of the ended query mem tracker on the query pool mem tracker should be cleared, that is,
             // the negative number of the current value of consume.
-            it->second->parent()->consume_local(-it->second->consumption(),
-                                                MemTrackerLimiter::get_process_tracker());
+            it->second->parent()->consumption_revise(-it->second->consumption());
             expired_tasks.emplace_back(it->first);
         } else {
             // Log limit exceeded query tracker.
@@ -103,11 +109,11 @@ void MemTrackerTaskPool::logout_task_mem_tracker() {
     for (auto tid : expired_tasks) {
         if (!_task_mem_trackers[tid]) {
             _task_mem_trackers.erase(tid);
-            VLOG_FILE << "Deregister null task mem tracker, task id: " << tid;
+            LOG(INFO) << "Deregister null task memory tracker, task id: " << tid;
         } else {
             delete _task_mem_trackers[tid];
             _task_mem_trackers.erase(tid);
-            VLOG_FILE << "Deregister not used task mem tracker, task id: " << tid;
+            LOG(INFO) << "Deregister not used task memory tracker, task id: " << tid;
         }
     }
 }
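For orientation, a small sketch of how the registration path above is typically reached. It uses only register_query_mem_tracker() from this file and relies on the documented behavior that a repeated call with the same task id returns the existing tracker rather than creating a new one; the wrapper function and its name are illustrative.

#include <cstdint>
#include <string>

#include "runtime/memory/mem_tracker_task_pool.h"

// Hypothetical call site; not part of the patch.
doris::MemTrackerLimiter* query_tracker_for(doris::MemTrackerTaskPool* task_pool,
                                            const std::string& query_id,
                                            int64_t mem_limit) {
    // Safe to call from several fragment threads of the same query: the first
    // call creates "Query#queryId=<id>" under the query pool tracker, later
    // calls return the same MemTrackerLimiter.
    return task_pool->register_query_mem_tracker(query_id, mem_limit);
}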
diff --git a/be/src/runtime/memory/system_allocator.cpp b/be/src/runtime/memory/system_allocator.cpp
index 6ed5906f00..c05c28861b 100644
--- a/be/src/runtime/memory/system_allocator.cpp
+++ b/be/src/runtime/memory/system_allocator.cpp
@@ -45,7 +45,7 @@ void SystemAllocator::free(uint8_t* ptr, size_t length) {
             LOG(ERROR) << "fail to free memory via munmap, errno=" << errno
                        << ", errmsg=" << strerror_r(errno, buf, 64);
         } else {
-            RELEASE_THREAD_LOCAL_MEM_TRACKER(length);
+            RELEASE_THREAD_MEM_TRACKER(length);
         }
     } else {
         ::free(ptr);
@@ -66,14 +66,14 @@ uint8_t* SystemAllocator::allocate_via_malloc(size_t length) {
 }
 
 uint8_t* SystemAllocator::allocate_via_mmap(size_t length) {
-    CONSUME_THREAD_LOCAL_MEM_TRACKER(length);
+    CONSUME_THREAD_MEM_TRACKER(length);
     auto ptr = (uint8_t*)mmap(nullptr, length, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE,
                               -1, 0);
     if (ptr == MAP_FAILED) {
         char buf[64];
         LOG(ERROR) << "fail to allocate memory via mmap, errno=" << errno
                    << ", errmsg=" << strerror_r(errno, buf, 64);
-        RELEASE_THREAD_LOCAL_MEM_TRACKER(length);
+        RELEASE_THREAD_MEM_TRACKER(length);
         return nullptr;
     }
     return ptr;
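The macro rename above keeps the existing pattern: memory obtained outside the hooked allocator (here mmap) is attributed to the current thread's tracker explicitly. A hedged sketch of the same pattern in user code follows; the macros are assumed to come from runtime/thread_context.h as used elsewhere in this patch, and tracked_mmap itself is illustrative.

#include <sys/mman.h>

#include <cstddef>
#include <cstdint>

#include "runtime/thread_context.h"  // assumed home of the *_THREAD_MEM_TRACKER macros

// Hypothetical helper; mirrors the consume-then-roll-back-on-failure pattern above.
uint8_t* tracked_mmap(size_t length) {
    CONSUME_THREAD_MEM_TRACKER(length);
    auto* ptr = (uint8_t*)mmap(nullptr, length, PROT_READ | PROT_WRITE,
                               MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    if (ptr == MAP_FAILED) {
        RELEASE_THREAD_MEM_TRACKER(length);  // undo the tracking on failure
        return nullptr;
    }
    return ptr;
}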
diff --git a/be/src/runtime/tcmalloc_hook.h b/be/src/runtime/memory/tcmalloc_hook.h
similarity index 68%
rename from be/src/runtime/tcmalloc_hook.h
rename to be/src/runtime/memory/tcmalloc_hook.h
index 8b3a0290ed..627f42795d 100644
--- a/be/src/runtime/tcmalloc_hook.h
+++ b/be/src/runtime/memory/tcmalloc_hook.h
@@ -21,7 +21,6 @@
 #include 
 #include 
 
-#include "runtime/mem_tracker.h"
 #include "runtime/thread_context.h"
 
 // Notice: modify the command in New/Delete Hook should be careful enough!,
@@ -37,32 +36,27 @@
 //  destructor to control the behavior of consume can lead to unexpected behavior,
 //  like this: if (LIKELY(doris::start_thread_mem_tracker)) {
 void new_hook(const void* ptr, size_t size) {
-    if (doris::btls_key != doris::EMPTY_BTLS_KEY && doris::bthread_tls != nullptr) {
+    if (doris::btls_key != doris::EMPTY_BTLS_KEY && doris::bthread_context != nullptr) {
         // Currently in bthread, consume thread context mem tracker in bthread tls.
-        if (doris::btls_key != doris::bthread_tls_key) {
-            // pthread switch occurs, updating bthread_tls and bthread_tls_key cached in pthread tls.
-            doris::bthread_tls =
-                    static_cast(bthread_getspecific(doris::btls_key));
-            doris::bthread_tls_key = doris::btls_key;
-        }
-        doris::bthread_tls->_thread_mem_tracker_mgr->cache_consume(tc_nallocx(size, 0));
-    } else if (doris::thread_local_ctx._init) {
-        doris::thread_local_ctx._tls->_thread_mem_tracker_mgr->cache_consume(tc_nallocx(size, 0));
+        doris::update_bthread_context();
+        doris::bthread_context->_thread_mem_tracker_mgr->consume(tc_nallocx(size, 0));
+    } else if (doris::thread_context_ptr._init) {
+        doris::thread_context_ptr._ptr->_thread_mem_tracker_mgr->consume(tc_nallocx(size, 0));
+    } else {
+        doris::ThreadMemTrackerMgr::consume_no_attach(tc_nallocx(size, 0));
     }
 }
 
 void delete_hook(const void* ptr) {
-    if (doris::btls_key != doris::EMPTY_BTLS_KEY && doris::bthread_tls != nullptr) {
-        if (doris::btls_key != doris::bthread_tls_key) {
-            doris::bthread_tls =
-                    static_cast(bthread_getspecific(doris::btls_key));
-            doris::bthread_tls_key = doris::btls_key;
-        }
-        doris::bthread_tls->_thread_mem_tracker_mgr->cache_consume(
+    if (doris::btls_key != doris::EMPTY_BTLS_KEY && doris::bthread_context != nullptr) {
+        doris::update_bthread_context();
+        doris::bthread_context->_thread_mem_tracker_mgr->consume(
                -tc_malloc_size(const_cast<void*>(ptr)));
-    } else if (doris::thread_local_ctx._init) {
-        doris::thread_local_ctx._tls->_thread_mem_tracker_mgr->cache_consume(
+    } else if (doris::thread_context_ptr._init) {
+        doris::thread_context_ptr._ptr->_thread_mem_tracker_mgr->consume(
                -tc_malloc_size(const_cast<void*>(ptr)));
+    } else {
+        doris::ThreadMemTrackerMgr::consume_no_attach(-tc_malloc_size(const_cast<void*>(ptr)));
     }
 }
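
The hook dispatch above can be summarized as: prefer the bthread-local context, then the pthread-local thread context, and fall back to process-level accounting when no context has been initialized yet. The following simplified standalone sketch shows only that dispatch order; the Fake* types and globals are stand-ins, not the real ThreadContext/ThreadMemTrackerMgr.

#include <cstdint>

// Stand-ins for the thread/bthread contexts; illustrative only.
struct FakeTrackerMgr {
    void consume(int64_t n) { total += n; }
    int64_t total = 0;
};
struct FakeContext {
    FakeTrackerMgr mgr;
};

thread_local FakeContext* g_bthread_ctx = nullptr; // set while running inside a bthread
thread_local FakeContext* g_pthread_ctx = nullptr; // set once the pthread-local context exists
static FakeTrackerMgr g_process_mgr;               // fallback: process-level accounting

void on_alloc_sketch(int64_t size) {
    if (g_bthread_ctx != nullptr) {
        g_bthread_ctx->mgr.consume(size); // bthread-local context wins
    } else if (g_pthread_ctx != nullptr) {
        g_pthread_ctx->mgr.consume(size); // otherwise the pthread-local context
    } else {
        g_process_mgr.consume(size);      // no context yet: account at process level
    }
}
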
 
diff --git a/be/src/runtime/memory/thread_mem_tracker_mgr.cpp b/be/src/runtime/memory/thread_mem_tracker_mgr.cpp
new file mode 100644
index 0000000000..c129deb8ee
--- /dev/null
+++ b/be/src/runtime/memory/thread_mem_tracker_mgr.cpp
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "runtime/memory/thread_mem_tracker_mgr.h"
+
+#include "runtime/exec_env.h"
+#include "runtime/fragment_mgr.h"
+#include "runtime/memory/mem_tracker_task_pool.h"
+#include "service/backend_options.h"
+
+namespace doris {
+
+void ThreadMemTrackerMgr::attach_limiter_tracker(const std::string& cancel_msg,
+                                                 const std::string& task_id,
+                                                 const TUniqueId& fragment_instance_id,
+                                                 MemTrackerLimiter* mem_tracker) {
+    DCHECK(mem_tracker);
+    _task_id = task_id;
+    _fragment_instance_id = fragment_instance_id;
+    _exceed_cb.cancel_msg = cancel_msg;
+    _limiter_tracker = mem_tracker;
+}
+
+void ThreadMemTrackerMgr::detach_limiter_tracker() {
+    flush_untracked_mem();
+    _task_id = "";
+    _fragment_instance_id = TUniqueId();
+    _exceed_cb.cancel_msg = "";
+    _limiter_tracker = ExecEnv::GetInstance()->process_mem_tracker();
+}
+
+void ThreadMemTrackerMgr::exceeded_cancel_task(const std::string& cancel_details) {
+    if (_fragment_instance_id != TUniqueId()) {
+        ExecEnv::GetInstance()->fragment_mgr()->cancel(
+                _fragment_instance_id, PPlanFragmentCancelReason::MEMORY_LIMIT_EXCEED,
+                cancel_details);
+    }
+}
+
+void ThreadMemTrackerMgr::exceeded(int64_t mem_usage, Status try_consume_st) {
+    if (_exceed_cb.cb_func != nullptr) {
+        _exceed_cb.cb_func();
+    }
+    if (is_attach_task()) {
+        if (_exceed_cb.cancel_task) {
+            auto st = _limiter_tracker->mem_limit_exceeded(
+                    nullptr,
+                    fmt::format("Task mem limit exceeded and cancel it, msg:{}",
+                                _exceed_cb.cancel_msg),
+                    mem_usage, try_consume_st);
+            exceeded_cancel_task(st.to_string());
+            _exceed_cb.cancel_task = false; // Make sure it will only be canceled once
+        }
+    }
+}
+} // namespace doris
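
attach_limiter_tracker/detach_limiter_tracker above bracket a task: attach binds the thread to the task's limiter tracker plus its cancel message and ids, and detach flushes whatever is still cached and falls back to the process tracker. A rough sketch of the RAII shape this enables is shown below (comparable in spirit to SCOPED_ATTACH_TASK; all names are hypothetical, not the real classes).

#include <cstdint>

struct FakeLimiter {
    void consume(int64_t n) { used += n; }
    int64_t used = 0;
};

// Thread-local manager holding the currently attached limiter and the cached bytes.
struct FakeThreadMgr {
    void attach(FakeLimiter* t) { limiter = t; }
    void detach(FakeLimiter* fallback) {
        flush();
        limiter = fallback; // fall back to the process-level tracker
    }
    void flush() {
        if (limiter != nullptr) limiter->consume(cached);
        cached = 0;
    }
    FakeLimiter* limiter = nullptr;
    int64_t cached = 0;
};

thread_local FakeThreadMgr g_thread_mgr;

// RAII guard: attach on entry, flush and restore the fallback tracker on exit.
class ScopedAttachSketch {
public:
    ScopedAttachSketch(FakeLimiter* task_limiter, FakeLimiter* process_limiter)
            : _process_limiter(process_limiter) {
        g_thread_mgr.attach(task_limiter);
    }
    ~ScopedAttachSketch() { g_thread_mgr.detach(_process_limiter); }

private:
    FakeLimiter* _process_limiter;
};
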
diff --git a/be/src/runtime/memory/thread_mem_tracker_mgr.h b/be/src/runtime/memory/thread_mem_tracker_mgr.h
new file mode 100644
index 0000000000..d32eaaf082
--- /dev/null
+++ b/be/src/runtime/memory/thread_mem_tracker_mgr.h
@@ -0,0 +1,215 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include 
+#include 
+
+#include "runtime/memory/mem_tracker.h"
+#include "runtime/memory/mem_tracker_limiter.h"
+
+namespace doris {
+
+using ExceedCallBack = void (*)();
+struct MemExceedCallBackInfo {
+    std::string cancel_msg;
+    bool cancel_task; // Whether to cancel the task when the current tracker exceeds the limit.
+    ExceedCallBack cb_func;
+
+    MemExceedCallBackInfo() { init(); }
+
+    MemExceedCallBackInfo(const std::string& cancel_msg, bool cancel_task, ExceedCallBack cb_func)
+            : cancel_msg(cancel_msg), cancel_task(cancel_task), cb_func(cb_func) {}
+
+    void init() {
+        cancel_msg = "";
+        cancel_task = true;
+        cb_func = nullptr;
+    }
+};
+
+// Memory allocated and freed via the TCMalloc new/delete hooks is counted against the mem tracker of the current thread.
+//
+// In the original design, the MemTracker consume method is called before the memory is allocated:
+// if consume succeeds, the memory is actually allocated, otherwise an exception is thrown.
+// With the TCMalloc new/delete hooks, memory is only counted after it has actually been allocated,
+// which differs from the previous behavior; a large allocation may therefore only be found to
+// exceed the limit after the memory has already been allocated.
+class ThreadMemTrackerMgr {
+public:
+    ThreadMemTrackerMgr() {}
+
+    ~ThreadMemTrackerMgr() {
+        flush_untracked_mem();
+        _exceed_cb.init();
+        DCHECK(_consumer_tracker_stack.empty());
+    }
+
+    // only for tcmalloc hook
+    static void consume_no_attach(int64_t size) {
+        ExecEnv::GetInstance()->process_mem_tracker()->consume(size);
+    }
+
+    // After the thread has been initialized, `flush_untracked_mem` must be called before calling
+    // `init` again, otherwise the cached untracked memory is lost.
+    void init();
+
+    // After attach, the current thread TCMalloc Hook starts to consume/release task mem_tracker
+    void attach_limiter_tracker(const std::string& cancel_msg, const std::string& task_id,
+                                const TUniqueId& fragment_instance_id,
+                                MemTrackerLimiter* mem_tracker);
+
+    void detach_limiter_tracker();
+
+    // Must be fast enough! push/pop_consumer_tracker may be called very frequently,
+    // so for performance, add the consumer tracker as early as possible.
+    void push_consumer_tracker(MemTracker* mem_tracker);
+    void pop_consumer_tracker();
+
+    MemExceedCallBackInfo update_exceed_call_back(const std::string& cancel_msg, bool cancel_task,
+                                                  ExceedCallBack cb_func) {
+        _temp_exceed_cb = _exceed_cb;
+        _exceed_cb.cancel_msg = cancel_msg;
+        _exceed_cb.cancel_task = cancel_task;
+        _exceed_cb.cb_func = cb_func;
+        return _temp_exceed_cb;
+    }
+
+    void update_exceed_call_back(const MemExceedCallBackInfo& exceed_cb) { _exceed_cb = exceed_cb; }
+
+    // Note: if memory is allocated inside the TCMalloc new/delete hooks themselves, for example by
+    // calling LOG/iostream/sstream/stringstream related methods, extra control is required to avoid
+    // entering infinite recursion, otherwise it may cause a crash or hang.
+    void consume(int64_t size);
+
+    void transfer_to(int64_t size, MemTrackerLimiter* mem_tracker) {
+        consume(-size);
+        mem_tracker->consume(size);
+    }
+    void transfer_from(int64_t size, MemTrackerLimiter* mem_tracker) {
+        mem_tracker->release(size);
+        consume(size);
+    }
+
+    template <bool CheckLimit>
+    void flush_untracked_mem();
+
+    bool is_attach_task() { return _task_id != ""; }
+
+    MemTrackerLimiter* limiter_mem_tracker() { return _limiter_tracker; }
+
+    void set_check_limit(bool check_limit) { _check_limit = check_limit; }
+
+    std::string print_debug_string() {
+        fmt::memory_buffer consumer_tracker_buf;
+        for (const auto& v : _consumer_tracker_stack) {
+            fmt::format_to(consumer_tracker_buf, "{}, ", v->log_usage());
+        }
+        return fmt::format(
+                "ThreadMemTrackerMgr debug, _untracked_mem:{}, _task_id:{}, "
+                "_limiter_tracker:<{}>, _consumer_tracker_stack:<{}>",
+                std::to_string(_untracked_mem), _task_id, _limiter_tracker->log_usage(1),
+                fmt::to_string(consumer_tracker_buf));
+    }
+
+private:
+    // If try_consume fails because the task mem tracker exceeds its limit, the task must be canceled
+    void exceeded_cancel_task(const std::string& cancel_details);
+
+    void exceeded(int64_t mem_usage, Status try_consume_st);
+
+private:
+    // Cache untracked mem so that tracker->consume is not called for every allocation;
+    // the cache is flushed once it crosses config::mem_tracker_consume_min_size_bytes in either direction.
+    int64_t _untracked_mem = 0;
+
+    MemTrackerLimiter* _limiter_tracker;
+    std::vector<MemTracker*> _consumer_tracker_stack;
+
+    // If true, call memtracker try_consume, otherwise call consume.
+    bool _check_limit = false;
+    // If there is a memory new/delete operation in the consume method, it may enter infinite recursion.
+    bool _stop_consume = false;
+    std::string _task_id;
+    TUniqueId _fragment_instance_id;
+    MemExceedCallBackInfo _exceed_cb;
+    MemExceedCallBackInfo _temp_exceed_cb;
+};
+
+inline void ThreadMemTrackerMgr::init() {
+    DCHECK(_consumer_tracker_stack.empty());
+    _task_id = "";
+    _exceed_cb.init();
+    _limiter_tracker = ExecEnv::GetInstance()->process_mem_tracker();
+    _check_limit = true;
+}
+
+inline void ThreadMemTrackerMgr::push_consumer_tracker(MemTracker* tracker) {
+    DCHECK(tracker) << print_debug_string();
+    DCHECK(!std::count(_consumer_tracker_stack.begin(), _consumer_tracker_stack.end(), tracker))
+            << print_debug_string();
+    _consumer_tracker_stack.push_back(tracker);
+    tracker->release(_untracked_mem);
+}
+
+inline void ThreadMemTrackerMgr::pop_consumer_tracker() {
+    DCHECK(!_consumer_tracker_stack.empty());
+    _consumer_tracker_stack.back()->consume(_untracked_mem);
+    _consumer_tracker_stack.pop_back();
+}
+
+inline void ThreadMemTrackerMgr::consume(int64_t size) {
+    _untracked_mem += size;
+    // If some threads hold `0 < _untracked_mem < config::mem_tracker_consume_min_size_bytes` while
+    // other threads flush `_untracked_mem <= -config::mem_tracker_consume_min_size_bytes`,
+    // tracker->consumption may temporarily drop below 0.
+    if ((_untracked_mem >= config::mem_tracker_consume_min_size_bytes ||
+         _untracked_mem <= -config::mem_tracker_consume_min_size_bytes) &&
+        !_stop_consume) {
+        if (_check_limit) {
+            flush_untracked_mem<true>();
+        } else {
+            flush_untracked_mem<false>();
+        }
+    }
+}
+
+template <bool CheckLimit>
+inline void ThreadMemTrackerMgr::flush_untracked_mem() {
+    // Temporary memory may be allocated during the consumption of the mem tracker, which will lead to entering
+    // the TCMalloc Hook again, so suspend consumption to avoid falling into an infinite loop.
+    _stop_consume = true;
+    DCHECK(_limiter_tracker);
+    if (CheckLimit) {
+        Status st = _limiter_tracker->try_consume(_untracked_mem);
+        if (!st) {
+            // The memory has already been allocated, so when try_consume fails we still need to
+            // complete the consume to keep the statistics accurate.
+            _limiter_tracker->consume(_untracked_mem);
+            exceeded(_untracked_mem, st);
+        }
+    } else {
+        _limiter_tracker->consume(_untracked_mem);
+    }
+    for (auto tracker : _consumer_tracker_stack) {
+        tracker->consume(_untracked_mem);
+    }
+    _untracked_mem = 0;
+    _stop_consume = false;
+}
+
+} // namespace doris
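
The core of the class above is the batched consume path: accumulate into a thread-local counter, flush to the tracker once the counter crosses a threshold in either direction, and guard against re-entry because flushing may itself allocate. A condensed standalone sketch of that flow follows, using an atomic counter in place of MemTrackerLimiter; constants and names are illustrative.

#include <atomic>
#include <cstdint>

static std::atomic<int64_t> g_tracker{0};               // stands in for the limiter tracker
static constexpr int64_t kFlushThreshold = 1024 * 1024; // analogous to mem_tracker_consume_min_size_bytes

thread_local int64_t t_untracked = 0;
thread_local bool t_stop_consume = false;

void flush_untracked_sketch() {
    // Guard: flushing may itself allocate and re-enter the hook, so suspend consumption first.
    t_stop_consume = true;
    g_tracker.fetch_add(t_untracked);
    t_untracked = 0;
    t_stop_consume = false;
}

void consume_sketch(int64_t size) {
    t_untracked += size;
    if ((t_untracked >= kFlushThreshold || t_untracked <= -kFlushThreshold) && !t_stop_consume) {
        flush_untracked_sketch();
    }
}
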
diff --git a/be/src/runtime/memory_scratch_sink.cpp b/be/src/runtime/memory_scratch_sink.cpp
index ffa0e200dc..32bec47259 100644
--- a/be/src/runtime/memory_scratch_sink.cpp
+++ b/be/src/runtime/memory_scratch_sink.cpp
@@ -46,7 +46,7 @@ Status MemoryScratchSink::prepare_exprs(RuntimeState* state) {
     // From the thrift expressions create the real exprs.
     RETURN_IF_ERROR(Expr::create_expr_trees(state->obj_pool(), _t_output_expr, &_output_expr_ctxs));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _expr_mem_tracker));
+    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc));
     // generate the arrow schema
     RETURN_IF_ERROR(convert_to_arrow_schema(_row_desc, &_arrow_schema));
     return Status::OK();
diff --git a/be/src/runtime/mysql_table_sink.cpp b/be/src/runtime/mysql_table_sink.cpp
index c2cdd25712..c3357eacc5 100644
--- a/be/src/runtime/mysql_table_sink.cpp
+++ b/be/src/runtime/mysql_table_sink.cpp
@@ -20,7 +20,6 @@
 #include 
 
 #include "exprs/expr.h"
-#include "runtime/mem_tracker.h"
 #include "runtime/runtime_state.h"
 #include "util/debug_util.h"
 #include "util/runtime_profile.h"
@@ -29,10 +28,7 @@ namespace doris {
 
 MysqlTableSink::MysqlTableSink(ObjectPool* pool, const RowDescriptor& row_desc,
                                const std::vector& t_exprs)
-        : _pool(pool),
-          _row_desc(row_desc),
-          _t_output_expr(t_exprs),
-          _mem_tracker(MemTracker::create_tracker(-1, "MysqlTableSink")) {
+        : _pool(pool), _row_desc(row_desc), _t_output_expr(t_exprs) {
     _name = "MysqlTableSink";
 }
 
@@ -58,7 +54,7 @@ Status MysqlTableSink::init(const TDataSink& t_sink) {
 Status MysqlTableSink::prepare(RuntimeState* state) {
     RETURN_IF_ERROR(DataSink::prepare(state));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _mem_tracker));
+    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc));
     std::stringstream title;
     title << "MysqlTableSink (frag_id=" << state->fragment_instance_id() << ")";
     // create profile
diff --git a/be/src/runtime/mysql_table_sink.h b/be/src/runtime/mysql_table_sink.h
index 593ce96c15..08ae566a21 100644
--- a/be/src/runtime/mysql_table_sink.h
+++ b/be/src/runtime/mysql_table_sink.h
@@ -31,7 +31,6 @@ class TMysqlTableSink;
 class RuntimeState;
 class RuntimeProfile;
 class ExprContext;
-class MemTracker;
 
 // This class is a sinker, which put input data to mysql table
 class MysqlTableSink : public DataSink {
@@ -69,7 +68,6 @@ private:
     MysqlTableWriter* _writer;
 
     RuntimeProfile* _profile;
-    std::shared_ptr _mem_tracker;
 };
 
 } // namespace doris
diff --git a/be/src/runtime/odbc_table_sink.cpp b/be/src/runtime/odbc_table_sink.cpp
index 463b404921..11b175a90c 100644
--- a/be/src/runtime/odbc_table_sink.cpp
+++ b/be/src/runtime/odbc_table_sink.cpp
@@ -52,7 +52,7 @@ Status OdbcTableSink::init(const TDataSink& t_sink) {
 Status OdbcTableSink::prepare(RuntimeState* state) {
     RETURN_IF_ERROR(DataSink::prepare(state));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _expr_mem_tracker));
+    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc));
     std::stringstream title;
     title << _name << " (frag_id=" << state->fragment_instance_id() << ")";
     // create profile
diff --git a/be/src/runtime/plan_fragment_executor.cpp b/be/src/runtime/plan_fragment_executor.cpp
index cdfe8e0a2a..f098de7541 100644
--- a/be/src/runtime/plan_fragment_executor.cpp
+++ b/be/src/runtime/plan_fragment_executor.cpp
@@ -32,7 +32,7 @@
 #include "runtime/data_stream_mgr.h"
 #include "runtime/descriptors.h"
 #include "runtime/exec_env.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/result_buffer_mgr.h"
 #include "runtime/result_queue_mgr.h"
 #include "runtime/row_batch.h"
@@ -98,7 +98,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request,
     _runtime_state->set_tracer(std::move(tracer));
 
     RETURN_IF_ERROR(_runtime_state->init_mem_trackers(_query_id));
-    SCOPED_ATTACH_TASK_THREAD(_runtime_state.get(), _runtime_state->instance_mem_tracker());
+    SCOPED_ATTACH_TASK(_runtime_state.get());
     _runtime_state->set_be_number(request.backend_num);
     if (request.__isset.backend_id) {
         _runtime_state->set_backend_id(request.backend_id);
@@ -446,7 +446,7 @@ void PlanFragmentExecutor::_collect_node_statistics() {
 }
 
 void PlanFragmentExecutor::report_profile() {
-    SCOPED_ATTACH_TASK_THREAD(_runtime_state.get(), _runtime_state->instance_mem_tracker());
+    SCOPED_ATTACH_TASK(_runtime_state.get());
     VLOG_FILE << "report_profile(): instance_id=" << _runtime_state->fragment_instance_id();
     DCHECK(_report_status_cb);
 
diff --git a/be/src/runtime/qsorter.cpp b/be/src/runtime/qsorter.cpp
index cec1b6cd2e..6381c56597 100644
--- a/be/src/runtime/qsorter.cpp
+++ b/be/src/runtime/qsorter.cpp
@@ -79,9 +79,7 @@ bool TupleRowLessThan::operator()(TupleRow* const& lhs, TupleRow* const& rhs) co
 
 QSorter::QSorter(const RowDescriptor& row_desc, const std::vector& order_expr_ctxs,
                  RuntimeState* state)
-        : _row_desc(row_desc),
-          _order_expr_ctxs(order_expr_ctxs),
-          _tuple_pool(new MemPool("QSorter")) {}
+        : _row_desc(row_desc), _order_expr_ctxs(order_expr_ctxs), _tuple_pool(new MemPool()) {}
 
 Status QSorter::prepare(RuntimeState* state) {
     RETURN_IF_ERROR(Expr::clone_if_not_exists(_order_expr_ctxs, state, &_lhs_expr_ctxs));
diff --git a/be/src/runtime/result_file_sink.cpp b/be/src/runtime/result_file_sink.cpp
index 1f29c30574..cd3e61659a 100644
--- a/be/src/runtime/result_file_sink.cpp
+++ b/be/src/runtime/result_file_sink.cpp
@@ -82,7 +82,7 @@ Status ResultFileSink::prepare_exprs(RuntimeState* state) {
     // From the thrift expressions create the real exprs.
     RETURN_IF_ERROR(Expr::create_expr_trees(state->obj_pool(), _t_output_expr, &_output_expr_ctxs));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _expr_mem_tracker));
+    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc));
     return Status::OK();
 }
 
diff --git a/be/src/runtime/result_sink.cpp b/be/src/runtime/result_sink.cpp
index 7e67a8bfcb..de393e9e11 100644
--- a/be/src/runtime/result_sink.cpp
+++ b/be/src/runtime/result_sink.cpp
@@ -22,7 +22,7 @@
 #include "runtime/buffer_control_block.h"
 #include "runtime/exec_env.h"
 #include "runtime/file_result_writer.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/mysql_result_writer.h"
 #include "runtime/result_buffer_mgr.h"
 #include "runtime/row_batch.h"
@@ -51,7 +51,7 @@ Status ResultSink::prepare_exprs(RuntimeState* state) {
     // From the thrift expressions create the real exprs.
     RETURN_IF_ERROR(Expr::create_expr_trees(state->obj_pool(), _t_output_expr, &_output_expr_ctxs));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _expr_mem_tracker));
+    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc));
     return Status::OK();
 }
 
@@ -90,7 +90,7 @@ Status ResultSink::open(RuntimeState* state) {
 Status ResultSink::send(RuntimeState* state, RowBatch* batch) {
     // Memory consumed while sending the results is not checked against the query memory limit,
     // to avoid cancelling the query when the limit is reached after the result has already been produced.
-    STOP_CHECK_LIMIT_THREAD_LOCAL_MEM_TRACKER();
+    STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
     return _writer->append_row_batch(batch);
 }
 
diff --git a/be/src/runtime/row_batch.cpp b/be/src/runtime/row_batch.cpp
index 42aad8da09..2b1f0a09c1 100644
--- a/be/src/runtime/row_batch.cpp
+++ b/be/src/runtime/row_batch.cpp
@@ -44,8 +44,7 @@ const int RowBatch::AT_CAPACITY_MEM_USAGE = 8 * 1024 * 1024;
 const int RowBatch::FIXED_LEN_BUFFER_LIMIT = AT_CAPACITY_MEM_USAGE / 2;
 
 RowBatch::RowBatch(const RowDescriptor& row_desc, int capacity)
-        : _mem_tracker(tls_ctx()->_thread_mem_tracker_mgr->mem_tracker()),
-          _has_in_flight_row(false),
+        : _has_in_flight_row(false),
           _num_rows(0),
           _num_uncommitted_rows(0),
           _capacity(capacity),
@@ -70,8 +69,7 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, int capacity)
 // to allocated string data in special mempool
 // (change via python script that runs over Data_types.cc)
 RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch)
-        : _mem_tracker(tls_ctx()->_thread_mem_tracker_mgr->mem_tracker()),
-          _has_in_flight_row(false),
+        : _has_in_flight_row(false),
           _num_rows(input_batch.num_rows()),
           _num_uncommitted_rows(0),
           _capacity(_num_rows),
@@ -326,7 +324,6 @@ void RowBatch::add_io_buffer(DiskIoMgr::BufferDescriptor* buffer) {
     DCHECK(buffer != nullptr);
     _io_buffers.push_back(buffer);
     _auxiliary_mem_usage += buffer->buffer_len();
-    buffer->update_mem_tracker(_mem_tracker.get());
 }
 
 Status RowBatch::resize_and_allocate_tuple_buffer(RuntimeState* state, int64_t* tuple_buffer_size,
@@ -405,7 +402,6 @@ void RowBatch::transfer_resource_ownership(RowBatch* dest) {
         DiskIoMgr::BufferDescriptor* buffer = _io_buffers[i];
         dest->_io_buffers.push_back(buffer);
         dest->_auxiliary_mem_usage += buffer->buffer_len();
-        buffer->update_mem_tracker(dest->_mem_tracker.get());
     }
     _io_buffers.clear();
 
@@ -514,7 +510,6 @@ void RowBatch::acquire_state(RowBatch* src) {
         DiskIoMgr::BufferDescriptor* buffer = src->_io_buffers[i];
         _io_buffers.push_back(buffer);
         _auxiliary_mem_usage += buffer->buffer_len();
-        buffer->update_mem_tracker(_mem_tracker.get());
     }
     src->_io_buffers.clear();
     src->_auxiliary_mem_usage = 0;
diff --git a/be/src/runtime/row_batch.h b/be/src/runtime/row_batch.h
index 9deb9e9b82..d9d7f6be38 100644
--- a/be/src/runtime/row_batch.h
+++ b/be/src/runtime/row_batch.h
@@ -394,15 +394,6 @@ public:
     std::string to_string();
 
 private:
-    // Back up the current thread local mem tracker. Used when transferring buffer memory between row batches.
-    // Memory operations in the actual row batch are automatically recorded in the thread local mem tracker.
-    // Change the recording position in the mem tracker specified by the external switch.
-    // Note: Raw pointers cannot be used directly, because when transferring_resource_ownership to other RowBatch,
-    // the src mem tracker when creating the current RowBatch may have been destroyed.
-    // At this time, the transfer of memory ownership cannot be completed, resulting in consumption > 0
-    // when the src mem tracker is destructed, and the memory statistics of the dst mem tracker are missing.
-    std::shared_ptr _mem_tracker;
-
     // Close owned tuple streams and delete if needed.
     void close_tuple_streams();
 
diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp
index 223be0a34d..d231cd75c8 100644
--- a/be/src/runtime/runtime_filter_mgr.cpp
+++ b/be/src/runtime/runtime_filter_mgr.cpp
@@ -23,7 +23,7 @@
 #include "exprs/runtime_filter.h"
 #include "gen_cpp/internal_service.pb.h"
 #include "runtime/exec_env.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/plan_fragment_executor.h"
 #include "runtime/runtime_state.h"
 #include "runtime/thread_context.h"
@@ -44,9 +44,9 @@ RuntimeFilterMgr::RuntimeFilterMgr(const UniqueId& query_id, RuntimeState* state
 
 RuntimeFilterMgr::~RuntimeFilterMgr() {}
 
-Status RuntimeFilterMgr::init() {
+Status RuntimeFilterMgr::init(MemTrackerLimiter* parent_tracker) {
     DCHECK(_state->instance_mem_tracker() != nullptr);
-    _tracker = MemTracker::create_tracker(-1, "RuntimeFilterMgr", _state->instance_mem_tracker());
+    _tracker = std::make_unique<MemTracker>("RuntimeFilterMgr", parent_tracker);
     return Status::OK();
 }
 
@@ -83,7 +83,7 @@ Status RuntimeFilterMgr::regist_filter(const RuntimeFilterRole role, const TRunt
                                        const TQueryOptions& options, int node_id) {
     DCHECK((role == RuntimeFilterRole::CONSUMER && node_id >= 0) ||
            role != RuntimeFilterRole::CONSUMER);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_tracker.get());
     int32_t key = desc.filter_id;
 
     std::map* filter_map = nullptr;
@@ -111,7 +111,7 @@ Status RuntimeFilterMgr::regist_filter(const RuntimeFilterRole role, const TRunt
 }
 
 Status RuntimeFilterMgr::update_filter(const PPublishFilterRequest* request, const char* data) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_tracker.get());
     UpdateRuntimeFilterParams params;
     params.request = request;
     params.data = data;
@@ -155,10 +155,6 @@ Status RuntimeFilterMergeControllerEntity::_init_with_desc(
     // LOG(INFO) << "entity filter id:" << filter_id;
     cntVal->filter->init_with_desc(&cntVal->runtime_filter_desc, query_options,
                                    _fragment_instance_id);
-    cntVal->tracker = MemTracker::create_tracker(
-            -1,
-            tls_ctx()->_thread_mem_tracker_mgr->mem_tracker()->label() + ":FilterID:" + filter_id,
-            tls_ctx()->_thread_mem_tracker_mgr->mem_tracker());
     _filter_map.emplace(filter_id, cntVal);
     return Status::OK();
 }
@@ -168,8 +164,8 @@ Status RuntimeFilterMergeControllerEntity::init(UniqueId query_id, UniqueId frag
                                                 const TQueryOptions& query_options) {
     _query_id = query_id;
     _fragment_instance_id = fragment_instance_id;
-    _mem_tracker = MemTracker::create_tracker(-1, "RuntimeFilterMergeControllerEntity", nullptr);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    _mem_tracker = std::make_unique<MemTracker>("RuntimeFilterMergeControllerEntity");
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     for (auto& filterid_to_desc : runtime_filter_params.rid_to_runtime_filter) {
         int filter_id = filterid_to_desc.first;
         const auto& target_iter = runtime_filter_params.rid_to_target_param.find(filter_id);
@@ -189,7 +185,7 @@ Status RuntimeFilterMergeControllerEntity::init(UniqueId query_id, UniqueId frag
 // merge data
 Status RuntimeFilterMergeControllerEntity::merge(const PMergeFilterRequest* request,
                                                  const char* data) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     std::shared_ptr cntVal;
     int merged_size = 0;
     {
@@ -201,7 +197,6 @@ Status RuntimeFilterMergeControllerEntity::merge(const PMergeFilterRequest* requ
             return Status::InvalidArgument("unknown filter id");
         }
         cntVal = iter->second;
-        SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(cntVal->tracker);
         MergeRuntimeFilterParams params;
         params.data = data;
         params.request = request;
diff --git a/be/src/runtime/runtime_filter_mgr.h b/be/src/runtime/runtime_filter_mgr.h
index 7d1054560d..340e9b27c7 100644
--- a/be/src/runtime/runtime_filter_mgr.h
+++ b/be/src/runtime/runtime_filter_mgr.h
@@ -57,7 +57,7 @@ public:
 
     ~RuntimeFilterMgr();
 
-    Status init();
+    Status init(MemTrackerLimiter* parent_tracker);
 
     // get a consumer filter by filter-id
     Status get_consume_filter(const int filter_id, IRuntimeFilter** consumer_filter);
@@ -90,7 +90,7 @@ private:
     std::map _producer_map;
 
     RuntimeState* _state;
-    std::shared_ptr _tracker;
+    std::unique_ptr<MemTracker> _tracker;
     ObjectPool _pool;
 
     TNetworkAddress _merge_addr;
@@ -129,14 +129,13 @@ private:
         std::vector target_info;
         IRuntimeFilter* filter;
         std::unordered_set arrive_id; // fragment_instance_id ?
-        std::shared_ptr tracker;
         std::shared_ptr pool;
     };
     UniqueId _query_id;
     UniqueId _fragment_instance_id;
     // protect _filter_map
     std::mutex _filter_map_mutex;
-    std::shared_ptr _mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
     // TODO: convert filter id to i32
     // filter-id -> val
     std::map> _filter_map;
diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp
index 6bbaabd1ea..054390d990 100644
--- a/be/src/runtime/runtime_state.cpp
+++ b/be/src/runtime/runtime_state.cpp
@@ -36,8 +36,8 @@
 #include "runtime/exec_env.h"
 #include "runtime/initial_reservations.h"
 #include "runtime/load_path_mgr.h"
-#include "runtime/mem_tracker.h"
-#include "runtime/mem_tracker_task_pool.h"
+#include "runtime/memory/mem_tracker.h"
+#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/runtime_filter_mgr.h"
 #include "util/file_utils.h"
 #include "util/load_error_hub.h"
@@ -169,6 +169,10 @@ RuntimeState::~RuntimeState() {
     if (_buffer_reservation != nullptr) {
         _buffer_reservation->Close();
     }
+
+    // Manually release the child mem trackers before _instance_mem_tracker is destructed.
+    _obj_pool->clear();
+    _runtime_filter_mgr.reset();
 }
 
 Status RuntimeState::init(const TUniqueId& fragment_instance_id, const TQueryOptions& query_options,
@@ -217,13 +221,13 @@ Status RuntimeState::init(const TUniqueId& fragment_instance_id, const TQueryOpt
 Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) {
     bool has_query_mem_tracker = _query_options.__isset.mem_limit && (_query_options.mem_limit > 0);
     int64_t bytes_limit = has_query_mem_tracker ? _query_options.mem_limit : -1;
-    if (bytes_limit > MemTracker::get_process_tracker()->limit()) {
+    if (bytes_limit > ExecEnv::GetInstance()->process_mem_tracker()->limit()) {
         VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES)
                     << " exceeds process memory limit of "
-                    << PrettyPrinter::print(MemTracker::get_process_tracker()->limit(),
+                    << PrettyPrinter::print(ExecEnv::GetInstance()->process_mem_tracker()->limit(),
                                             TUnit::BYTES)
                     << ". Using process memory limit instead";
-        bytes_limit = MemTracker::get_process_tracker()->limit();
+        bytes_limit = ExecEnv::GetInstance()->process_mem_tracker()->limit();
     }
     auto mem_tracker_counter = ADD_COUNTER(&_profile, "MemoryLimit", TUnit::BYTES);
     mem_tracker_counter->set(bytes_limit);
@@ -239,30 +243,29 @@ Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) {
         DCHECK(false);
     }
 
-    _instance_mem_tracker = MemTracker::create_tracker(
+    _instance_mem_tracker = std::make_unique<MemTrackerLimiter>(
             bytes_limit, "RuntimeState:instance:" + print_id(_fragment_instance_id),
-            _query_mem_tracker, MemTrackerLevel::INSTANCE, &_profile);
+            _query_mem_tracker, &_profile);
 
     RETURN_IF_ERROR(init_buffer_poolstate());
 
-    _initial_reservations = _obj_pool->add(
-            new InitialReservations(_obj_pool.get(), _buffer_reservation, nullptr,
-                                    _query_options.initial_reservation_total_claims));
+    _initial_reservations = _obj_pool->add(new InitialReservations(
+            _obj_pool.get(), _buffer_reservation, _query_options.initial_reservation_total_claims));
     RETURN_IF_ERROR(_initial_reservations->Init(_query_id, min_reservation()));
     DCHECK_EQ(0, _initial_reservation_refcnt.load());
 
     if (_instance_buffer_reservation != nullptr) {
-        _instance_buffer_reservation->InitChildTracker(&_profile, _buffer_reservation, nullptr,
+        _instance_buffer_reservation->InitChildTracker(&_profile, _buffer_reservation,
                                                        std::numeric_limits<int64_t>::max());
     }
 
     // filter manager depends _instance_mem_tracker
-    _runtime_filter_mgr->init();
+    _runtime_filter_mgr->init(_instance_mem_tracker.get());
     return Status::OK();
 }
 
 Status RuntimeState::init_instance_mem_tracker() {
-    _instance_mem_tracker = MemTracker::create_tracker(-1, "RuntimeState");
+    _instance_mem_tracker = std::make_unique<MemTrackerLimiter>(-1, "RuntimeState:instance");
     return Status::OK();
 }
 
@@ -284,22 +287,16 @@ Status RuntimeState::init_buffer_poolstate() {
     VLOG_QUERY << "Buffer pool limit for " << print_id(_query_id) << ": " << max_reservation;
 
     _buffer_reservation = _obj_pool->add(new ReservationTracker);
-    _buffer_reservation->InitChildTracker(nullptr, exec_env->buffer_reservation(), nullptr,
-                                          max_reservation);
+    _buffer_reservation->InitChildTracker(nullptr, exec_env->buffer_reservation(), max_reservation);
 
     return Status::OK();
 }
 
 Status RuntimeState::create_block_mgr() {
     DCHECK(_block_mgr2.get() == nullptr);
-
-    int64_t block_mgr_limit = _query_mem_tracker->limit();
-    if (block_mgr_limit < 0) {
-        block_mgr_limit = std::numeric_limits::max();
-    }
-    RETURN_IF_ERROR(BufferedBlockMgr2::create(
-            this, runtime_profile(), _exec_env->tmp_file_mgr(), block_mgr_limit,
-            _exec_env->disk_io_mgr()->max_read_buffer_size(), &_block_mgr2));
+    RETURN_IF_ERROR(BufferedBlockMgr2::create(this, runtime_profile(), _exec_env->tmp_file_mgr(),
+                                              _exec_env->disk_io_mgr()->max_read_buffer_size(),
+                                              &_block_mgr2));
     return Status::OK();
 }
 
diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index 719139e994..f561adecf8 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -127,8 +127,8 @@ public:
     const TUniqueId& query_id() const { return _query_id; }
     const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; }
     ExecEnv* exec_env() { return _exec_env; }
-    std::shared_ptr query_mem_tracker() { return _query_mem_tracker; }
-    std::shared_ptr instance_mem_tracker() { return _instance_mem_tracker; }
+    MemTrackerLimiter* query_mem_tracker() { return _query_mem_tracker; }
+    MemTrackerLimiter* instance_mem_tracker() { return _instance_mem_tracker.get(); }
     ThreadResourceMgr::ResourcePool* resource_pool() { return _resource_pool; }
 
     void set_fragment_root_id(PlanNodeId id) {
@@ -390,10 +390,10 @@ private:
 
     // MemTracker that is shared by all fragment instances running on this host.
     // The query mem tracker must be released after the _instance_mem_tracker.
-    std::shared_ptr _query_mem_tracker;
+    MemTrackerLimiter* _query_mem_tracker;
 
     // Memory usage of this fragment instance
-    std::shared_ptr _instance_mem_tracker;
+    std::unique_ptr<MemTrackerLimiter> _instance_mem_tracker;
 
     // put runtime state before _obj_pool, so that it will be deconstructed after
     // _obj_pool. Because some of object in _obj_pool will use profile when deconstructing.
diff --git a/be/src/runtime/sorted_run_merger.cc b/be/src/runtime/sorted_run_merger.cc
index 5f4be7f2bf..de44701293 100644
--- a/be/src/runtime/sorted_run_merger.cc
+++ b/be/src/runtime/sorted_run_merger.cc
@@ -129,7 +129,7 @@ public:
         *done = false;
         _pull_task_thread =
                 std::thread(&SortedRunMerger::ParallelBatchedRowSupplier::process_sorted_run_task,
-                            this, tls_ctx()->_thread_mem_tracker_mgr->mem_tracker());
+                            this, thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker());
 
         RETURN_IF_ERROR(next(nullptr, done));
         return Status::OK();
@@ -182,8 +182,8 @@ private:
     // signal of new batch or the eos/cancelled condition
     std::condition_variable _batch_prepared_cv;
 
-    void process_sorted_run_task(const std::shared_ptr& mem_tracker) {
-        SCOPED_ATTACH_TASK_THREAD(ThreadContext::TaskType::QUERY, mem_tracker);
+    void process_sorted_run_task(MemTrackerLimiter* mem_tracker) {
+        SCOPED_ATTACH_TASK(mem_tracker, ThreadContext::TaskType::QUERY);
         std::unique_lock lock(_mutex);
         while (true) {
             _batch_prepared_cv.wait(lock, [this]() { return !_backup_ready.load(); });
diff --git a/be/src/runtime/spill_sorter.cc b/be/src/runtime/spill_sorter.cc
index c86a9eaebe..97ec436956 100644
--- a/be/src/runtime/spill_sorter.cc
+++ b/be/src/runtime/spill_sorter.cc
@@ -1024,8 +1024,7 @@ inline void SpillSorter::TupleSorter::swap(uint8_t* left, uint8_t* right) {
 // SpillSorter methods
 SpillSorter::SpillSorter(const TupleRowComparator& compare_less_than,
                          const vector& slot_materialize_expr_ctxs,
-                         RowDescriptor* output_row_desc,
-                         const std::shared_ptr& mem_tracker, RuntimeProfile* profile,
+                         RowDescriptor* output_row_desc, RuntimeProfile* profile,
                          RuntimeState* state)
         : _state(state),
           _compare_less_than(compare_less_than),
@@ -1034,7 +1033,6 @@ SpillSorter::SpillSorter(const TupleRowComparator& compare_less_than,
           _block_mgr_client(nullptr),
           _has_var_len_slots(false),
           _sort_tuple_slot_expr_ctxs(slot_materialize_expr_ctxs),
-          _mem_tracker(mem_tracker),
           _output_row_desc(output_row_desc),
           _unsorted_run(nullptr),
           _profile(profile),
@@ -1077,8 +1075,7 @@ Status SpillSorter::init() {
     if (_output_row_desc->tuple_descriptors()[0]->has_varlen_slots()) {
         min_blocks_required *= 2;
     }
-    RETURN_IF_ERROR(_block_mgr->register_client(min_blocks_required, _mem_tracker, _state,
-                                                &_block_mgr_client));
+    RETURN_IF_ERROR(_block_mgr->register_client(min_blocks_required, _state, &_block_mgr_client));
 
     DCHECK(_unsorted_run != nullptr);
     RETURN_IF_ERROR(_unsorted_run->init());
diff --git a/be/src/runtime/spill_sorter.h b/be/src/runtime/spill_sorter.h
index 71ee4c9b6d..20960b7230 100644
--- a/be/src/runtime/spill_sorter.h
+++ b/be/src/runtime/spill_sorter.h
@@ -93,8 +93,7 @@ public:
     // and retrieve rows from an intermediate merger.
     SpillSorter(const TupleRowComparator& compare_less_than,
                 const std::vector& sort_tuple_slot_expr_ctxs,
-                RowDescriptor* output_row_desc, const std::shared_ptr& mem_tracker,
-                RuntimeProfile* profile, RuntimeState* state);
+                RowDescriptor* output_row_desc, RuntimeProfile* profile, RuntimeState* state);
 
     ~SpillSorter();
 
@@ -170,9 +169,6 @@ private:
     // Expressions used to materialize the sort tuple. Contains one expr per slot in the tuple.
     std::vector _sort_tuple_slot_expr_ctxs;
 
-    // Mem tracker for batches created during merge. Not owned by SpillSorter.
-    std::shared_ptr _mem_tracker;
-
     // Descriptor for the sort tuple. Input rows are materialized into 1 tuple before
     // sorting. Not owned by the SpillSorter.
     RowDescriptor* _output_row_desc;
diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp
index 2ee5ea2dcd..f35ffec2b7 100644
--- a/be/src/runtime/tablets_channel.cpp
+++ b/be/src/runtime/tablets_channel.cpp
@@ -36,7 +36,6 @@ TabletsChannel::TabletsChannel(const TabletsChannelKey& key, bool is_high_priori
           _closed_senders(64),
           _is_high_priority(is_high_priority),
           _is_vec(is_vec) {
-    _mem_tracker = MemTracker::create_tracker(-1, "TabletsChannel:" + std::to_string(key.index_id));
     static std::once_flag once_flag;
     std::call_once(once_flag, [] {
         REGISTER_HOOK_METRIC(tablet_writer_count, [&]() { return _s_tablet_writer_count.load(); });
@@ -208,6 +207,14 @@ Status TabletsChannel::reduce_mem_usage(int64_t mem_limit) {
     return Status::OK();
 }
 
+int64_t TabletsChannel::mem_consumption() {
+    int64_t mem_usage = 0;
+    for (auto& it : _tablet_writers) {
+        mem_usage += it.second->mem_consumption();
+    }
+    return mem_usage;
+}
+
 Status TabletsChannel::_open_all_writers(const PTabletWriterOpenRequest& request) {
     std::vector* index_slots = nullptr;
     int32_t schema_hash = 0;
diff --git a/be/src/runtime/tablets_channel.h b/be/src/runtime/tablets_channel.h
index 69897544ca..bd7504cd2f 100644
--- a/be/src/runtime/tablets_channel.h
+++ b/be/src/runtime/tablets_channel.h
@@ -28,7 +28,7 @@
 #include "gutil/strings/substitute.h"
 #include "olap/delta_writer.h"
 #include "runtime/descriptors.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/thread_context.h"
 #include "util/bitmap.h"
 #include "util/priority_thread_pool.hpp"
@@ -88,7 +88,7 @@ public:
     // no-op when this channel has been closed or cancelled
     Status reduce_mem_usage(int64_t mem_limit);
 
-    int64_t mem_consumption() const { return _mem_tracker->consumption(); }
+    int64_t mem_consumption();
 
 private:
     template 
@@ -141,8 +141,6 @@ private:
 
     std::unordered_set _partition_ids;
 
-    std::shared_ptr _mem_tracker;
-
     static std::atomic _s_tablet_writer_count;
 
     bool _is_high_priority = false;
@@ -171,7 +169,6 @@ Status TabletsChannel::_get_current_seq(int64_t& cur_seq, const Request& request
 template 
 Status TabletsChannel::add_batch(const TabletWriterAddRequest& request,
                                  TabletWriterAddResult* response) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
     int64_t cur_seq = 0;
 
     auto status = _get_current_seq(cur_seq, request);
diff --git a/be/src/runtime/thread_context.cpp b/be/src/runtime/thread_context.cpp
index 63d84cf21d..5bfa58fa39 100644
--- a/be/src/runtime/thread_context.cpp
+++ b/be/src/runtime/thread_context.cpp
@@ -22,171 +22,131 @@
 
 namespace doris {
 
-DEFINE_STATIC_THREAD_LOCAL(ThreadContext, ThreadContextPtr, _tls);
+DEFINE_STATIC_THREAD_LOCAL(ThreadContext, ThreadContextPtr, _ptr);
 
 ThreadContextPtr::ThreadContextPtr() {
-    INIT_STATIC_THREAD_LOCAL(ThreadContext, _tls);
+    INIT_STATIC_THREAD_LOCAL(ThreadContext, _ptr);
+    _init = true;
 }
 
-AttachTaskThread::AttachTaskThread(const ThreadContext::TaskType& type, const std::string& task_id,
-                                   const TUniqueId& fragment_instance_id,
-                                   const std::shared_ptr& mem_tracker) {
-    DCHECK(task_id != "");
-#ifdef USE_MEM_TRACKER
-    tls_ctx()->attach(type, task_id, fragment_instance_id, mem_tracker);
-#endif
-}
-
-AttachTaskThread::AttachTaskThread(const ThreadContext::TaskType& type,
-                                   const std::shared_ptr& mem_tracker) {
-#ifndef BE_TEST
+AttachTask::AttachTask(MemTrackerLimiter* mem_tracker, const ThreadContext::TaskType& type,
+                       const std::string& task_id, const TUniqueId& fragment_instance_id) {
     DCHECK(mem_tracker);
-#endif
 #ifdef USE_MEM_TRACKER
-    tls_ctx()->attach(type, "", TUniqueId(), mem_tracker);
+    thread_context()->attach_task(type, task_id, fragment_instance_id, mem_tracker);
 #endif
 }
 
-AttachTaskThread::AttachTaskThread(const TQueryType::type& query_type,
-                                   const std::shared_ptr& mem_tracker) {
-#ifndef BE_TEST
-    DCHECK(mem_tracker);
-#endif
-#ifdef USE_MEM_TRACKER
-    tls_ctx()->attach(query_to_task_type(query_type), "", TUniqueId(), mem_tracker);
-#endif
-}
+// AttachTask::AttachTask(const TQueryType::type& query_type,
+//                                    MemTrackerLimiter* mem_tracker) {
+//     DCHECK(mem_tracker);
+// #ifdef USE_MEM_TRACKER
+//     thread_context()->attach_task(query_to_task_type(query_type), "", TUniqueId(), mem_tracker);
+// #endif
+// }
 
-AttachTaskThread::AttachTaskThread(const TQueryType::type& query_type,
-                                   const std::shared_ptr& mem_tracker,
-                                   const std::string& task_id,
-                                   const TUniqueId& fragment_instance_id) {
-#ifndef BE_TEST
-    DCHECK(task_id != "");
-    DCHECK(fragment_instance_id != TUniqueId());
-    DCHECK(mem_tracker);
-#endif
-#ifdef USE_MEM_TRACKER
-    tls_ctx()->attach(query_to_task_type(query_type), task_id, fragment_instance_id, mem_tracker);
-#endif
-}
+// AttachTask::AttachTask(const TQueryType::type& query_type,
+//                                    MemTrackerLimiter* mem_tracker, const std::string& task_id,
+//                                    const TUniqueId& fragment_instance_id) {
+//     DCHECK(task_id != "");
+//     DCHECK(fragment_instance_id != TUniqueId());
+//     DCHECK(mem_tracker);
+// #ifdef USE_MEM_TRACKER
+//     thread_context()->attach_task(query_to_task_type(query_type), task_id, fragment_instance_id, mem_tracker);
+// #endif
+// }
 
-AttachTaskThread::AttachTaskThread(const RuntimeState* runtime_state,
-                                   const std::shared_ptr& mem_tracker) {
+AttachTask::AttachTask(RuntimeState* runtime_state) {
 #ifndef BE_TEST
     DCHECK(print_id(runtime_state->query_id()) != "");
     DCHECK(runtime_state->fragment_instance_id() != TUniqueId());
-    DCHECK(mem_tracker);
-#endif
+#endif // BE_TEST
+    DCHECK(runtime_state->instance_mem_tracker());
 #ifdef USE_MEM_TRACKER
-    tls_ctx()->attach(query_to_task_type(runtime_state->query_type()),
-                      print_id(runtime_state->query_id()), runtime_state->fragment_instance_id(),
-                      mem_tracker);
-#endif
+    thread_context()->attach_task(
+            query_to_task_type(runtime_state->query_type()), print_id(runtime_state->query_id()),
+            runtime_state->fragment_instance_id(), runtime_state->instance_mem_tracker());
+#endif // USE_MEM_TRACKER
 }
 
-AttachTaskThread::~AttachTaskThread() {
+AttachTask::~AttachTask() {
 #ifdef USE_MEM_TRACKER
-    tls_ctx()->detach();
+    thread_context()->detach_task();
+#ifndef NDEBUG
     DorisMetrics::instance()->attach_task_thread_count->increment(1);
+#endif // NDEBUG
 #endif
 }
 
-template 
-SwitchThreadMemTracker::SwitchThreadMemTracker(
-        const std::shared_ptr& mem_tracker, bool in_task) {
+AddThreadMemTrackerConsumer::AddThreadMemTrackerConsumer(MemTracker* mem_tracker) {
 #ifdef USE_MEM_TRACKER
     if (config::memory_verbose_track) {
-#ifndef BE_TEST
-        DCHECK(mem_tracker);
-        // The thread tracker must be switched after the attach task, otherwise switching
-        // in the main thread will cause the cached tracker not be cleaned up in time.
-        DCHECK(in_task == false || tls_ctx()->type() != ThreadContext::TaskType::UNKNOWN)
-                << ",tls ctx type=" << tls_ctx()->type();
-        if (Existed) {
-            _old_tracker_id = tls_ctx()->_thread_mem_tracker_mgr->update_tracker(mem_tracker);
-        } else {
-            _old_tracker_id =
-                    tls_ctx()->_thread_mem_tracker_mgr->update_tracker(mem_tracker);
-        }
-#endif // BE_TEST
-#ifndef NDEBUG
-        tls_ctx()->_thread_mem_tracker_mgr->switch_count += 1;
-#endif // NDEBUG
+        thread_context()->_thread_mem_tracker_mgr->push_consumer_tracker(mem_tracker);
     }
 #endif // USE_MEM_TRACKER
 }
 
-template 
-SwitchThreadMemTracker::~SwitchThreadMemTracker() {
+AddThreadMemTrackerConsumer::~AddThreadMemTrackerConsumer() {
 #ifdef USE_MEM_TRACKER
     if (config::memory_verbose_track) {
 #ifndef NDEBUG
-        tls_ctx()->_thread_mem_tracker_mgr->switch_count -= 1;
-        DorisMetrics::instance()->switch_thread_mem_tracker_count->increment(1);
+        DorisMetrics::instance()->add_thread_mem_tracker_consumer_count->increment(1);
 #endif // NDEBUG
-#ifndef BE_TEST
-        tls_ctx()->_thread_mem_tracker_mgr->update_tracker_id(_old_tracker_id);
-#endif // BE_TEST
+        thread_context()->_thread_mem_tracker_mgr->pop_consumer_tracker();
     }
 #endif // USE_MEM_TRACKER
 }
 
-SwitchThreadMemTrackerErrCallBack::SwitchThreadMemTrackerErrCallBack(const std::string& action_type,
-                                                                     bool cancel_work,
-                                                                     ERRCALLBACK err_call_back_func,
-                                                                     bool log_limit_exceeded) {
+UpdateMemExceedCallBack::UpdateMemExceedCallBack(const std::string& cancel_msg, bool cancel_task,
+                                                 ExceedCallBack cb_func) {
 #ifdef USE_MEM_TRACKER
-    DCHECK(action_type != std::string());
-    _old_tracker_cb = tls_ctx()->_thread_mem_tracker_mgr->update_consume_err_cb(
-            action_type, cancel_work, err_call_back_func, log_limit_exceeded);
+    DCHECK(cancel_msg != std::string());
+    _old_cb = thread_context()->_thread_mem_tracker_mgr->update_exceed_call_back(
+            cancel_msg, cancel_task, cb_func);
 #endif
 }
 
-SwitchThreadMemTrackerErrCallBack::~SwitchThreadMemTrackerErrCallBack() {
+UpdateMemExceedCallBack::~UpdateMemExceedCallBack() {
 #ifdef USE_MEM_TRACKER
-    tls_ctx()->_thread_mem_tracker_mgr->update_consume_err_cb(_old_tracker_cb);
+    thread_context()->_thread_mem_tracker_mgr->update_exceed_call_back(_old_cb);
 #ifndef NDEBUG
-    DorisMetrics::instance()->switch_thread_mem_tracker_err_cb_count->increment(1);
+    DorisMetrics::instance()->thread_mem_tracker_exceed_call_back_count->increment(1);
 #endif
 #endif // USE_MEM_TRACKER
 }
 
 SwitchBthread::SwitchBthread() {
 #ifdef USE_MEM_TRACKER
-    tls = static_cast(bthread_getspecific(btls_key));
+    _bthread_context = static_cast<ThreadContext*>(bthread_getspecific(btls_key));
     // First call to bthread_getspecific (and before any bthread_setspecific) returns NULL
-    if (tls == nullptr) {
+    if (_bthread_context == nullptr) {
         // Create thread-local data on demand.
-        tls = new ThreadContext;
+        _bthread_context = new ThreadContext;
         // set the data so that next time bthread_getspecific in the thread returns the data.
-        CHECK_EQ(0, bthread_setspecific(btls_key, tls));
+        CHECK_EQ(0, bthread_setspecific(btls_key, _bthread_context));
     } else {
-        DCHECK(tls->type() == ThreadContext::TaskType::UNKNOWN);
-        tls->_thread_mem_tracker_mgr->clear_untracked_mems();
+        DCHECK(_bthread_context->type() == ThreadContext::TaskType::UNKNOWN);
+        _bthread_context->_thread_mem_tracker_mgr->flush_untracked_mem();
     }
-    tls->init();
-    tls->set_type(ThreadContext::TaskType::BRPC);
-    bthread_tls_key = btls_key;
-    bthread_tls = tls;
+    _bthread_context->_thread_mem_tracker_mgr->init();
+    _bthread_context->set_type(ThreadContext::TaskType::BRPC);
+    bthread_context_key = btls_key;
+    bthread_context = _bthread_context;
 #endif
 }
 
 SwitchBthread::~SwitchBthread() {
 #ifdef USE_MEM_TRACKER
-    DCHECK(tls != nullptr);
-    tls->_thread_mem_tracker_mgr->clear_untracked_mems();
-    tls->_thread_mem_tracker_mgr->init();
-    tls->set_type(ThreadContext::TaskType::UNKNOWN);
-    bthread_tls = nullptr;
-    bthread_tls_key = EMPTY_BTLS_KEY;
+    DCHECK(_bthread_context != nullptr);
+    _bthread_context->_thread_mem_tracker_mgr->flush_untracked_mem();
+    _bthread_context->_thread_mem_tracker_mgr->init();
+    _bthread_context->set_type(ThreadContext::TaskType::UNKNOWN);
+    bthread_context = nullptr;
+    bthread_context_key = EMPTY_BTLS_KEY;
 #ifndef NDEBUG
     DorisMetrics::instance()->switch_bthread_count->increment(1);
 #endif // NDEBUG
 #endif // USE_MEM_TRACKER
 }
 
-template class SwitchThreadMemTracker;
-template class SwitchThreadMemTracker;
-
 } // namespace doris
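
UpdateMemExceedCallBack and AddThreadMemTrackerConsumer above both follow a save-in-constructor, restore-in-destructor pattern on thread-local state. A minimal sketch of that pattern for the exceed-callback case is shown here; the struct and names are illustrative stand-ins, not the real MemExceedCallBackInfo.

#include <string>
#include <utility>

// Illustrative stand-in for the exceed-callback info held per thread.
struct CallbackInfoSketch {
    std::string cancel_msg;
    bool cancel_task = true;
};

thread_local CallbackInfoSketch t_exceed_cb;

// Save the current callback info in the constructor, restore it in the destructor.
class ScopedExceedCallbackSketch {
public:
    explicit ScopedExceedCallbackSketch(CallbackInfoSketch next) : _old(t_exceed_cb) {
        t_exceed_cb = std::move(next);
    }
    ~ScopedExceedCallbackSketch() { t_exceed_cb = _old; }

private:
    CallbackInfoSketch _old;
};
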
diff --git a/be/src/runtime/thread_context.h b/be/src/runtime/thread_context.h
index b2bddb2d21..f1439d1eea 100644
--- a/be/src/runtime/thread_context.h
+++ b/be/src/runtime/thread_context.h
@@ -26,58 +26,16 @@
 
 #include "common/logging.h"
 #include "gen_cpp/PaloInternalService_types.h" // for TQueryType
-#include "runtime/thread_mem_tracker_mgr.h"
+#include "runtime/memory/thread_mem_tracker_mgr.h"
 #include "runtime/threadlocal.h"
 
-// Attach to task when thread starts
-#define SCOPED_ATTACH_TASK_THREAD(type, ...) \
-    auto VARNAME_LINENUM(attach_task_thread) = AttachTaskThread(type, ##__VA_ARGS__)
-// Switch thread mem tracker during task execution.
-// After the non-query thread switches the mem tracker, if the thread will not switch the mem
-// tracker again in the short term, can consider manually clear_untracked_mems.
-// The query thread will automatically clear_untracked_mems when detach_task.
-#define SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(mem_tracker) \
-    auto VARNAME_LINENUM(switch_tracker) = doris::SwitchThreadMemTracker(mem_tracker, false)
-// `detach task/~switch bthread` will clear cached trackers and unconsumed tracks.
-// Used after `attach task/switch bthread` to avoid cached trackers not being destroyed in time.
-#define SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker) \
-    auto VARNAME_LINENUM(switch_tracker) = doris::SwitchThreadMemTracker(mem_tracker, true);
-#define SCOPED_SWITCH_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker) \
-    auto VARNAME_LINENUM(switch_tracker) = doris::SwitchThreadMemTracker(mem_tracker, false)
-#define SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker) \
-    auto VARNAME_LINENUM(switch_tracker) = doris::SwitchThreadMemTracker(mem_tracker, true)
-// Count the memory in the scope to a temporary tracker with the specified label name.
-// This is very useful when debugging. You can find the position where the tracker statistics are
-// inaccurate through the temporary tracker layer by layer. As well as finding memory hotspots.
-// TODO(zxy) track specifies the memory for each line in the code segment, instead of manually adding
-// a switch temporary tracker to each line. Maybe there are open source tools to do this?
-#define SCOPED_SWITCH_TEMPORARY_THREAD_LOCAL_MEM_TRACKER(label)                  \
-    auto VARNAME_LINENUM(switch_tracker) = doris::SwitchThreadMemTracker( \
-            MemTracker::get_temporary_mem_tracker(label), false)
-// After the non-query thread switches the mem tracker, if the thread will not switch the mem
-// tracker again in the short term, can consider manually clear_untracked_mems.
-// The query thread will automatically clear_untracked_mems when detach_task.
-#define SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_END_CLEAR(mem_tracker) \
-    auto VARNAME_LINENUM(switch_tracker) = doris::SwitchThreadMemTrackerEndClear(mem_tracker)
-#define SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB(action_type, ...) \
-    auto VARNAME_LINENUM(witch_tracker_cb) =                            \
-            doris::SwitchThreadMemTrackerErrCallBack(action_type, ##__VA_ARGS__)
-#define SCOPED_SWITCH_BTHREAD() auto VARNAME_LINENUM(switch_bthread) = SwitchBthread()
-// Before switching the same tracker multiple times, add tracker as early as possible,
-// and then call `SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER` to reduce one map find.
-// For example, in the exec_node open phase `add tracker`, it is no longer necessary to determine
-// whether the tracker exists in TLS when switching the tracker in the exec_node get_next phase.
-// TODO(zxy): Duplicate add tracker is currently prohibited, because it will,
-// 1. waste time 2. `_untracked_mems[mem_tracker->id()] = 0` will cause the memory track to be lost.
-// Some places may have to repeat the add tracker. optimize after.
-#define ADD_THREAD_LOCAL_MEM_TRACKER(mem_tracker) \
-    doris::tls_ctx()->_thread_mem_tracker_mgr->add_tracker(mem_tracker)
-#define CONSUME_THREAD_LOCAL_MEM_TRACKER(size) \
-    doris::tls_ctx()->_thread_mem_tracker_mgr->noncache_try_consume(size)
-#define RELEASE_THREAD_LOCAL_MEM_TRACKER(size) \
-    doris::tls_ctx()->_thread_mem_tracker_mgr->noncache_try_consume(-size)
-#define STOP_CHECK_LIMIT_THREAD_LOCAL_MEM_TRACKER() \
-    auto VARNAME_LINENUM(switch_bthread) = StopCheckLimitThreadMemTracker()
+// Add thread mem tracker consumer during query execution.
+#define SCOPED_CONSUME_MEM_TRACKER(mem_tracker) \
+    auto VARNAME_LINENUM(add_mem_consumer) = doris::AddThreadMemTrackerConsumer(mem_tracker)
+
+#define SCOPED_UPDATE_MEM_EXCEED_CALL_BACK(cancel_msg, ...) \
+    auto VARNAME_LINENUM(update_exceed_cb) =                \
+            doris::UpdateMemExceedCallBack(cancel_msg, ##__VA_ARGS__)
 
 namespace doris {
 
@@ -118,14 +76,15 @@ public:
     // TCMalloc hook is triggered during ThreadContext construction, which may lead to deadlock.
     bool _init = false;
 
-    DECLARE_STATIC_THREAD_LOCAL(ThreadContext, _tls);
+    DECLARE_STATIC_THREAD_LOCAL(ThreadContext, _ptr);
 };
 
-inline thread_local ThreadContextPtr thread_local_ctx;
+inline thread_local ThreadContextPtr thread_context_ptr;
+
 // To avoid performance problems caused by frequently calling `bthread_getspecific` to obtain bthread TLS
 // in tcmalloc hook, cache the key and value of bthread TLS in pthread TLS.
-inline thread_local ThreadContext* bthread_tls;
-inline thread_local bthread_key_t bthread_tls_key;
+inline thread_local ThreadContext* bthread_context;
+inline thread_local bthread_key_t bthread_context_key;
 
 // The thread context saves some info about a working thread.
 // 2 required info:
@@ -153,7 +112,6 @@ public:
     ThreadContext() {
         _thread_mem_tracker_mgr.reset(new ThreadMemTrackerMgr());
         init();
-        thread_local_ctx._init = true;
     }
 
     ~ThreadContext() {
@@ -163,7 +121,7 @@ public:
         // Equal to the size of the memory release that is not tracked during the destruction of the
         // ThreadContext after `_init = false in ~ThreadContextPtr()`,
         init();
-        thread_local_ctx._init = false;
+        thread_context_ptr._init = false;
     }
 
     void init() {
@@ -172,26 +130,24 @@ public:
         _thread_id = get_thread_id();
     }
 
-    void attach(const TaskType& type, const std::string& task_id,
-                const TUniqueId& fragment_instance_id,
-                const std::shared_ptr<MemTracker>& mem_tracker) {
-        std::string new_tracker_label = mem_tracker == nullptr ? "null" : mem_tracker->label();
+    void attach_task(const TaskType& type, const std::string& task_id,
+                     const TUniqueId& fragment_instance_id, MemTrackerLimiter* mem_tracker) {
         DCHECK((_type == TaskType::UNKNOWN || _type == TaskType::BRPC) && _task_id == "")
-                << ",new tracker label: " << new_tracker_label
-                << ",old tracker label: " << _thread_mem_tracker_mgr->mem_tracker()->label();
+                << ",new tracker label: " << mem_tracker->label() << ",old tracker label: "
+                << _thread_mem_tracker_mgr->limiter_mem_tracker()->label();
         DCHECK(type != TaskType::UNKNOWN);
         _type = type;
         _task_id = task_id;
         _fragment_instance_id = fragment_instance_id;
-        _thread_mem_tracker_mgr->attach_task(TaskTypeStr[_type], task_id, fragment_instance_id,
-                                             mem_tracker);
+        _thread_mem_tracker_mgr->attach_limiter_tracker(TaskTypeStr[_type], task_id,
+                                                        fragment_instance_id, mem_tracker);
     }
 
-    void detach() {
+    void detach_task() {
         _type = TaskType::UNKNOWN;
         _task_id = "";
         _fragment_instance_id = TUniqueId();
-        _thread_mem_tracker_mgr->detach_task();
+        _thread_mem_tracker_mgr->detach_limiter_tracker();
     }
 
     const TaskType& type() const { return _type; }
@@ -221,33 +177,31 @@ private:
     TUniqueId _fragment_instance_id;
 };
 
-static ThreadContext* tls_ctx() {
-    ThreadContext* tls = static_cast<ThreadContext*>(bthread_getspecific(btls_key));
-    if (tls != nullptr) {
-        return tls;
-    } else {
-        return thread_local_ctx._tls;
+static void update_bthread_context() {
+    if (btls_key != bthread_context_key) {
+        // A pthread switch occurred; refresh the bthread_context and bthread_context_key cached in pthread TLS.
+        bthread_context = static_cast<ThreadContext*>(bthread_getspecific(btls_key));
+        bthread_context_key = btls_key;
     }
 }
 
-class AttachTaskThread {
+static ThreadContext* thread_context() {
+    if (btls_key != EMPTY_BTLS_KEY && bthread_context != nullptr) {
+        update_bthread_context();
+        return bthread_context;
+    } else {
+        return thread_context_ptr._ptr;
+    }
+}
+
+class AttachTask {
 public:
-    explicit AttachTaskThread(const ThreadContext::TaskType& type, const std::string& task_id,
-                              const TUniqueId& fragment_instance_id = TUniqueId(),
-                              const std::shared_ptr<MemTracker>& mem_tracker = nullptr);
+    explicit AttachTask(MemTrackerLimiter* mem_tracker,
+                        const ThreadContext::TaskType& type = ThreadContext::TaskType::UNKNOWN,
+                        const std::string& task_id = "",
+                        const TUniqueId& fragment_instance_id = TUniqueId());
 
-    explicit AttachTaskThread(const ThreadContext::TaskType& type,
-                              const std::shared_ptr<MemTracker>& mem_tracker);
-
-    explicit AttachTaskThread(const TQueryType::type& query_type,
-                              const std::shared_ptr<MemTracker>& mem_tracker);
-
-    explicit AttachTaskThread(const TQueryType::type& query_type,
-                              const std::shared_ptr<MemTracker>& mem_tracker,
-                              const std::string& task_id, const TUniqueId& fragment_instance_id);
-
-    explicit AttachTaskThread(const RuntimeState* runtime_state,
-                              const std::shared_ptr<MemTracker>& mem_tracker);
+    explicit AttachTask(RuntimeState* runtime_state);
 
     const ThreadContext::TaskType query_to_task_type(const TQueryType::type& query_type) {
         switch (query_type) {
@@ -261,45 +215,26 @@ public:
         }
     }
 
-    ~AttachTaskThread();
+    ~AttachTask();
 };
 
-template 
-class SwitchThreadMemTracker {
+class AddThreadMemTrackerConsumer {
 public:
-    explicit SwitchThreadMemTracker(const std::shared_ptr<MemTracker>& mem_tracker,
-                                    bool in_task = true);
+    explicit AddThreadMemTrackerConsumer(MemTracker* mem_tracker);
 
-    ~SwitchThreadMemTracker();
-
-protected:
-#ifdef USE_MEM_TRACKER
-    int64_t _old_tracker_id = 0;
-#endif
+    ~AddThreadMemTrackerConsumer();
 };
 
-class SwitchThreadMemTrackerEndClear : public SwitchThreadMemTracker {
+class UpdateMemExceedCallBack {
 public:
-    explicit SwitchThreadMemTrackerEndClear(const std::shared_ptr<MemTracker>& mem_tracker)
-            : SwitchThreadMemTracker(mem_tracker, false) {}
+    explicit UpdateMemExceedCallBack(const std::string& cancel_msg, bool cancel_task = true,
+                                     ExceedCallBack cb_func = nullptr);
 
-    ~SwitchThreadMemTrackerEndClear() {
-        tls_ctx()->_thread_mem_tracker_mgr->clear_untracked_mems();
-    }
-};
-
-class SwitchThreadMemTrackerErrCallBack {
-public:
-    explicit SwitchThreadMemTrackerErrCallBack(const std::string& action_type,
-                                               bool cancel_work = true,
-                                               ERRCALLBACK err_call_back_func = nullptr,
-                                               bool log_limit_exceeded = true);
-
-    ~SwitchThreadMemTrackerErrCallBack();
+    ~UpdateMemExceedCallBack();
 
 private:
 #ifdef USE_MEM_TRACKER
-    ConsumeErrCallBackInfo _old_tracker_cb;
+    MemExceedCallBackInfo _old_cb;
 #endif
 };
 
@@ -311,19 +246,37 @@ public:
 
 private:
 #ifdef USE_MEM_TRACKER
-    ThreadContext* tls;
+    ThreadContext* _bthread_context;
 #endif
 };
 
-class StopCheckLimitThreadMemTracker {
+class StopCheckThreadMemTrackerLimit {
 public:
-    explicit StopCheckLimitThreadMemTracker() {
-        tls_ctx()->_thread_mem_tracker_mgr->update_check_limit(false);
+    explicit StopCheckThreadMemTrackerLimit() {
+        thread_context()->_thread_mem_tracker_mgr->set_check_limit(false);
     }
 
-    ~StopCheckLimitThreadMemTracker() {
-        tls_ctx()->_thread_mem_tracker_mgr->update_check_limit(true);
+    ~StopCheckThreadMemTrackerLimit() {
+        thread_context()->_thread_mem_tracker_mgr->set_check_limit(true);
     }
 };
 
+#define SCOPED_SWITCH_BTHREAD_TLS() auto VARNAME_LINENUM(switch_bthread) = SwitchBthread()
+
+// Attach to task when thread starts
+#define SCOPED_ATTACH_TASK(arg1, ...) \
+    auto VARNAME_LINENUM(attach_task) = AttachTask(arg1, ##__VA_ARGS__)
+
+#define STOP_CHECK_THREAD_MEM_TRACKER_LIMIT() \
+    auto VARNAME_LINENUM(stop_check_limit) = StopCheckThreadMemTrackerLimit()
+
+#define CONSUME_THREAD_MEM_TRACKER(size) \
+    doris::thread_context()->_thread_mem_tracker_mgr->consume(size)
+#define RELEASE_THREAD_MEM_TRACKER(size) \
+    doris::thread_context()->_thread_mem_tracker_mgr->consume(-size)
+#define THREAD_MEM_TRACKER_TRANSFER_TO(size, tracker) \
+    doris::thread_context()->_thread_mem_tracker_mgr->transfer_to(size, tracker)
+#define THREAD_MEM_TRACKER_TRANSFER_FROM(size, tracker) \
+    doris::thread_context()->_thread_mem_tracker_mgr->transfer_from(size, tracker)
+
 } // namespace doris
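The macros above replace the old switch/add/consume family with a small set of RAII scope objects. The sketch below is a standalone illustration of the underlying pattern only, not the Doris API: SimpleTracker, ScopedConsumer, and track_alloc are invented names. A guard swaps the thread-local tracker pointer on entry and restores the previous one on exit, so anything charged inside the scope is attributed to the chosen tracker, which is the behavior SCOPED_CONSUME_MEM_TRACKER relies on.

#include <atomic>
#include <cstdint>
#include <iostream>

// Illustrative stand-ins, not the Doris classes.
struct SimpleTracker {
    std::atomic<int64_t> consumption{0};
    void consume(int64_t bytes) { consumption += bytes; }
};

thread_local SimpleTracker* g_thread_tracker = nullptr;

// RAII guard: allocations tracked inside the scope are charged to `t`,
// and the previous tracker is restored on destruction.
class ScopedConsumer {
public:
    explicit ScopedConsumer(SimpleTracker* t) : _prev(g_thread_tracker) { g_thread_tracker = t; }
    ~ScopedConsumer() { g_thread_tracker = _prev; }

private:
    SimpleTracker* _prev;
};

// Stand-in for the allocation hook reporting to the thread-local tracker.
void track_alloc(int64_t bytes) {
    if (g_thread_tracker != nullptr) g_thread_tracker->consume(bytes);
}

int main() {
    SimpleTracker scan_tracker;
    {
        ScopedConsumer guard(&scan_tracker); // analogous to SCOPED_CONSUME_MEM_TRACKER
        track_alloc(4096);                   // e.g. triggered from a new/delete hook
    }
    std::cout << scan_tracker.consumption << std::endl; // prints 4096
    return 0;
}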
diff --git a/be/src/runtime/thread_mem_tracker_mgr.cpp b/be/src/runtime/thread_mem_tracker_mgr.cpp
deleted file mode 100644
index d3715b58df..0000000000
--- a/be/src/runtime/thread_mem_tracker_mgr.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "runtime/thread_mem_tracker_mgr.h"
-
-#include "runtime/exec_env.h"
-#include "runtime/fragment_mgr.h"
-#include "runtime/mem_tracker_task_pool.h"
-#include "service/backend_options.h"
-
-namespace doris {
-
-void ThreadMemTrackerMgr::attach_task(const std::string& cancel_msg, const std::string& task_id,
-                                      const TUniqueId& fragment_instance_id,
-                                      const std::shared_ptr<MemTracker>& mem_tracker) {
-    DCHECK(switch_count == 0) << print_debug_string();
-    clear_untracked_mems();
-    init();
-    _task_id = task_id;
-    _fragment_instance_id = fragment_instance_id;
-    _consume_err_cb.cancel_msg = cancel_msg;
-    if (mem_tracker == nullptr) {
-#ifdef BE_TEST
-        if (ExecEnv::GetInstance()->task_pool_mem_tracker_registry() == nullptr) {
-            return;
-        }
-#endif
-        std::shared_ptr<MemTracker> tracker =
-                ExecEnv::GetInstance()->task_pool_mem_tracker_registry()->get_task_mem_tracker(
-                        task_id);
-        update_tracker(tracker);
-    } else {
-        update_tracker(mem_tracker);
-    }
-}
-
-void ThreadMemTrackerMgr::detach_task() {
-    DCHECK(switch_count == 0) << print_debug_string();
-    _fragment_instance_id = TUniqueId();
-    clear_untracked_mems();
-    init();
-}
-
-void ThreadMemTrackerMgr::exceeded_cancel_task(const std::string& cancel_details) {
-    if (_fragment_instance_id != TUniqueId()) {
-        ExecEnv::GetInstance()->fragment_mgr()->cancel(
-                _fragment_instance_id, PPlanFragmentCancelReason::MEMORY_LIMIT_EXCEED,
-                cancel_details);
-    }
-}
-
-void ThreadMemTrackerMgr::exceeded(int64_t mem_usage, Status st) {
-    if (_consume_err_cb.cb_func != nullptr) {
-        _consume_err_cb.cb_func();
-    }
-    if (is_attach_task()) {
-        if (_consume_err_cb.cancel_task) {
-            auto rst = _mem_trackers[_tracker_id]->mem_limit_exceeded(
-                    nullptr,
-                    fmt::format("Task mem limit exceeded and cancel it, msg:{}",
-                                _consume_err_cb.cancel_msg),
-                    mem_usage, st);
-            exceeded_cancel_task(rst.to_string());
-            _consume_err_cb.cancel_task = false; // Make sure it will only be canceled once
-            _consume_err_cb.log_limit_exceeded = false;
-        }
-    }
-}
-} // namespace doris
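The deleted exceeded()/exceeded_cancel_task() pair cancels the attached task at most once by clearing cancel_task after the first failure. A minimal sketch of that cancel-once behavior follows; ExceedCallbackInfo and on_limit_exceeded are invented names standing in for the real callback plumbing.

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

// Sketch of the cancel-once behavior; not the Doris implementation.
struct ExceedCallbackInfo {
    std::string cancel_msg;
    bool cancel_task = true;       // cancel only on the first exceeded consume
    std::function<void()> cb_func; // optional user hook, may be empty
};

void on_limit_exceeded(ExceedCallbackInfo& cb, int64_t attempted_bytes) {
    if (cb.cb_func) cb.cb_func();
    if (cb.cancel_task) {
        std::cout << "cancel task: " << cb.cancel_msg << " (attempted " << attempted_bytes
                  << " bytes)" << std::endl;
        cb.cancel_task = false; // make sure the task is only canceled once
    }
}

int main() {
    ExceedCallbackInfo cb;
    cb.cancel_msg = "query memory limit exceeded";
    on_limit_exceeded(cb, 1 << 20); // cancels the task
    on_limit_exceeded(cb, 1 << 20); // no-op: already canceled
    return 0;
}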
diff --git a/be/src/runtime/thread_mem_tracker_mgr.h b/be/src/runtime/thread_mem_tracker_mgr.h
deleted file mode 100644
index ada4651ed3..0000000000
--- a/be/src/runtime/thread_mem_tracker_mgr.h
+++ /dev/null
@@ -1,305 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include 
-#include 
-
-#include "runtime/mem_tracker.h"
-
-namespace doris {
-
-using ERRCALLBACK = void (*)();
-struct ConsumeErrCallBackInfo {
-    std::string cancel_msg;
-    bool cancel_task; // Whether to cancel the task when the current tracker exceeds the limit.
-    ERRCALLBACK cb_func;
-    bool log_limit_exceeded; // Whether to print log_usage of mem tracker when mem limit exceeded.
-
-    ConsumeErrCallBackInfo() { init(); }
-
-    ConsumeErrCallBackInfo(const std::string& cancel_msg, bool cancel_task, ERRCALLBACK cb_func,
-                           bool log_limit_exceeded)
-            : cancel_msg(cancel_msg),
-              cancel_task(cancel_task),
-              cb_func(cb_func),
-              log_limit_exceeded(log_limit_exceeded) {}
-
-    void init() {
-        cancel_msg = "";
-        cancel_task = true;
-        cb_func = nullptr;
-        log_limit_exceeded = true;
-    }
-};
-
-// TCMalloc new/delete Hook is counted in the memory_tracker of the current thread.
-//
-// In the original design, the MemTracker consume method is called before the memory is allocated.
-// If the consume succeeds, the memory is actually allocated, otherwise an exception is thrown.
-// But the statistics of memory through TCMalloc new/delete Hook are after the memory is actually allocated,
-// which is different from the previous behavior. Therefore, when alloc for some large memory,
-// need to manually call consume after stop_mem_tracker, and then start_mem_tracker.
-class ThreadMemTrackerMgr {
-public:
-    ThreadMemTrackerMgr() {}
-
-    ~ThreadMemTrackerMgr() {
-        clear_untracked_mems();
-        _consume_err_cb.init();
-        _mem_trackers.clear();
-        _untracked_mems.clear();
-        _mem_tracker_labels.clear();
-    }
-
-    // After thread initialization, calling `init` again must call `clear_untracked_mems` first
-    // to avoid memory tracking loss.
-    void init();
-
-    void clear_untracked_mems();
-
-    // After attach, the current thread TCMalloc Hook starts to consume/release task mem_tracker
-    void attach_task(const std::string& cancel_msg, const std::string& task_id,
-                     const TUniqueId& fragment_instance_id,
-                     const std::shared_ptr<MemTracker>& mem_tracker);
-
-    void detach_task();
-
-    // Must be fast enough! Thread update_tracker may be called very frequently.
-    // So for performance, add tracker as early as possible, and then call update_tracker.
-    template <bool Existed>
-    int64_t update_tracker(const std::shared_ptr<MemTracker>& mem_tracker);
-    void update_tracker_id(int64_t tracker_id);
-
-    // Before switching the same tracker multiple times, add tracker as early as possible,
-    // update_tracker can reduce one map find.
-    void add_tracker(const std::shared_ptr<MemTracker>& mem_tracker);
-
-    ConsumeErrCallBackInfo update_consume_err_cb(const std::string& cancel_msg, bool cancel_task,
-                                                 ERRCALLBACK cb_func, bool log_limit_exceeded) {
-        _temp_consume_err_cb = _consume_err_cb;
-        _consume_err_cb.cancel_msg = cancel_msg;
-        _consume_err_cb.cancel_task = cancel_task;
-        _consume_err_cb.cb_func = cb_func;
-        _consume_err_cb.log_limit_exceeded = log_limit_exceeded;
-        return _temp_consume_err_cb;
-    }
-
-    void update_consume_err_cb(const ConsumeErrCallBackInfo& consume_err_cb) {
-        _consume_err_cb = consume_err_cb;
-    }
-
-    // Note that, If call the memory allocation operation in TCMalloc new/delete Hook,
-    // such as calling LOG/iostream/sstream/stringstream/etc. related methods,
-    // must increase the control to avoid entering infinite recursion, otherwise it may cause crash or stuck,
-    void cache_consume(int64_t size);
-
-    void noncache_try_consume(int64_t size);
-
-    bool is_attach_task() { return _task_id != ""; }
-
-    std::shared_ptr<MemTracker> mem_tracker();
-
-    void update_check_limit(bool check_limit) { _check_limit = check_limit; }
-
-    int64_t switch_count = 0;
-
-    std::string print_debug_string() {
-        fmt::memory_buffer mem_trackers_buf;
-        for (const auto& [key, value] : _mem_trackers) {
-            fmt::format_to(mem_trackers_buf, "{}_{},", std::to_string(key), value->log_usage(1));
-        }
-        fmt::memory_buffer untracked_mems_buf;
-        for (const auto& [key, value] : _untracked_mems) {
-            fmt::format_to(untracked_mems_buf, "{}_{},", std::to_string(key),
-                           std::to_string(value));
-        }
-        fmt::memory_buffer mem_tracker_labels_buf;
-        for (const auto& [key, value] : _mem_tracker_labels) {
-            fmt::format_to(mem_tracker_labels_buf, "{}_{},", std::to_string(key), value);
-        }
-        return fmt::format(
-                "ThreadMemTrackerMgr debug string, _tracker_id:{}, _untracked_mem:{}, _task_id:{}, "
-                "_mem_trackers:<{}>, _untracked_mems:<{}>, _mem_tracker_labels:<{}>",
-                std::to_string(_tracker_id), std::to_string(_untracked_mem), _task_id,
-                fmt::to_string(mem_trackers_buf), fmt::to_string(untracked_mems_buf),
-                fmt::to_string(mem_tracker_labels_buf));
-    }
-
-private:
-    // If tryConsume fails due to task mem tracker exceeding the limit, the task must be canceled
-    void exceeded_cancel_task(const std::string& cancel_details);
-
-    void exceeded(int64_t mem_usage, Status st);
-
-private:
-    // Cache untracked mem, only update to _untracked_mems when switching mem tracker.
-    // Frequent calls to unordered_map _untracked_mems[] in cache_consume will degrade performance.
-    int64_t _untracked_mem = 0;
-
-    // May switch back and forth between multiple trackers frequently. If you use a pointer to save the
-    // current tracker, and consume the current untracked mem each time you switch, there is a performance problem:
-    //  1. The frequent change of the use-count of shared_ptr has a huge cost; (it can also be solved by using
-    //  raw pointers, which requires uniform replacement of the pointers of all mem trackers in doris)
-    //  2. The cost of calling consume for the current untracked mem is huge;
-    // In order to reduce the cost, during an attach task, the untracked mem of all switched trackers is cached,
-    // and the untracked mem is consumed only after the upper limit is reached or when the task is detached.
-    // NOTE: flat_hash_map, int replaces string as key, all to improve the speed of map find,
-    //  the expected speed is increased by more than 10 times.
-    phmap::flat_hash_map<int64_t, std::shared_ptr<MemTracker>> _mem_trackers;
-    phmap::flat_hash_map<int64_t, int64_t> _untracked_mems;
-    // After the tracker is added to _mem_trackers, if tracker = null is found when using it,
-    // we can confirm the tracker label that was added through _mem_tracker_labels.
-    // Because for performance, all map keys are tracker id.
-    phmap::flat_hash_map<int64_t, std::string> _mem_tracker_labels;
-    // If true, call memtracker try_consume, otherwise call consume.
-    bool _check_limit;
-    // If there is a memory new/delete operation in the consume method, it may enter infinite recursion.
-    bool _stop_consume = false;
-
-    int64_t _tracker_id;
-    // Avoid memory allocation in functions.
-    int64_t _temp_tracker_id;
-    ConsumeErrCallBackInfo _temp_consume_err_cb;
-
-    std::string _task_id;
-    TUniqueId _fragment_instance_id;
-    ConsumeErrCallBackInfo _consume_err_cb;
-};
-
-inline void ThreadMemTrackerMgr::init() {
-    _task_id = "";
-    _consume_err_cb.init();
-    _tracker_id = 0;
-    _mem_trackers.clear();
-    _mem_trackers[0] = MemTracker::get_process_tracker();
-    _untracked_mems.clear();
-    _untracked_mems[0] = 0;
-    _mem_tracker_labels.clear();
-    _mem_tracker_labels[0] = MemTracker::get_process_tracker()->label();
-    _check_limit = true;
-}
-
-inline void ThreadMemTrackerMgr::clear_untracked_mems() {
-    for (const auto& untracked_mem : _untracked_mems) {
-        if (untracked_mem.second != 0) {
-            DCHECK(_mem_trackers[untracked_mem.first]) << print_debug_string();
-            _mem_trackers[untracked_mem.first]->consume(untracked_mem.second);
-        }
-    }
-    mem_tracker()->consume(_untracked_mem);
-    _untracked_mem = 0;
-}
-
-template <bool Existed>
-inline int64_t ThreadMemTrackerMgr::update_tracker(const std::shared_ptr<MemTracker>& mem_tracker) {
-    DCHECK(mem_tracker) << print_debug_string();
-    _temp_tracker_id = mem_tracker->id();
-    if (_temp_tracker_id == _tracker_id) {
-        return _tracker_id;
-    }
-    if (Existed) {
-        DCHECK(_mem_trackers.find(_temp_tracker_id) != _mem_trackers.end()) << print_debug_string();
-    } else {
-        // If the tracker has already been added, avoid `_untracked_mems[x] = 0;` again causing the memory track to be lost.
-        if (_mem_trackers.find(_temp_tracker_id) == _mem_trackers.end()) {
-            _mem_trackers[_temp_tracker_id] = mem_tracker;
-            DCHECK(_mem_trackers[_temp_tracker_id]) << print_debug_string();
-            _untracked_mems[_temp_tracker_id] = 0;
-            _mem_tracker_labels[_temp_tracker_id] = mem_tracker->label();
-        }
-    }
-
-    DCHECK(_mem_trackers.find(_tracker_id) != _mem_trackers.end()) << print_debug_string();
-    DCHECK(_mem_trackers[_tracker_id]) << print_debug_string();
-    _untracked_mems[_tracker_id] += _untracked_mem;
-    _untracked_mem = 0;
-    std::swap(_tracker_id, _temp_tracker_id);
-    DCHECK(_mem_trackers[_tracker_id]) << print_debug_string();
-    return _temp_tracker_id; // old tracker_id
-}
-
-inline void ThreadMemTrackerMgr::update_tracker_id(int64_t tracker_id) {
-    DCHECK(switch_count >= 0) << print_debug_string();
-    if (tracker_id != _tracker_id) {
-        _untracked_mems[_tracker_id] += _untracked_mem;
-        _untracked_mem = 0;
-        _tracker_id = tracker_id;
-        DCHECK(_untracked_mems.find(_tracker_id) != _untracked_mems.end()) << print_debug_string();
-        DCHECK(_mem_trackers[_tracker_id]) << print_debug_string();
-    }
-}
-
-inline void ThreadMemTrackerMgr::cache_consume(int64_t size) {
-    _untracked_mem += size;
-    // When some threads `0 < _untracked_mem < config::mem_tracker_consume_min_size_bytes`
-    // and some threads `_untracked_mem <= -config::mem_tracker_consume_min_size_bytes` trigger consumption(),
-    // it will cause tracker->consumption to be temporarily less than 0.
-    //
-    // Temporary memory may be allocated during the consumption of the mem tracker (in the processing logic of
-    // the exceeded limit), which will lead to entering the TCMalloc Hook again, so suspend consumption to avoid
-    // falling into an infinite loop.
-    if ((_untracked_mem >= config::mem_tracker_consume_min_size_bytes ||
-         _untracked_mem <= -config::mem_tracker_consume_min_size_bytes) &&
-        !_stop_consume) {
-        _stop_consume = true;
-        DCHECK(_untracked_mems.find(_tracker_id) != _untracked_mems.end()) << print_debug_string();
-        // When switching to the current tracker last time, the remaining untracked memory.
-        if (_untracked_mems[_tracker_id] != 0) {
-            _untracked_mem += _untracked_mems[_tracker_id];
-            _untracked_mems[_tracker_id] = 0;
-        }
-        if (_check_limit) {
-            noncache_try_consume(_untracked_mem);
-        } else {
-            mem_tracker()->consume(_untracked_mem);
-        }
-        _untracked_mem = 0;
-        _stop_consume = false;
-    }
-}
-
-inline void ThreadMemTrackerMgr::noncache_try_consume(int64_t size) {
-    Status st = mem_tracker()->try_consume(size);
-    if (!st) {
-        // The memory has been allocated, so when TryConsume fails, need to continue to complete
-        // the consume to ensure the accuracy of the statistics.
-        mem_tracker()->consume(size);
-        exceeded(size, st);
-    }
-}
-
-inline void ThreadMemTrackerMgr::add_tracker(const std::shared_ptr<MemTracker>& mem_tracker) {
-#ifdef USE_MEM_TRACKER
-    DCHECK(_mem_trackers.find(mem_tracker->id()) == _mem_trackers.end()) << print_debug_string();
-    _mem_trackers[mem_tracker->id()] = mem_tracker;
-    DCHECK(_mem_trackers[mem_tracker->id()]) << print_debug_string();
-    _untracked_mems[mem_tracker->id()] = 0;
-    _mem_tracker_labels[mem_tracker->id()] = mem_tracker->label();
-#endif
-}
-
-inline std::shared_ptr<MemTracker> ThreadMemTrackerMgr::mem_tracker() {
-    // Whether the key _tracker_id exists in _mem_trackers.
-    DCHECK(_mem_trackers.find(_tracker_id) != _mem_trackers.end()) << print_debug_string();
-    // If the key _tracker_id exists in _mem_trackers, check whether the value is null.
-    DCHECK(_mem_trackers[_tracker_id]) << print_debug_string();
-    return _mem_trackers[_tracker_id];
-}
-
-} // namespace doris
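The idea this deleted header documents, and which the replacement thread_mem_tracker_mgr keeps, is threshold-batched consumption: the allocation hook only bumps a plain thread-local counter, and the shared tracker is touched once the cached delta crosses a configured size (or when the thread detaches). The following standalone sketch shows only that batching; kFlushThresholdBytes and all other names are illustrative, not config::mem_tracker_consume_min_size_bytes or the real classes.

#include <atomic>
#include <cstdint>
#include <iostream>

// Assumed batch size for the sketch, not Doris's config value.
constexpr int64_t kFlushThresholdBytes = 1 << 20;

std::atomic<int64_t> g_tracker_consumption{0}; // shared tracker counter
thread_local int64_t t_untracked_bytes = 0;    // cheap per-thread cache

// Called from an allocation hook: only touches the shared counter when the
// cached delta grows past the threshold in either direction.
void cache_consume(int64_t bytes) {
    t_untracked_bytes += bytes;
    if (t_untracked_bytes >= kFlushThresholdBytes || t_untracked_bytes <= -kFlushThresholdBytes) {
        g_tracker_consumption.fetch_add(t_untracked_bytes, std::memory_order_relaxed);
        t_untracked_bytes = 0;
    }
}

// Must be called when the thread detaches so the cached bytes are not lost.
void flush_untracked() {
    g_tracker_consumption.fetch_add(t_untracked_bytes, std::memory_order_relaxed);
    t_untracked_bytes = 0;
}

int main() {
    for (int i = 0; i < 300; ++i) cache_consume(8 * 1024); // mostly hits the thread-local cache
    flush_untracked();
    std::cout << g_tracker_consumption << std::endl; // prints 2457600
    return 0;
}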
diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp
index af65cab0e8..68cff8e08a 100644
--- a/be/src/service/doris_main.cpp
+++ b/be/src/service/doris_main.cpp
@@ -51,6 +51,7 @@
 #include "olap/storage_engine.h"
 #include "runtime/exec_env.h"
 #include "runtime/heartbeat_flags.h"
+#include "runtime/memory/mem_tracker_task_pool.h"
 #include "service/backend_options.h"
 #include "service/backend_service.h"
 #include "service/brpc_service.h"
@@ -64,11 +65,6 @@
 #include "util/thrift_server.h"
 #include "util/uid_util.h"
 
-#if !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && \
-        !defined(THREAD_SANITIZER) && !defined(USE_JEMALLOC)
-#include "runtime/tcmalloc_hook.h"
-#endif
-
 static void help(const char*);
 
 #include 
@@ -333,11 +329,6 @@ int main(int argc, char** argv) {
         fprintf(stderr, "Failed to change TCMalloc total thread cache size.\n");
         return -1;
     }
-#if defined(USE_MEM_TRACKER) && !defined(USE_JEMALLOC)
-    if (doris::config::track_new_delete) {
-        init_hook();
-    }
-#endif // USE_MEM_TRACKER
 #endif
 
     std::vector<doris::StorePath> paths;
@@ -384,6 +375,10 @@ int main(int argc, char** argv) {
         exit(-1);
     }
 
+    // init exec env
+    auto exec_env = doris::ExecEnv::GetInstance();
+    doris::ExecEnv::init(exec_env, paths);
+
     // init and open storage engine
     doris::EngineOptions options;
     options.store_paths = paths;
@@ -394,10 +389,6 @@ int main(int argc, char** argv) {
         LOG(FATAL) << "fail to open StorageEngine, res=" << st.get_error_msg();
         exit(-1);
     }
-
-    // init exec env
-    auto exec_env = doris::ExecEnv::GetInstance();
-    doris::ExecEnv::init(exec_env, paths);
     exec_env->set_storage_engine(engine);
     engine->set_heartbeat_flags(exec_env->heartbeat_flags());
 
diff --git a/be/src/service/http_service.cpp b/be/src/service/http_service.cpp
index 42c0002ac4..ce9919f3b4 100644
--- a/be/src/service/http_service.cpp
+++ b/be/src/service/http_service.cpp
@@ -53,7 +53,7 @@ HttpService::HttpService(ExecEnv* env, int port, int num_threads)
 HttpService::~HttpService() {}
 
 Status HttpService::start() {
-    add_default_path_handlers(_web_page_handler.get(), MemTracker::get_process_tracker());
+    add_default_path_handlers(_web_page_handler.get());
 
     // register load
     StreamLoadAction* streamload_action = _pool.add(new StreamLoadAction(_env));
diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp
index 38885c68e2..7603beacff 100644
--- a/be/src/service/internal_service.cpp
+++ b/be/src/service/internal_service.cpp
@@ -26,6 +26,7 @@
 #include "runtime/fold_constant_executor.h"
 #include "runtime/fragment_mgr.h"
 #include "runtime/load_channel_mgr.h"
+#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/result_buffer_mgr.h"
 #include "runtime/routine_load/routine_load_task_executor.h"
 #include "runtime/runtime_state.h"
@@ -89,7 +90,7 @@ void PInternalServiceImpl::transmit_data(google::protobuf::RpcController* cntl_b
                                          const PTransmitDataParams* request,
                                          PTransmitDataResult* response,
                                          google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     // TODO(zxy) delete in 1.2 version
     brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base);
     attachment_transfer_request_row_batch(request, cntl);
@@ -101,7 +102,7 @@ void PInternalServiceImpl::transmit_data_by_http(google::protobuf::RpcController
                                                  const PEmptyRequest* request,
                                                  PTransmitDataResult* response,
                                                  google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     PTransmitDataParams* request_raw = new PTransmitDataParams();
     google::protobuf::Closure* done_raw =
             new NewHttpClosure(request_raw, done);
@@ -117,20 +118,20 @@ void PInternalServiceImpl::_transmit_data(google::protobuf::RpcController* cntl_
                                           const Status& extract_st) {
     std::string query_id;
     TUniqueId finst_id;
-    std::shared_ptr<MemTracker> query_tracker;
+    std::unique_ptr<MemTrackerLimiter> transmit_tracker;
     if (request->has_query_id()) {
         query_id = print_id(request->query_id());
         finst_id.__set_hi(request->finst_id().hi());
         finst_id.__set_lo(request->finst_id().lo());
         // In some cases, query mem tracker does not exist in BE when transmit block, will get null pointer.
-        query_tracker = _exec_env->task_pool_mem_tracker_registry()->get_task_mem_tracker(query_id);
+        transmit_tracker = std::make_unique<MemTrackerLimiter>(
+                -1, fmt::format("QueryTransmit#queryId={}", query_id),
+                _exec_env->task_pool_mem_tracker_registry()->get_task_mem_tracker(query_id));
     } else {
-        query_id = "default_transmit_data";
+        query_id = "unkown_transmit_data";
+        transmit_tracker = std::make_unique(-1, "unkown_transmit_data");
     }
-    if (!query_tracker) {
-        query_tracker = ExecEnv::GetInstance()->query_pool_mem_tracker();
-    }
-    SCOPED_ATTACH_TASK_THREAD(ThreadContext::TaskType::QUERY, query_id, finst_id, query_tracker);
+    SCOPED_ATTACH_TASK(transmit_tracker.get(), ThreadContext::TaskType::QUERY, query_id, finst_id);
     VLOG_ROW << "transmit data: fragment_instance_id=" << print_id(request->finst_id())
              << " query_id=" << query_id << " node=" << request->node_id();
     // The response is accessed when done->Run is called in transmit_data(),
@@ -157,7 +158,7 @@ void PInternalServiceImpl::tablet_writer_open(google::protobuf::RpcController* c
                                               const PTabletWriterOpenRequest* request,
                                               PTabletWriterOpenResult* response,
                                               google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     VLOG_RPC << "tablet writer open, id=" << request->id() << ", index_id=" << request->index_id()
              << ", txn_id=" << request->txn_id();
     brpc::ClosureGuard closure_guard(done);
@@ -176,7 +177,7 @@ void PInternalServiceImpl::exec_plan_fragment(google::protobuf::RpcController* c
                                               google::protobuf::Closure* done) {
     auto span = telemetry::start_rpc_server_span("exec_plan_fragment", cntl_base);
     auto scope = OpentelemetryScope {span};
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     auto st = Status::OK();
     bool compact = request->has_compact() ? request->compact() : false;
@@ -202,7 +203,7 @@ void PInternalServiceImpl::exec_plan_fragment_start(google::protobuf::RpcControl
                                                     google::protobuf::Closure* done) {
     auto span = telemetry::start_rpc_server_span("exec_plan_fragment_start", controller);
     auto scope = OpentelemetryScope {span};
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     auto st = _exec_env->fragment_mgr()->start_query_execution(request);
     st.to_protobuf(result->mutable_status());
@@ -212,7 +213,7 @@ void PInternalServiceImpl::tablet_writer_add_block(google::protobuf::RpcControll
                                                    const PTabletWriterAddBlockRequest* request,
                                                    PTabletWriterAddBlockResult* response,
                                                    google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     // TODO(zxy) delete in 1.2 version
     brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base);
     attachment_transfer_request_block(request, cntl);
@@ -223,7 +224,7 @@ void PInternalServiceImpl::tablet_writer_add_block(google::protobuf::RpcControll
 void PInternalServiceImpl::tablet_writer_add_block_by_http(
         google::protobuf::RpcController* cntl_base, const ::doris::PEmptyRequest* request,
         PTabletWriterAddBlockResult* response, google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     PTabletWriterAddBlockRequest* request_raw = new PTabletWriterAddBlockRequest();
     google::protobuf::Closure* done_raw =
             new NewHttpClosure(request_raw, done);
@@ -270,14 +271,14 @@ void PInternalServiceImpl::tablet_writer_add_batch(google::protobuf::RpcControll
                                                    const PTabletWriterAddBatchRequest* request,
                                                    PTabletWriterAddBatchResult* response,
                                                    google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     _tablet_writer_add_batch(cntl_base, request, response, done);
 }
 
 void PInternalServiceImpl::tablet_writer_add_batch_by_http(
         google::protobuf::RpcController* cntl_base, const ::doris::PEmptyRequest* request,
         PTabletWriterAddBatchResult* response, google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     PTabletWriterAddBatchRequest* request_raw = new PTabletWriterAddBatchRequest();
     google::protobuf::Closure* done_raw =
             new NewHttpClosure(request_raw, done);
@@ -330,7 +331,7 @@ void PInternalServiceImpl::tablet_writer_cancel(google::protobuf::RpcController*
                                                 const PTabletWriterCancelRequest* request,
                                                 PTabletWriterCancelResult* response,
                                                 google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     VLOG_RPC << "tablet writer cancel, id=" << request->id() << ", index_id=" << request->index_id()
              << ", sender_id=" << request->sender_id();
     brpc::ClosureGuard closure_guard(done);
@@ -376,7 +377,7 @@ void PInternalServiceImpl::cancel_plan_fragment(google::protobuf::RpcController*
                                                 google::protobuf::Closure* done) {
     auto span = telemetry::start_rpc_server_span("exec_plan_fragment_start", cntl_base);
     auto scope = OpentelemetryScope {span};
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     TUniqueId tid;
     tid.__set_hi(request->finst_id().hi());
@@ -400,7 +401,7 @@ void PInternalServiceImpl::cancel_plan_fragment(google::protobuf::RpcController*
 void PInternalServiceImpl::fetch_data(google::protobuf::RpcController* cntl_base,
                                       const PFetchDataRequest* request, PFetchDataResult* result,
                                       google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base);
     GetResultBatchCtx* ctx = new GetResultBatchCtx(cntl, result, done);
     _exec_env->result_mgr()->fetch_data(request->finst_id(), ctx);
@@ -409,7 +410,7 @@ void PInternalServiceImpl::fetch_data(google::protobuf::RpcController* cntl_base
 void PInternalServiceImpl::get_info(google::protobuf::RpcController* controller,
                                     const PProxyRequest* request, PProxyResult* response,
                                     google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     // PProxyRequest is defined in gensrc/proto/internal_service.proto
     // Currently it supports 2 kinds of requests:
@@ -470,7 +471,7 @@ void PInternalServiceImpl::get_info(google::protobuf::RpcController* controller,
 void PInternalServiceImpl::update_cache(google::protobuf::RpcController* controller,
                                         const PUpdateCacheRequest* request,
                                         PCacheResponse* response, google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     _exec_env->result_cache()->update(request, response);
 }
@@ -478,7 +479,7 @@ void PInternalServiceImpl::update_cache(google::protobuf::RpcController* control
 void PInternalServiceImpl::fetch_cache(google::protobuf::RpcController* controller,
                                        const PFetchCacheRequest* request, PFetchCacheResult* result,
                                        google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     _exec_env->result_cache()->fetch(request, result);
 }
@@ -486,7 +487,7 @@ void PInternalServiceImpl::fetch_cache(google::protobuf::RpcController* controll
 void PInternalServiceImpl::clear_cache(google::protobuf::RpcController* controller,
                                        const PClearCacheRequest* request, PCacheResponse* response,
                                        google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     _exec_env->result_cache()->clear(request, response);
 }
@@ -495,7 +496,7 @@ void PInternalServiceImpl::merge_filter(::google::protobuf::RpcController* contr
                                         const ::doris::PMergeFilterRequest* request,
                                         ::doris::PMergeFilterResponse* response,
                                         ::google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     auto buf = static_cast<brpc::Controller*>(controller)->request_attachment();
     Status st = _exec_env->fragment_mgr()->merge_filter(request, buf.to_string().data());
@@ -509,7 +510,7 @@ void PInternalServiceImpl::apply_filter(::google::protobuf::RpcController* contr
                                         const ::doris::PPublishFilterRequest* request,
                                         ::doris::PPublishFilterResponse* response,
                                         ::google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     auto attachment = static_cast<brpc::Controller*>(controller)->request_attachment();
     UniqueId unique_id(request->query_id());
@@ -525,7 +526,7 @@ void PInternalServiceImpl::apply_filter(::google::protobuf::RpcController* contr
 void PInternalServiceImpl::send_data(google::protobuf::RpcController* controller,
                                      const PSendDataRequest* request, PSendDataResult* response,
                                      google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     TUniqueId fragment_instance_id;
     fragment_instance_id.hi = request->fragment_instance_id().hi();
@@ -548,7 +549,7 @@ void PInternalServiceImpl::send_data(google::protobuf::RpcController* controller
 void PInternalServiceImpl::commit(google::protobuf::RpcController* controller,
                                   const PCommitRequest* request, PCommitResult* response,
                                   google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     TUniqueId fragment_instance_id;
     fragment_instance_id.hi = request->fragment_instance_id().hi();
@@ -566,7 +567,7 @@ void PInternalServiceImpl::commit(google::protobuf::RpcController* controller,
 void PInternalServiceImpl::rollback(google::protobuf::RpcController* controller,
                                     const PRollbackRequest* request, PRollbackResult* response,
                                     google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     TUniqueId fragment_instance_id;
     fragment_instance_id.hi = request->fragment_instance_id().hi();
@@ -585,7 +586,7 @@ void PInternalServiceImpl::fold_constant_expr(google::protobuf::RpcController* c
                                               const PConstantExprRequest* request,
                                               PConstantExprResult* response,
                                               google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base);
 
@@ -620,7 +621,7 @@ void PInternalServiceImpl::transmit_block(google::protobuf::RpcController* cntl_
                                           const PTransmitDataParams* request,
                                           PTransmitDataResult* response,
                                           google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     // TODO(zxy) delete in 1.2 version
     brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base);
     attachment_transfer_request_block(request, cntl);
@@ -632,7 +633,7 @@ void PInternalServiceImpl::transmit_block_by_http(google::protobuf::RpcControlle
                                                   const PEmptyRequest* request,
                                                   PTransmitDataResult* response,
                                                   google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     PTransmitDataParams* request_raw = new PTransmitDataParams();
     google::protobuf::Closure* done_raw =
             new NewHttpClosure(request_raw, done);
@@ -648,20 +649,20 @@ void PInternalServiceImpl::_transmit_block(google::protobuf::RpcController* cntl
                                            const Status& extract_st) {
     std::string query_id;
     TUniqueId finst_id;
-    std::shared_ptr<MemTracker> query_tracker;
+    std::unique_ptr<MemTrackerLimiter> transmit_tracker;
     if (request->has_query_id()) {
         query_id = print_id(request->query_id());
         finst_id.__set_hi(request->finst_id().hi());
         finst_id.__set_lo(request->finst_id().lo());
         // In some cases, query mem tracker does not exist in BE when transmit block, will get null pointer.
-        query_tracker = _exec_env->task_pool_mem_tracker_registry()->get_task_mem_tracker(query_id);
+        transmit_tracker = std::make_unique<MemTrackerLimiter>(
+                -1, fmt::format("QueryTransmit#queryId={}", query_id),
+                _exec_env->task_pool_mem_tracker_registry()->get_task_mem_tracker(query_id));
     } else {
-        query_id = "default_transmit_block";
+        query_id = "unkown_transmit_block";
+        transmit_tracker = std::make_unique(-1, "unkown_transmit_block");
     }
-    if (!query_tracker) {
-        query_tracker = ExecEnv::GetInstance()->query_pool_mem_tracker();
-    }
-    SCOPED_ATTACH_TASK_THREAD(ThreadContext::TaskType::QUERY, query_id, finst_id, query_tracker);
+    SCOPED_ATTACH_TASK(transmit_tracker.get(), ThreadContext::TaskType::QUERY, query_id, finst_id);
     VLOG_ROW << "transmit block: fragment_instance_id=" << print_id(request->finst_id())
              << " query_id=" << query_id << " node=" << request->node_id();
     // The response is accessed when done->Run is called in transmit_block(),
@@ -688,7 +689,7 @@ void PInternalServiceImpl::check_rpc_channel(google::protobuf::RpcController* co
                                              const PCheckRPCChannelRequest* request,
                                              PCheckRPCChannelResponse* response,
                                              google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     response->mutable_status()->set_status_code(0);
     if (request->data().size() != request->size()) {
@@ -715,7 +716,7 @@ void PInternalServiceImpl::reset_rpc_channel(google::protobuf::RpcController* co
                                              const PResetRPCChannelRequest* request,
                                              PResetRPCChannelResponse* response,
                                              google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     response->mutable_status()->set_status_code(0);
     if (request->all()) {
@@ -749,7 +750,7 @@ void PInternalServiceImpl::hand_shake(google::protobuf::RpcController* cntl_base
                                       const PHandShakeRequest* request,
                                       PHandShakeResponse* response,
                                       google::protobuf::Closure* done) {
-    SCOPED_SWITCH_BTHREAD();
+    SCOPED_SWITCH_BTHREAD_TLS();
     brpc::ClosureGuard closure_guard(done);
     if (request->has_hello()) {
         response->set_hello(request->hello());
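In the rewritten _transmit_data/_transmit_block paths above, each RPC is given a short-lived tracker whose parent is the query's task tracker when it can be found, so transmit memory still rolls up into the query's accounting. The sketch below shows only that parent-chaining idea; ToyTracker is an invented class, not MemTracker or MemTrackerLimiter.

#include <atomic>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>

// Minimal parent-chained tracker; illustrative only.
class ToyTracker {
public:
    explicit ToyTracker(std::string label, ToyTracker* parent = nullptr)
            : _label(std::move(label)), _parent(parent) {}
    void consume(int64_t bytes) {
        _consumption.fetch_add(bytes, std::memory_order_relaxed);
        if (_parent != nullptr) _parent->consume(bytes); // also charge the query-level tracker
    }
    int64_t consumption() const { return _consumption.load(); }

private:
    std::string _label;
    ToyTracker* _parent;
    std::atomic<int64_t> _consumption{0};
};

int main() {
    ToyTracker query_tracker("Query#id=abc");
    // One short-lived tracker per transmit RPC, parented to the query tracker when it exists.
    ToyTracker transmit_tracker("QueryTransmit#queryId=abc", &query_tracker);
    transmit_tracker.consume(64 * 1024);
    std::cout << transmit_tracker.consumption() << " " << query_tracker.consumption() << std::endl;
    return 0;
}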
diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp
index 4216ea5942..a4c7a80cf0 100644
--- a/be/src/util/doris_metrics.cpp
+++ b/be/src/util/doris_metrics.cpp
@@ -135,8 +135,8 @@ DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(memtable_flush_total, MetricUnit::OPERATION
 DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(memtable_flush_duration_us, MetricUnit::MICROSECONDS);
 
 DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(attach_task_thread_count, MetricUnit::NOUNIT);
-DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(switch_thread_mem_tracker_count, MetricUnit::NOUNIT);
-DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(switch_thread_mem_tracker_err_cb_count, MetricUnit::NOUNIT);
+DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(add_thread_mem_tracker_consumer_count, MetricUnit::NOUNIT);
+DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(thread_mem_tracker_exceed_call_back_count, MetricUnit::NOUNIT);
 DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(switch_bthread_count, MetricUnit::NOUNIT);
 
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(memory_pool_bytes_total, MetricUnit::BYTES);
@@ -291,8 +291,8 @@ DorisMetrics::DorisMetrics() : _metric_registry(_s_registry_name) {
     INT_COUNTER_METRIC_REGISTER(_server_metric_entity, load_bytes);
 
     INT_COUNTER_METRIC_REGISTER(_server_metric_entity, attach_task_thread_count);
-    INT_COUNTER_METRIC_REGISTER(_server_metric_entity, switch_thread_mem_tracker_count);
-    INT_COUNTER_METRIC_REGISTER(_server_metric_entity, switch_thread_mem_tracker_err_cb_count);
+    INT_COUNTER_METRIC_REGISTER(_server_metric_entity, add_thread_mem_tracker_consumer_count);
+    INT_COUNTER_METRIC_REGISTER(_server_metric_entity, thread_mem_tracker_exceed_call_back_count);
     INT_COUNTER_METRIC_REGISTER(_server_metric_entity, switch_bthread_count);
 
     INT_UGAUGE_METRIC_REGISTER(_server_metric_entity, upload_total_byte);
diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h
index 20e00c3c05..2811da069b 100644
--- a/be/src/util/doris_metrics.h
+++ b/be/src/util/doris_metrics.h
@@ -127,8 +127,8 @@ public:
     IntCounter* memtable_flush_duration_us;
 
     IntCounter* attach_task_thread_count;
-    IntCounter* switch_thread_mem_tracker_count;
-    IntCounter* switch_thread_mem_tracker_err_cb_count;
+    IntCounter* add_thread_mem_tracker_consumer_count;
+    IntCounter* thread_mem_tracker_exceed_call_back_count;
     // brpc server response count
     IntCounter* switch_bthread_count;
 
diff --git a/be/src/util/file_utils.cpp b/be/src/util/file_utils.cpp
index dc6a6195b0..014d344f0c 100644
--- a/be/src/util/file_utils.cpp
+++ b/be/src/util/file_utils.cpp
@@ -193,13 +193,13 @@ Status FileUtils::md5sum(const std::string& file, std::string* md5sum) {
         return Status::InternalError("failed to stat file");
     }
     size_t file_len = statbuf.st_size;
-    CONSUME_THREAD_LOCAL_MEM_TRACKER(file_len);
+    CONSUME_THREAD_MEM_TRACKER(file_len);
     void* buf = mmap(0, file_len, PROT_READ, MAP_SHARED, fd, 0);
 
     unsigned char result[MD5_DIGEST_LENGTH];
     MD5((unsigned char*)buf, file_len, result);
     munmap(buf, file_len);
-    RELEASE_THREAD_LOCAL_MEM_TRACKER(file_len);
+    RELEASE_THREAD_MEM_TRACKER(file_len);
 
     std::stringstream ss;
     for (int32_t i = 0; i < MD5_DIGEST_LENGTH; i++) {
diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h
index 0565ad0ff9..d99ee9a12c 100644
--- a/be/src/vec/columns/predicate_column.h
+++ b/be/src/vec/columns/predicate_column.h
@@ -264,7 +264,7 @@ public:
                                  uint32_t* start_offset_array, size_t num) override {
         if constexpr (std::is_same_v) {
             if (_pool == nullptr) {
-                _pool.reset(new MemPool("PredicateStringColumn"));
+                _pool.reset(new MemPool());
             }
 
             size_t total_mem_size = 0;
diff --git a/be/src/vec/common/allocator.h b/be/src/vec/common/allocator.h
index 835aeb172c..4c96a375a5 100644
--- a/be/src/vec/common/allocator.h
+++ b/be/src/vec/common/allocator.h
@@ -124,10 +124,10 @@ public:
                                 alignment, size),
                         doris::TStatusCode::VEC_BAD_ARGUMENTS);
 
-            CONSUME_THREAD_LOCAL_MEM_TRACKER(size);
+            CONSUME_THREAD_MEM_TRACKER(size);
             buf = mmap(get_mmap_hint(), size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
             if (MAP_FAILED == buf) {
-                RELEASE_THREAD_LOCAL_MEM_TRACKER(size);
+                RELEASE_THREAD_MEM_TRACKER(size);
                 doris::vectorized::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", size),
                                                   doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY);
             }
@@ -175,7 +175,7 @@ public:
                 doris::vectorized::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", size),
                                                   doris::TStatusCode::VEC_CANNOT_MUNMAP);
             } else {
-                RELEASE_THREAD_LOCAL_MEM_TRACKER(size);
+                RELEASE_THREAD_MEM_TRACKER(size);
             }
         } else if (size >= CHUNK_THRESHOLD) {
             doris::ChunkAllocator::instance()->free((uint8_t*)buf, size);
@@ -208,13 +208,13 @@ public:
                     memset(reinterpret_cast(buf) + old_size, 0, new_size - old_size);
         } else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD) {
             /// Resize mmap'd memory region.
-            CONSUME_THREAD_LOCAL_MEM_TRACKER(new_size - old_size);
+            CONSUME_THREAD_MEM_TRACKER(new_size - old_size);
 
             // On apple and freebsd self-implemented mremap used (common/mremap.h)
             buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE,
                                     mmap_flags, -1, 0);
             if (MAP_FAILED == buf) {
-                RELEASE_THREAD_LOCAL_MEM_TRACKER(new_size - old_size);
+                RELEASE_THREAD_MEM_TRACKER(new_size - old_size);
                 doris::vectorized::throwFromErrno("Allocator: Cannot mremap memory chunk from " +
                                                           std::to_string(old_size) + " to " +
                                                           std::to_string(new_size) + ".",
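Because mmap, munmap, and mremap bypass the allocator hook, the changes above charge the thread tracker explicitly before the syscall and release the charge on failure or on unmap. A standalone POSIX sketch of that pairing follows, using a plain atomic counter in place of the thread mem tracker; tracked_mmap and tracked_munmap are invented helpers.

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <sys/mman.h>

std::atomic<int64_t> g_tracked_bytes{0}; // stands in for the thread mem tracker
void consume(int64_t n) { g_tracked_bytes += n; }
void release(int64_t n) { g_tracked_bytes -= n; }

void* tracked_mmap(size_t size) {
    consume(static_cast<int64_t>(size)); // charge before the allocation
    void* buf = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (buf == MAP_FAILED) {
        release(static_cast<int64_t>(size)); // undo the charge on failure
        return nullptr;
    }
    return buf;
}

void tracked_munmap(void* buf, size_t size) {
    if (munmap(buf, size) == 0) release(static_cast<int64_t>(size));
}

int main() {
    constexpr size_t kSize = 64 * 1024 * 1024;
    void* p = tracked_mmap(kSize);
    std::cout << "tracked after mmap: " << g_tracked_bytes << std::endl;
    if (p != nullptr) tracked_munmap(p, kSize);
    std::cout << "tracked after munmap: " << g_tracked_bytes << std::endl;
    return 0;
}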
diff --git a/be/src/vec/exec/file_scan_node.cpp b/be/src/vec/exec/file_scan_node.cpp
index 1141d603f3..87219f9326 100644
--- a/be/src/vec/exec/file_scan_node.cpp
+++ b/be/src/vec/exec/file_scan_node.cpp
@@ -19,7 +19,7 @@
 
 #include "common/config.h"
 #include "gen_cpp/PlanNodes_types.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/runtime_filter_mgr.h"
 #include "runtime/runtime_state.h"
 #include "runtime/string_value.h"
@@ -80,7 +80,7 @@ Status FileScanNode::init(const TPlanNode& tnode, RuntimeState* state) {
 Status FileScanNode::prepare(RuntimeState* state) {
     VLOG_QUERY << "FileScanNode prepare";
     RETURN_IF_ERROR(ScanNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     // get tuple desc
     _runtime_state = state;
     _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
@@ -111,8 +111,8 @@ Status FileScanNode::prepare(RuntimeState* state) {
 
 Status FileScanNode::open(RuntimeState* state) {
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     RETURN_IF_CANCELLED(state);
 
     RETURN_IF_ERROR(_acquire_and_build_runtime_filter(state));
@@ -147,7 +147,7 @@ Status FileScanNode::_acquire_and_build_runtime_filter(RuntimeState* state) {
             // std::list expr_context;
             // RETURN_IF_ERROR(runtime_filter->get_push_expr_ctxs(&expr_context));
             // for (auto ctx : expr_context) {
-            //     ctx->prepare(state, row_desc(), _expr_mem_tracker);
+            //     ctx->prepare(state, row_desc());
             //     ctx->open(state);
             //     int index = _conjunct_ctxs.size();
             //     _conjunct_ctxs.push_back(ctx);
@@ -164,7 +164,7 @@ Status FileScanNode::_acquire_and_build_runtime_filter(RuntimeState* state) {
         }
         IRuntimeFilter* runtime_filter = _runtime_filter_ctxs[i].runtimefilter;
         std::vector vexprs;
-        runtime_filter->get_prepared_vexprs(&vexprs, row_desc(), _expr_mem_tracker);
+        runtime_filter->get_prepared_vexprs(&vexprs, row_desc());
         if (vexprs.empty()) {
             continue;
         }
@@ -180,7 +180,7 @@ Status FileScanNode::_acquire_and_build_runtime_filter(RuntimeState* state) {
             last_expr = new_node;
         }
         auto new_vconjunct_ctx_ptr = _pool->add(new VExprContext(last_expr));
-        auto expr_status = new_vconjunct_ctx_ptr->prepare(state, row_desc(), expr_mem_tracker());
+        auto expr_status = new_vconjunct_ctx_ptr->prepare(state, row_desc());
         if (UNLIKELY(!expr_status.OK())) {
             LOG(WARNING) << "Something wrong for runtime filters: " << expr_status;
             vexprs.clear();
@@ -403,7 +403,10 @@ Status FileScanNode::scanner_scan(const TFileScanRange& scan_range, ScannerCount
                // 1. too many batches in queue, or
                // 2. at least one batch in queue and memory exceed limit.
                (_block_queue.size() >= _max_buffered_batches ||
-                (mem_tracker()->any_limit_exceeded() && !_block_queue.empty()))) {
+                (thread_context()
+                         ->_thread_mem_tracker_mgr->limiter_mem_tracker()
+                         ->any_limit_exceeded() &&
+                 !_block_queue.empty()))) {
             _queue_writer_cond.wait_for(l, std::chrono::seconds(1));
         }
         // Process already set failed, so we just return OK
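
For readers following the change above: the producer loop now consults the thread-local limiter tracker instead of the node's own tracker before queueing more blocks. Below is a minimal, self-contained C++ sketch of that back-pressure idea; `LimiterTracker` and `BlockQueue` are illustrative stand-ins, not the Doris classes.

    #include <atomic>
    #include <chrono>
    #include <condition_variable>
    #include <deque>
    #include <mutex>

    // Stand-in for the per-query limiter tracker reached via thread_context().
    struct LimiterTracker {
        std::atomic<long> consumed{0};
        long limit = 0;
        bool any_limit_exceeded() const { return limit > 0 && consumed.load() > limit; }
    };

    struct BlockQueue {
        std::mutex mu;
        std::condition_variable writer_cv;
        std::deque<int> blocks;       // payload type is irrelevant for the sketch
        size_t max_buffered = 32;

        // Mirrors the loop above: the producer waits while the queue is saturated,
        // or while at least one block is queued and the limiter is over its limit.
        void push(int block, const LimiterTracker& limiter) {
            std::unique_lock<std::mutex> l(mu);
            while (blocks.size() >= max_buffered ||
                   (limiter.any_limit_exceeded() && !blocks.empty())) {
                writer_cv.wait_for(l, std::chrono::seconds(1));
            }
            blocks.push_back(block);
        }
    };
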
diff --git a/be/src/vec/exec/file_scanner.cpp b/be/src/vec/exec/file_scanner.cpp
index 7de01a843f..a0f473ffc9 100644
--- a/be/src/vec/exec/file_scanner.cpp
+++ b/be/src/vec/exec/file_scanner.cpp
@@ -27,7 +27,6 @@
 #include "exec/text_converter.hpp"
 #include "exprs/expr_context.h"
 #include "runtime/descriptors.h"
-#include "runtime/mem_tracker.h"
 #include "runtime/raw_value.h"
 #include "runtime/runtime_state.h"
 #include "runtime/tuple.h"
@@ -43,15 +42,7 @@ FileScanner::FileScanner(RuntimeState* state, RuntimeProfile* profile,
           _ranges(ranges),
           _next_range(0),
           _counter(counter),
-#if BE_TEST
-          _mem_tracker(new MemTracker()),
-#else
-          _mem_tracker(MemTracker::create_tracker(
-                  -1, state->query_type() == TQueryType::LOAD
-                              ? "FileScanner:" + std::to_string(state->load_job_id())
-                              : "FileScanner:Select")),
-#endif
-          _mem_pool(std::make_unique<MemPool>(_mem_tracker.get())),
+          _mem_pool(std::make_unique<MemPool>()),
           _pre_filter_texprs(pre_filter_texprs),
           _profile(profile),
           _rows_read_counter(nullptr),
@@ -125,7 +116,7 @@ Status FileScanner::_init_expr_ctxes() {
         _vpre_filter_ctx_ptr.reset(new doris::vectorized::VExprContext*);
         RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(
                 _state->obj_pool(), _pre_filter_texprs[0], _vpre_filter_ctx_ptr.get()));
-        RETURN_IF_ERROR((*_vpre_filter_ctx_ptr)->prepare(_state, *_row_desc, _mem_tracker));
+        RETURN_IF_ERROR((*_vpre_filter_ctx_ptr)->prepare(_state, *_row_desc));
         RETURN_IF_ERROR((*_vpre_filter_ctx_ptr)->open(_state));
     }
 
diff --git a/be/src/vec/exec/file_scanner.h b/be/src/vec/exec/file_scanner.h
index 66940e8b37..16e75aefc0 100644
--- a/be/src/vec/exec/file_scanner.h
+++ b/be/src/vec/exec/file_scanner.h
@@ -76,7 +76,6 @@ protected:
 
     std::unique_ptr _row_desc;
 
-    std::shared_ptr<MemTracker> _mem_tracker;
     // Mem pool used to allocate _src_tuple and _src_tuple_row
     std::unique_ptr<MemPool> _mem_pool;
 
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 92e8609244..84ebb07033 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -19,7 +19,7 @@
 
 #include "gen_cpp/PlanNodes_types.h"
 #include "gutil/strings/substitute.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/runtime_filter_mgr.h"
 #include "util/defer_op.h"
 #include "vec/core/materialize_block.h"
@@ -60,7 +60,6 @@ struct ProcessHashTableBuild {
         Defer defer {[&]() {
             int64_t bucket_size = hash_table_ctx.hash_table.get_buffer_size_in_cells();
             int64_t bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes();
-            _join_node->_hash_table_mem_tracker->consume(bucket_bytes - old_bucket_bytes);
             _join_node->_mem_used += bucket_bytes - old_bucket_bytes;
             COUNTER_SET(_join_node->_build_buckets_counter, bucket_size);
         }};
@@ -782,8 +781,7 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
 
 Status HashJoinNode::prepare(RuntimeState* state) {
     RETURN_IF_ERROR(ExecNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
-    _hash_table_mem_tracker = MemTracker::create_virtual_tracker(-1, "VSetOperationNode:HashTable");
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
 
     // Build phase
     auto build_phase_profile = runtime_profile()->create_child("BuildPhase", true, true);
@@ -809,16 +807,13 @@ Status HashJoinNode::prepare(RuntimeState* state) {
     _push_compute_timer = ADD_TIMER(runtime_profile(), "PushDownComputeTime");
     _build_buckets_counter = ADD_COUNTER(runtime_profile(), "BuildBuckets", TUnit::UNIT);
 
-    RETURN_IF_ERROR(
-            VExpr::prepare(_build_expr_ctxs, state, child(1)->row_desc(), expr_mem_tracker()));
-    RETURN_IF_ERROR(
-            VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc(), expr_mem_tracker()));
+    RETURN_IF_ERROR(VExpr::prepare(_build_expr_ctxs, state, child(1)->row_desc()));
+    RETURN_IF_ERROR(VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc()));
 
     // _vother_join_conjuncts are evaluated in the context of the rows produced by this node
     if (_vother_join_conjunct_ptr) {
         RETURN_IF_ERROR(
-                (*_vother_join_conjunct_ptr)
-                        ->prepare(state, _row_desc_for_other_join_conjunt, expr_mem_tracker()));
+                (*_vother_join_conjunct_ptr)->prepare(state, _row_desc_for_other_join_conjunt));
     }
     // right table data types
     _right_table_data_types = VectorizedUtils::get_data_types(child(1)->row_desc());
@@ -844,8 +839,6 @@ Status HashJoinNode::close(RuntimeState* state) {
         (*_vother_join_conjunct_ptr)->close(state);
     }
 
-    _hash_table_mem_tracker->release(_mem_used);
-
     return ExecNode::close(state);
 }
 
@@ -989,8 +982,8 @@ Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eo
 Status HashJoinNode::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "HashJoinNode::open");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     RETURN_IF_CANCELLED(state);
 
     RETURN_IF_ERROR(VExpr::open(_build_expr_ctxs, state));
@@ -1019,13 +1012,13 @@ Status HashJoinNode::open(RuntimeState* state) {
 
 void HashJoinNode::_hash_table_build_thread(RuntimeState* state, std::promise* status) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "HashJoinNode::_hash_table_build_thread");
-    SCOPED_ATTACH_TASK_THREAD(state, mem_tracker());
+    SCOPED_ATTACH_TASK(state);
     status->set_value(_hash_table_build(state));
 }
 
 Status HashJoinNode::_hash_table_build(RuntimeState* state) {
     RETURN_IF_ERROR(child(1)->open(state));
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB("Hash join, while constructing the hash table.");
+    SCOPED_UPDATE_MEM_EXCEED_CALL_BACK("Hash join, while constructing the hash table.");
     SCOPED_TIMER(_build_timer);
     MutableBlock mutable_block(child(1)->row_desc().tuple_descriptors());
 
@@ -1043,7 +1036,6 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) {
 
         RETURN_IF_ERROR_AND_CHECK_SPAN(child(1)->get_next(state, &block, &eos),
                                        child(1)->get_next_span(), eos);
-        _hash_table_mem_tracker->consume(block.allocated_bytes());
         _mem_used += block.allocated_bytes();
 
         if (block.rows() != 0) {
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 9ea79344f4..3fcd9ded69 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -231,8 +231,6 @@ private:
     std::vector _build_block_offsets;
     std::vector _build_block_rows;
 
-    std::shared_ptr<MemTracker> _hash_table_mem_tracker;
-
     std::vector _hash_output_slot_ids;
     std::vector _left_output_slot_flags;
     std::vector _right_output_slot_flags;
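
The hunks above drop the dedicated hash-table tracker and keep only a plain `_mem_used` byte counter, updated with the size delta of the table on scope exit while the enclosing scope charges the node's tracker. A small standalone sketch of that delta-accounting pattern follows; `Defer` here is a local re-implementation, not the one from util/defer_op.h, and the size proxy is deliberately crude.

    #include <cstdint>
    #include <functional>
    #include <unordered_map>

    // Runs the captured callable when the scope ends.
    class Defer {
    public:
        explicit Defer(std::function<void()> fn) : _fn(std::move(fn)) {}
        ~Defer() { _fn(); }
    private:
        std::function<void()> _fn;
    };

    struct BuildState {
        std::unordered_map<int64_t, int64_t> hash_table;
        int64_t mem_used = 0;  // replaces the per-structure MemTracker

        void build(const int64_t* keys, size_t n) {
            int64_t old_bytes = hash_table.bucket_count() * sizeof(void*);  // rough size proxy
            Defer defer{[&]() {
                int64_t new_bytes = hash_table.bucket_count() * sizeof(void*);
                mem_used += new_bytes - old_bytes;  // record only the growth of the table
            }};
            for (size_t i = 0; i < n; ++i) hash_table[keys[i]] = static_cast<int64_t>(i);
        }
    };
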
diff --git a/be/src/vec/exec/vaggregation_node.cpp b/be/src/vec/exec/vaggregation_node.cpp
index 16f01902cc..c7539996c6 100644
--- a/be/src/vec/exec/vaggregation_node.cpp
+++ b/be/src/vec/exec/vaggregation_node.cpp
@@ -232,20 +232,18 @@ void AggregationNode::_init_hash_method(std::vector& probe_exprs)
 Status AggregationNode::prepare(RuntimeState* state) {
     SCOPED_TIMER(_runtime_profile->total_time_counter());
     RETURN_IF_ERROR(ExecNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     _build_timer = ADD_TIMER(runtime_profile(), "BuildTime");
     _serialize_key_timer = ADD_TIMER(runtime_profile(), "SerializeKeyTimer");
     _exec_timer = ADD_TIMER(runtime_profile(), "ExecTime");
     _merge_timer = ADD_TIMER(runtime_profile(), "MergeTime");
     _expr_timer = ADD_TIMER(runtime_profile(), "ExprTime");
     _get_results_timer = ADD_TIMER(runtime_profile(), "GetResultsTime");
-    _data_mem_tracker =
-            MemTracker::create_virtual_tracker(-1, "AggregationNode:Data", mem_tracker());
+    _data_mem_tracker = std::make_unique<MemTracker>("AggregationNode:Data");
     _intermediate_tuple_desc = state->desc_tbl().get_tuple_descriptor(_intermediate_tuple_id);
     _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id);
     DCHECK_EQ(_intermediate_tuple_desc->slots().size(), _output_tuple_desc->slots().size());
-    RETURN_IF_ERROR(
-            VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc(), expr_mem_tracker()));
+    RETURN_IF_ERROR(VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc()));
 
     _mem_pool = std::make_unique<MemPool>();
 
@@ -263,7 +261,7 @@ Status AggregationNode::prepare(RuntimeState* state) {
         SlotDescriptor* output_slot_desc = _output_tuple_desc->slots()[j];
         RETURN_IF_ERROR(_aggregate_evaluators[i]->prepare(state, child(0)->row_desc(),
                                                           _mem_pool.get(), intermediate_slot_desc,
-                                                          output_slot_desc, mem_tracker()));
+                                                          output_slot_desc));
     }
 
     // set profile timer to evaluators
@@ -361,9 +359,9 @@ Status AggregationNode::prepare(RuntimeState* state) {
 Status AggregationNode::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "AggregationNode::open");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB("aggregator, while execute open.");
+    SCOPED_UPDATE_MEM_EXCEED_CALL_BACK("aggregator, while execute open.");
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
 
     RETURN_IF_ERROR(VExpr::open(_probe_expr_ctxs, state));
 
@@ -405,8 +403,8 @@ Status AggregationNode::get_next(RuntimeState* state, RowBatch* row_batch, bool*
 Status AggregationNode::get_next(RuntimeState* state, Block* block, bool* eos) {
     INIT_AND_SCOPE_GET_NEXT_SPAN(state->get_tracer(), _get_next_span, "AggregationNode::get_next");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker());
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB("aggregator, while execute get_next.");
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
+    SCOPED_UPDATE_MEM_EXCEED_CALL_BACK("aggregator, while execute get_next.");
 
     if (_is_streaming_preagg) {
         bool child_eos = false;
diff --git a/be/src/vec/exec/vaggregation_node.h b/be/src/vec/exec/vaggregation_node.h
index cf49a3f9c7..08834e4611 100644
--- a/be/src/vec/exec/vaggregation_node.h
+++ b/be/src/vec/exec/vaggregation_node.h
@@ -520,7 +520,7 @@ private:
     bool _is_merge;
     std::unique_ptr<MemPool> _mem_pool;
 
-    std::shared_ptr<MemTracker> _data_mem_tracker;
+    std::unique_ptr<MemTracker> _data_mem_tracker;
 
     size_t _align_aggregate_states = 1;
     /// The offset to the n-th aggregate function in a row of aggregate functions.
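
As the header change above shows, trackers that only observe (no limit, no hierarchy) become `std::unique_ptr<MemTracker>` members with explicit consume/release. A minimal counter-only model of that idea, under the assumption that the real class in runtime/memory/mem_tracker.h is essentially a labeled atomic counter; `MemTrackerSketch` and `AggregationNodeSketch` are invented names.

    #include <atomic>
    #include <cstdint>
    #include <memory>
    #include <string>

    class MemTrackerSketch {
    public:
        explicit MemTrackerSketch(std::string label) : _label(std::move(label)) {}
        void consume(int64_t bytes) { _consumed.fetch_add(bytes, std::memory_order_relaxed); }
        void release(int64_t bytes) { consume(-bytes); }
        int64_t consumption() const { return _consumed.load(std::memory_order_relaxed); }
        const std::string& label() const { return _label; }
    private:
        std::string _label;
        std::atomic<int64_t> _consumed{0};
    };

    struct AggregationNodeSketch {
        // Owned by the node and destroyed with it; worker threads only bump the counter.
        std::unique_ptr<MemTrackerSketch> data_mem_tracker =
                std::make_unique<MemTrackerSketch>("AggregationNode:Data");
    };
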
diff --git a/be/src/vec/exec/vanalytic_eval_node.cpp b/be/src/vec/exec/vanalytic_eval_node.cpp
index 7a4a3dc684..3906062cc4 100644
--- a/be/src/vec/exec/vanalytic_eval_node.cpp
+++ b/be/src/vec/exec/vanalytic_eval_node.cpp
@@ -147,9 +147,9 @@ Status VAnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) {
 Status VAnalyticEvalNode::prepare(RuntimeState* state) {
     SCOPED_TIMER(_runtime_profile->total_time_counter());
     RETURN_IF_ERROR(ExecNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     DCHECK(child(0)->row_desc().is_prefix_of(row_desc()));
-    _mem_pool.reset(new MemPool(mem_tracker().get()));
+    _mem_pool.reset(new MemPool(mem_tracker()));
     _evaluation_timer = ADD_TIMER(runtime_profile(), "EvaluationTime");
     SCOPED_TIMER(_evaluation_timer);
 
@@ -159,8 +159,7 @@ Status VAnalyticEvalNode::prepare(RuntimeState* state) {
         SlotDescriptor* intermediate_slot_desc = _intermediate_tuple_desc->slots()[i];
         SlotDescriptor* output_slot_desc = _output_tuple_desc->slots()[i];
         RETURN_IF_ERROR(_agg_functions[i]->prepare(state, child(0)->row_desc(), _mem_pool.get(),
-                                                   intermediate_slot_desc, output_slot_desc,
-                                                   mem_tracker()));
+                                                   intermediate_slot_desc, output_slot_desc));
     }
 
     _offsets_of_aggregate_states.resize(_agg_functions_size);
@@ -193,7 +192,7 @@ Status VAnalyticEvalNode::prepare(RuntimeState* state) {
                             std::placeholders::_2, std::placeholders::_3, std::placeholders::_4);
 
     for (const auto& ctx : _agg_expr_ctxs) {
-        VExpr::prepare(ctx, state, child(0)->row_desc(), expr_mem_tracker());
+        VExpr::prepare(ctx, state, child(0)->row_desc());
     }
     if (!_partition_by_eq_expr_ctxs.empty() || !_order_by_eq_expr_ctxs.empty()) {
         vector tuple_ids;
@@ -201,12 +200,10 @@ Status VAnalyticEvalNode::prepare(RuntimeState* state) {
         tuple_ids.push_back(_buffered_tuple_id);
         RowDescriptor cmp_row_desc(state->desc_tbl(), tuple_ids, vector<bool>(2, false));
         if (!_partition_by_eq_expr_ctxs.empty()) {
-            RETURN_IF_ERROR(VExpr::prepare(_partition_by_eq_expr_ctxs, state, cmp_row_desc,
-                                           expr_mem_tracker()));
+            RETURN_IF_ERROR(VExpr::prepare(_partition_by_eq_expr_ctxs, state, cmp_row_desc));
         }
         if (!_order_by_eq_expr_ctxs.empty()) {
-            RETURN_IF_ERROR(VExpr::prepare(_order_by_eq_expr_ctxs, state, cmp_row_desc,
-                                           expr_mem_tracker()));
+            RETURN_IF_ERROR(VExpr::prepare(_order_by_eq_expr_ctxs, state, cmp_row_desc));
         }
     }
     return Status::OK();
@@ -215,8 +212,8 @@ Status VAnalyticEvalNode::prepare(RuntimeState* state) {
 Status VAnalyticEvalNode::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VAnalyticEvalNode::open");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     RETURN_IF_CANCELLED(state);
     RETURN_IF_ERROR(child(0)->open(state));
     RETURN_IF_ERROR(VExpr::open(_partition_by_eq_expr_ctxs, state));
@@ -253,7 +250,7 @@ Status VAnalyticEvalNode::get_next(RuntimeState* state, vectorized::Block* block
     INIT_AND_SCOPE_GET_NEXT_SPAN(state->get_tracer(), _get_next_span,
                                  "VAnalyticEvalNode::get_next");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     RETURN_IF_CANCELLED(state);
 
     if (_input_eos && _output_block_index == _input_blocks.size()) {
diff --git a/be/src/vec/exec/vblocking_join_node.cpp b/be/src/vec/exec/vblocking_join_node.cpp
index 8600c333ca..62edd021aa 100644
--- a/be/src/vec/exec/vblocking_join_node.cpp
+++ b/be/src/vec/exec/vblocking_join_node.cpp
@@ -41,9 +41,9 @@ Status VBlockingJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
 Status VBlockingJoinNode::prepare(RuntimeState* state) {
     SCOPED_TIMER(_runtime_profile->total_time_counter());
     RETURN_IF_ERROR(ExecNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
 
-    _build_pool.reset(new MemPool(mem_tracker().get()));
+    _build_pool.reset(new MemPool(mem_tracker()));
     _build_timer = ADD_TIMER(runtime_profile(), "BuildTime");
     _left_child_timer = ADD_TIMER(runtime_profile(), "LeftChildTime");
     _build_row_counter = ADD_COUNTER(runtime_profile(), "BuildRows", TUnit::UNIT);
@@ -71,7 +71,7 @@ Status VBlockingJoinNode::close(RuntimeState* state) {
 }
 
 void VBlockingJoinNode::build_side_thread(RuntimeState* state, std::promise* status) {
-    SCOPED_ATTACH_TASK_THREAD(state, mem_tracker());
+    SCOPED_ATTACH_TASK(state);
     status->set_value(construct_build_side(state));
     // Release the thread token as soon as possible (before the main thread joins
     // on it).  This way, if we had a chain of 10 joins using 1 additional thread,
@@ -81,8 +81,8 @@ void VBlockingJoinNode::build_side_thread(RuntimeState* state, std::promise
 Status VBlockingJoinNode::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VBlockingJoinNode::open")
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
 
     RETURN_IF_CANCELLED(state);
 
diff --git a/be/src/vec/exec/vbroker_scan_node.cpp b/be/src/vec/exec/vbroker_scan_node.cpp
index b765df56b3..0a85a60aa7 100644
--- a/be/src/vec/exec/vbroker_scan_node.cpp
+++ b/be/src/vec/exec/vbroker_scan_node.cpp
@@ -18,7 +18,7 @@
 #include "vec/exec/vbroker_scan_node.h"
 
 #include "gen_cpp/PlanNodes_types.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/runtime_state.h"
 #include "runtime/string_value.h"
 #include "runtime/tuple.h"
@@ -59,7 +59,7 @@ Status VBrokerScanNode::init(const TPlanNode& tnode, RuntimeState* state) {
 Status VBrokerScanNode::prepare(RuntimeState* state) {
     VLOG_QUERY << "VBrokerScanNode prepare";
     RETURN_IF_ERROR(ScanNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     // get tuple desc
     _runtime_state = state;
     _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
@@ -84,8 +84,8 @@ Status VBrokerScanNode::prepare(RuntimeState* state) {
 Status VBrokerScanNode::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VBrokerScanNode::open");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     RETURN_IF_CANCELLED(state);
 
     RETURN_IF_ERROR(start_scanners());
@@ -246,7 +246,10 @@ Status VBrokerScanNode::scanner_scan(const TBrokerScanRange& scan_range, Scanner
                // 1. too many batches in queue, or
                // 2. at least one batch in queue and memory exceed limit.
                (_block_queue.size() >= _max_buffered_batches ||
-                (mem_tracker()->any_limit_exceeded() && !_block_queue.empty()))) {
+                (thread_context()
+                         ->_thread_mem_tracker_mgr->limiter_mem_tracker()
+                         ->any_limit_exceeded() &&
+                 !_block_queue.empty()))) {
             _queue_writer_cond.wait_for(l, std::chrono::seconds(1));
         }
         // Process already set failed, so we just return OK
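
A recurring detail in the open() hunks above is ordering: the consume scope is entered only after `ExecNode::open(state)`, so memory allocated while opening children is not billed to this node. A rough model of why the order matters, using an invented `ScopedConsume` guard over a thread-local "current tracker" pointer (this is not the real SCOPED_CONSUME_MEM_TRACKER implementation):

    #include <cstdint>

    struct TrackerSketch { int64_t consumed = 0; void consume(int64_t b) { consumed += b; } };

    thread_local TrackerSketch* g_current_tracker = nullptr;

    class ScopedConsume {
    public:
        explicit ScopedConsume(TrackerSketch* t) : _saved(g_current_tracker) { g_current_tracker = t; }
        ~ScopedConsume() { g_current_tracker = _saved; }
    private:
        TrackerSketch* _saved;
    };

    void charge(int64_t bytes) { if (g_current_tracker) g_current_tracker->consume(bytes); }

    void open_children() { charge(1024); }  // billed to whatever tracker the caller left current

    void open_node(TrackerSketch* node_tracker) {
        open_children();                    // children's memory is not billed to this node
        ScopedConsume guard(node_tracker);  // from here on, allocations count against the node
        charge(4096);
    }
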
diff --git a/be/src/vec/exec/vcross_join_node.cpp b/be/src/vec/exec/vcross_join_node.cpp
index 8654160513..d3c9de5843 100644
--- a/be/src/vec/exec/vcross_join_node.cpp
+++ b/be/src/vec/exec/vcross_join_node.cpp
@@ -33,9 +33,7 @@ VCrossJoinNode::VCrossJoinNode(ObjectPool* pool, const TPlanNode& tnode, const D
 Status VCrossJoinNode::prepare(RuntimeState* state) {
     DCHECK(_join_op == TJoinOp::CROSS_JOIN);
     RETURN_IF_ERROR(VBlockingJoinNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
-    _block_mem_tracker =
-            MemTracker::create_virtual_tracker(-1, "VCrossJoinNode:Block", mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
 
     _num_existing_columns = child(0)->row_desc().num_materialized_slots();
     _num_columns_to_add = child(1)->row_desc().num_materialized_slots();
@@ -48,7 +46,6 @@ Status VCrossJoinNode::close(RuntimeState* state) {
         return Status::OK();
     }
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VCrossJoinNode::close");
-    _block_mem_tracker->release(_total_mem_usage);
     VBlockingJoinNode::close(state);
     return Status::OK();
 }
@@ -56,8 +53,7 @@ Status VCrossJoinNode::close(RuntimeState* state) {
 Status VCrossJoinNode::construct_build_side(RuntimeState* state) {
     // Do a full scan of child(1) and store all build row batches.
     RETURN_IF_ERROR(child(1)->open(state));
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB(
-            "Vec Cross join, while getting next from the child 1");
+    SCOPED_UPDATE_MEM_EXCEED_CALL_BACK("Vec Cross join, while getting next from the child 1");
 
     bool eos = false;
     while (true) {
@@ -74,7 +70,6 @@ Status VCrossJoinNode::construct_build_side(RuntimeState* state) {
             _build_rows += rows;
             _total_mem_usage += mem_usage;
             _build_blocks.emplace_back(std::move(block));
-            _block_mem_tracker->consume(mem_usage);
         }
 
         if (eos) {
@@ -96,7 +91,7 @@ Status VCrossJoinNode::get_next(RuntimeState* state, Block* block, bool* eos) {
     INIT_AND_SCOPE_GET_NEXT_SPAN(state->get_tracer(), _get_next_span, "VCrossJoinNode::get_next");
     RETURN_IF_CANCELLED(state);
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     *eos = false;
 
     if (_eos) {
diff --git a/be/src/vec/exec/vcross_join_node.h b/be/src/vec/exec/vcross_join_node.h
index 94c03a2c0f..ba517861bf 100644
--- a/be/src/vec/exec/vcross_join_node.h
+++ b/be/src/vec/exec/vcross_join_node.h
@@ -64,8 +64,6 @@ private:
     uint64_t _build_rows = 0;
     uint64_t _total_mem_usage = 0;
 
-    std::shared_ptr<MemTracker> _block_mem_tracker;
-
     // Build mutable columns to insert data.
     // if block can mem reuse, just clear data in block
     // else build a new block and alloc mem of column from left and right child block
diff --git a/be/src/vec/exec/ves_http_scan_node.cpp b/be/src/vec/exec/ves_http_scan_node.cpp
index 34dc7d19bd..3029da4f8c 100644
--- a/be/src/vec/exec/ves_http_scan_node.cpp
+++ b/be/src/vec/exec/ves_http_scan_node.cpp
@@ -64,7 +64,7 @@ Status VEsHttpScanNode::init(const TPlanNode& tnode, RuntimeState* state) {
 Status VEsHttpScanNode::prepare(RuntimeState* state) {
     VLOG_QUERY << "VEsHttpScanNode prepare";
     RETURN_IF_ERROR(ScanNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
 
     _scanner_profile.reset(new RuntimeProfile("EsHttpScanNode"));
     runtime_profile()->add_child(_scanner_profile.get(), true, nullptr);
@@ -120,8 +120,8 @@ Status VEsHttpScanNode::build_conjuncts_list() {
 Status VEsHttpScanNode::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VEsHttpScanNode::open");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     RETURN_IF_CANCELLED(state);
 
     // if conjunct is constant, compute direct and set eos = true
@@ -273,7 +273,7 @@ Status VEsHttpScanNode::scanner_scan(std::unique_ptr scanner) {
     bool scanner_eof = false;
 
     const int batch_size = _runtime_state->batch_size();
-    std::unique_ptr<MemPool> tuple_pool(new MemPool(mem_tracker().get()));
+    std::unique_ptr<MemPool> tuple_pool(new MemPool(mem_tracker()));
     size_t slot_num = _tuple_desc->slots().size();
 
     while (!scanner_eof) {
@@ -384,7 +384,7 @@ void VEsHttpScanNode::debug_string(int ident_level, std::stringstream* out) cons
 
 void VEsHttpScanNode::scanner_worker(int start_idx, int length, std::promise& p_status) {
     START_AND_SCOPE_SPAN(_runtime_state->get_tracer(), span, "VEsHttpScanNode::scanner_worker");
-    SCOPED_ATTACH_TASK_THREAD(_runtime_state, mem_tracker());
+    SCOPED_ATTACH_TASK(_runtime_state);
     // Clone expr context
     std::vector scanner_expr_ctxs;
     DCHECK(start_idx < length);
diff --git a/be/src/vec/exec/vexchange_node.cpp b/be/src/vec/exec/vexchange_node.cpp
index 33909fb9ff..c9cc87d6bd 100644
--- a/be/src/vec/exec/vexchange_node.cpp
+++ b/be/src/vec/exec/vexchange_node.cpp
@@ -49,7 +49,7 @@ Status VExchangeNode::init(const TPlanNode& tnode, RuntimeState* state) {
 
 Status VExchangeNode::prepare(RuntimeState* state) {
     RETURN_IF_ERROR(ExecNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     DCHECK_GT(_num_senders, 0);
     _sub_plan_query_statistics_recvr.reset(new QueryStatisticsRecvr());
     _stream_recvr = state->exec_env()->vstream_mgr()->create_recvr(
@@ -58,17 +58,15 @@ Status VExchangeNode::prepare(RuntimeState* state) {
             _sub_plan_query_statistics_recvr);
 
     if (_is_merging) {
-        RETURN_IF_ERROR(_vsort_exec_exprs.prepare(state, _row_descriptor, _row_descriptor,
-                                                  expr_mem_tracker()));
+        RETURN_IF_ERROR(_vsort_exec_exprs.prepare(state, _row_descriptor, _row_descriptor));
     }
     return Status::OK();
 }
 Status VExchangeNode::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VExchangeNode::open");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
-    ADD_THREAD_LOCAL_MEM_TRACKER(_stream_recvr->mem_tracker());
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
 
     if (_is_merging) {
         RETURN_IF_ERROR(_vsort_exec_exprs.open(state));
@@ -86,7 +84,7 @@ Status VExchangeNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* e
 Status VExchangeNode::get_next(RuntimeState* state, Block* block, bool* eos) {
     INIT_AND_SCOPE_GET_NEXT_SPAN(state->get_tracer(), _get_next_span, "VExchangeNode::get_next");
     SCOPED_TIMER(runtime_profile()->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     auto status = _stream_recvr->get_next(block, eos);
     if (block != nullptr) {
         if (_num_rows_returned + block->rows() < _limit) {
diff --git a/be/src/vec/exec/vmysql_scan_node.cpp b/be/src/vec/exec/vmysql_scan_node.cpp
index 595a195fe5..c1c206a371 100644
--- a/be/src/vec/exec/vmysql_scan_node.cpp
+++ b/be/src/vec/exec/vmysql_scan_node.cpp
@@ -50,7 +50,7 @@ Status VMysqlScanNode::prepare(RuntimeState* state) {
     }
 
     RETURN_IF_ERROR(ScanNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     // get tuple desc
     _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
 
@@ -80,7 +80,7 @@ Status VMysqlScanNode::prepare(RuntimeState* state) {
         return Status::InternalError("new a mysql scanner failed.");
     }
 
-    _tuple_pool.reset(new (std::nothrow) MemPool("MysqlScanNode"));
+    _tuple_pool.reset(new (std::nothrow) MemPool());
 
     if (_tuple_pool.get() == nullptr) {
         return Status::InternalError("new a mem pool failed.");
@@ -100,8 +100,8 @@ Status VMysqlScanNode::prepare(RuntimeState* state) {
 Status VMysqlScanNode::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VMysqlScanNode::open");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     VLOG_CRITICAL << "MysqlScanNode::Open";
 
     if (nullptr == state) {
diff --git a/be/src/vec/exec/vodbc_scan_node.cpp b/be/src/vec/exec/vodbc_scan_node.cpp
index a292fad2a1..4b75f332b1 100644
--- a/be/src/vec/exec/vodbc_scan_node.cpp
+++ b/be/src/vec/exec/vodbc_scan_node.cpp
@@ -47,7 +47,7 @@ Status VOdbcScanNode::prepare(RuntimeState* state) {
     }
 
     RETURN_IF_ERROR(ScanNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     // get tuple desc
     _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
 
@@ -67,7 +67,7 @@ Status VOdbcScanNode::prepare(RuntimeState* state) {
         return Status::InternalError("new a odbc scanner failed.");
     }
 
-    _tuple_pool.reset(new (std::nothrow) MemPool("OdbcScanNode"));
+    _tuple_pool.reset(new (std::nothrow) MemPool());
 
     if (_tuple_pool.get() == nullptr) {
         return Status::InternalError("new a mem pool failed.");
@@ -87,8 +87,8 @@ Status VOdbcScanNode::prepare(RuntimeState* state) {
 Status VOdbcScanNode::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VOdbcScanNode::open");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     VLOG_CRITICAL << _scan_node_type << "::Open";
 
     if (nullptr == state) {
diff --git a/be/src/vec/exec/volap_scan_node.cpp b/be/src/vec/exec/volap_scan_node.cpp
index b3a88cbe40..544aacdd3d 100644
--- a/be/src/vec/exec/volap_scan_node.cpp
+++ b/be/src/vec/exec/volap_scan_node.cpp
@@ -186,7 +186,7 @@ void VOlapScanNode::_init_counter(RuntimeState* state) {
 Status VOlapScanNode::prepare(RuntimeState* state) {
     init_scan_profile();
     RETURN_IF_ERROR(ScanNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     // create scanner profile
     // create timer
     _tablet_counter = ADD_COUNTER(runtime_profile(), "TabletCount ", TUnit::UNIT);
@@ -197,8 +197,7 @@ Status VOlapScanNode::prepare(RuntimeState* state) {
     _init_counter(state);
     _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
 
-    _scanner_mem_tracker = MemTracker::create_tracker(state->instance_mem_tracker()->limit(),
-                                                      "Scanners", mem_tracker());
+    _scanner_mem_tracker = std::make_unique<MemTracker>("OlapScanners");
 
     if (_tuple_desc == nullptr) {
         // TODO: make sure we print all available diagnostic output to our error log
@@ -236,9 +235,9 @@ Status VOlapScanNode::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VOlapScanNode::open");
     VLOG_CRITICAL << "VOlapScanNode::Open";
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
     RETURN_IF_CANCELLED(state);
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
 
     _resource_info = ResourceTls::get_resource_tls();
 
@@ -264,7 +263,7 @@ Status VOlapScanNode::open(RuntimeState* state) {
             _runtime_filter_ctxs[i].runtimefilter = runtime_filter;
 
             for (auto ctx : expr_context) {
-                ctx->prepare(state, row_desc(), _expr_mem_tracker);
+                ctx->prepare(state, row_desc());
                 ctx->open(state);
                 int index = _conjunct_ctxs.size();
                 _conjunct_ctxs.push_back(ctx);
@@ -280,7 +279,7 @@ Status VOlapScanNode::open(RuntimeState* state) {
 void VOlapScanNode::transfer_thread(RuntimeState* state) {
     // scanner open pushdown to scanThread
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VOlapScanNode::transfer_thread");
-    SCOPED_ATTACH_TASK_THREAD(state, mem_tracker());
+    SCOPED_ATTACH_TASK(state);
     Status status = Status::OK();
 
     if (_vconjunct_ctx_ptr) {
@@ -324,8 +323,6 @@ void VOlapScanNode::transfer_thread(RuntimeState* state) {
         _buffered_bytes += block->allocated_bytes();
     }
 
-    _block_mem_tracker->consume(_buffered_bytes);
-
     // read from scanner
     while (LIKELY(status.ok())) {
         int assigned_thread_num = _start_scanner_thread_task(state, block_per_scanner);
@@ -387,8 +384,7 @@ void VOlapScanNode::transfer_thread(RuntimeState* state) {
 void VOlapScanNode::scanner_thread(VOlapScanner* scanner) {
     START_AND_SCOPE_SPAN(scanner->runtime_state()->get_tracer(), span,
                          "VOlapScanNode::scanner_thread");
-    SCOPED_ATTACH_TASK_THREAD(_runtime_state, mem_tracker());
-    ADD_THREAD_LOCAL_MEM_TRACKER(scanner->mem_tracker());
+    SCOPED_ATTACH_TASK(_runtime_state);
     Thread::set_self_name("volap_scanner");
     int64_t wait_time = scanner->update_wait_worker_timer();
     // Do not use ScopedTimer. There is no guarantee that, the counter
@@ -432,7 +428,7 @@ void VOlapScanNode::scanner_thread(VOlapScanner* scanner) {
             DCHECK(runtime_filter != nullptr);
             bool ready = runtime_filter->is_ready();
             if (ready) {
-                runtime_filter->get_prepared_vexprs(&vexprs, row_desc(), _expr_mem_tracker);
+                runtime_filter->get_prepared_vexprs(&vexprs, row_desc());
                 scanner_filter_apply_marks[i] = true;
                 if (!_runtime_filter_ready_flag[i] && !vexprs.empty()) {
                     std::unique_lock l(*(_rf_locks[i]));
@@ -456,8 +452,7 @@ void VOlapScanNode::scanner_thread(VOlapScanner* scanner) {
                             _rf_vexpr_set.insert(vexprs[j]);
                         }
                         auto new_vconjunct_ctx_ptr = _pool->add(new VExprContext(last_expr));
-                        auto expr_status = new_vconjunct_ctx_ptr->prepare(state, row_desc(),
-                                                                          expr_mem_tracker());
+                        auto expr_status = new_vconjunct_ctx_ptr->prepare(state, row_desc());
                         // If error occurs in `prepare` or `open` phase, discard these runtime
                         // filters directly.
                         if (UNLIKELY(!expr_status.OK())) {
@@ -1554,7 +1549,6 @@ Status VOlapScanNode::start_scan_thread(RuntimeState* state) {
         return Status::OK();
     }
     auto span = opentelemetry::trace::Tracer::GetCurrentSpan();
-    _block_mem_tracker = MemTracker::create_virtual_tracker(-1, "VOlapScanNode:Block");
 
     // ranges constructed from scan keys
     std::vector> cond_ranges;
@@ -1616,7 +1610,7 @@ Status VOlapScanNode::start_scan_thread(RuntimeState* state) {
             }
             VOlapScanner* scanner =
                     new VOlapScanner(state, this, _olap_scan_node.is_preaggregation,
-                                     _need_agg_finalize, *scan_range, _scanner_mem_tracker);
+                                     _need_agg_finalize, *scan_range, _scanner_mem_tracker.get());
             // add scanner to pool before doing prepare.
             // so that scanner can be automatically deconstructed if prepare failed.
             _scanner_pool.add(scanner);
@@ -1675,7 +1669,6 @@ Status VOlapScanNode::close(RuntimeState* state) {
     std::for_each(_scan_blocks.begin(), _scan_blocks.end(), std::default_delete());
     _scan_row_batches_bytes = 0;
     std::for_each(_free_blocks.begin(), _free_blocks.end(), std::default_delete());
-    _block_mem_tracker->release(_buffered_bytes);
 
     // OlapScanNode terminate by exception
     // so that initiative close the Scanner
@@ -1703,7 +1696,7 @@ Status VOlapScanNode::close(RuntimeState* state) {
 Status VOlapScanNode::get_next(RuntimeState* state, Block* block, bool* eos) {
     INIT_AND_SCOPE_GET_NEXT_SPAN(state->get_tracer(), _get_next_span, "VOlapScanNode::get_next");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
 
     // check if Canceled.
     if (state->is_cancelled()) {
diff --git a/be/src/vec/exec/volap_scan_node.h b/be/src/vec/exec/volap_scan_node.h
index c1a17da82b..0caa9d029d 100644
--- a/be/src/vec/exec/volap_scan_node.h
+++ b/be/src/vec/exec/volap_scan_node.h
@@ -219,7 +219,7 @@ private:
 
     int64_t _buffered_bytes;
     // Count the memory consumption of Rowset Reader and Tablet Reader in OlapScanner.
-    std::shared_ptr<MemTracker> _scanner_mem_tracker;
+    std::unique_ptr<MemTracker> _scanner_mem_tracker;
     EvalConjunctsFn _eval_conjuncts_fn;
 
     bool _need_agg_finalize = true;
@@ -333,8 +333,6 @@ private:
     std::list _volap_scanners;
     std::mutex _volap_scanners_lock;
 
-    std::shared_ptr<MemTracker> _block_mem_tracker;
-
     int _max_materialized_blocks;
 
     size_t _block_size = 0;
diff --git a/be/src/vec/exec/volap_scanner.cpp b/be/src/vec/exec/volap_scanner.cpp
index 689dcba99f..51bf83f4ec 100644
--- a/be/src/vec/exec/volap_scanner.cpp
+++ b/be/src/vec/exec/volap_scanner.cpp
@@ -29,7 +29,7 @@ namespace doris::vectorized {
 
 VOlapScanner::VOlapScanner(RuntimeState* runtime_state, VOlapScanNode* parent, bool aggregation,
                            bool need_agg_finalize, const TPaloScanRange& scan_range,
-                           const std::shared_ptr<MemTracker>& tracker)
+                           MemTracker* tracker)
         : _runtime_state(runtime_state),
           _parent(parent),
           _tuple_desc(parent->_tuple_desc),
@@ -37,21 +37,15 @@ VOlapScanner::VOlapScanner(RuntimeState* runtime_state, VOlapScanNode* parent, b
           _is_open(false),
           _aggregation(aggregation),
           _need_agg_finalize(need_agg_finalize),
-          _version(-1) {
-#ifndef NDEBUG
-    _mem_tracker = MemTracker::create_tracker(
-            tracker->limit(), "VOlapScanner:" + tls_ctx()->thread_id_str(), tracker);
-#else
-    _mem_tracker = tracker;
-#endif
-}
+          _version(-1),
+          _mem_tracker(tracker) {}
 
 Status VOlapScanner::prepare(
         const TPaloScanRange& scan_range, const std::vector& key_ranges,
         const std::vector& filters,
         const std::vector>>& bloom_filters,
         const std::vector& function_filters) {
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
     set_tablet_reader();
     // set limit to reduce end of rowset and segment mem use
     _tablet_reader->set_batch_size(
@@ -124,7 +118,7 @@ Status VOlapScanner::prepare(
 
 Status VOlapScanner::open() {
     SCOPED_TIMER(_parent->_reader_init_timer);
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
 
     if (_conjunct_ctxs.size() > _parent->_direct_conjunct_size) {
         _use_pushdown_conjuncts = true;
@@ -279,7 +273,7 @@ Status VOlapScanner::_init_return_columns(bool need_seq_col) {
 Status VOlapScanner::get_block(RuntimeState* state, vectorized::Block* block, bool* eof) {
     // only empty block should be here
     DCHECK(block->rows() == 0);
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
 
     int64_t raw_rows_threshold = raw_rows_read() + config::doris_scanner_row_num;
     if (!block->mem_reuse()) {
diff --git a/be/src/vec/exec/volap_scanner.h b/be/src/vec/exec/volap_scanner.h
index cb2161d843..1510ec4d4c 100644
--- a/be/src/vec/exec/volap_scanner.h
+++ b/be/src/vec/exec/volap_scanner.h
@@ -36,8 +36,7 @@ class VOlapScanNode;
 class VOlapScanner {
 public:
     VOlapScanner(RuntimeState* runtime_state, VOlapScanNode* parent, bool aggregation,
-                 bool need_agg_finalize, const TPaloScanRange& scan_range,
-                 const std::shared_ptr<MemTracker>& tracker);
+                 bool need_agg_finalize, const TPaloScanRange& scan_range, MemTracker* tracker);
     virtual ~VOlapScanner() = default;
 
     Status prepare(const TPaloScanRange& scan_range, const std::vector& key_ranges,
@@ -90,8 +89,6 @@ public:
 
     std::vector* mutable_runtime_filter_marks() { return &_runtime_filter_marks; }
 
-    const std::shared_ptr<MemTracker>& mem_tracker() const { return _mem_tracker; }
-
 private:
     Status _init_tablet_reader_params(
             const std::vector& key_ranges, const std::vector& filters,
@@ -141,7 +138,7 @@ private:
 
     MonotonicStopWatch _watcher;
 
-    std::shared_ptr<MemTracker> _mem_tracker;
+    MemTracker* _mem_tracker;
 
     VExprContext* _vconjunct_ctx = nullptr;
     bool _need_to_close = false;
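
The scanner changes above swap shared ownership of the tracker for a raw pointer whose lifetime is bounded by the owning scan node. A short ownership sketch of that handoff (invented `Tracker`, `Scanner`, `ScanNode` types, not the Doris classes):

    #include <memory>
    #include <vector>

    struct Tracker { long consumed = 0; };

    class Scanner {
    public:
        explicit Scanner(Tracker* tracker) : _tracker(tracker) {}
        void read(long bytes) { _tracker->consumed += bytes; }
    private:
        Tracker* _tracker;  // not owned; the scan node outlives every scanner
    };

    class ScanNode {
    public:
        ScanNode() : _scanner_tracker(std::make_unique<Tracker>()) {}
        Scanner* add_scanner() {
            _scanners.emplace_back(std::make_unique<Scanner>(_scanner_tracker.get()));
            return _scanners.back().get();
        }
    private:
        std::unique_ptr<Tracker> _scanner_tracker;        // single owner
        std::vector<std::unique_ptr<Scanner>> _scanners;  // destroyed before the tracker (reverse declaration order)
    };
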
diff --git a/be/src/vec/exec/vschema_scan_node.cpp b/be/src/vec/exec/vschema_scan_node.cpp
index eef55ea567..9e6a01a095 100644
--- a/be/src/vec/exec/vschema_scan_node.cpp
+++ b/be/src/vec/exec/vschema_scan_node.cpp
@@ -107,7 +107,7 @@ Status VSchemaScanNode::open(RuntimeState* state) {
     }
 
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
     RETURN_IF_CANCELLED(state);
     RETURN_IF_ERROR(ExecNode::open(state));
 
@@ -132,7 +132,7 @@ Status VSchemaScanNode::prepare(RuntimeState* state) {
     }
 
     RETURN_IF_ERROR(ScanNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
+    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
 
     // new one mem pool
     _tuple_pool.reset(new (std::nothrow) MemPool());
diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp
index 9b644fffd7..e8c0ab96ad 100644
--- a/be/src/vec/exec/vset_operation_node.cpp
+++ b/be/src/vec/exec/vset_operation_node.cpp
@@ -40,7 +40,6 @@ struct HashTableBuild {
 
         Defer defer {[&]() {
             int64_t bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes();
-            _operation_node->_hash_table_mem_tracker->consume(bucket_bytes - old_bucket_bytes);
             _operation_node->_mem_used += bucket_bytes - old_bucket_bytes;
         }};
 
@@ -86,7 +85,6 @@ Status VSetOperationNode::close(RuntimeState* state) {
     for (auto& exprs : _child_expr_lists) {
         VExpr::close(exprs, state);
     }
-    _hash_table_mem_tracker->release(_mem_used);
     return ExecNode::close(state);
 }
 
@@ -116,8 +114,8 @@ Status VSetOperationNode::init(const TPlanNode& tnode, RuntimeState* state) {
 Status VSetOperationNode::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VSetOperationNode::open");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     // open result expr lists.
     for (const std::vector& exprs : _child_expr_lists) {
         RETURN_IF_ERROR(VExpr::open(exprs, state));
@@ -129,15 +127,13 @@ Status VSetOperationNode::open(RuntimeState* state) {
 Status VSetOperationNode::prepare(RuntimeState* state) {
     SCOPED_TIMER(_runtime_profile->total_time_counter());
     RETURN_IF_ERROR(ExecNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
-    _hash_table_mem_tracker = MemTracker::create_virtual_tracker(-1, "VSetOperationNode:HashTable");
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     _build_timer = ADD_TIMER(runtime_profile(), "BuildTime");
     _probe_timer = ADD_TIMER(runtime_profile(), "ProbeTime");
 
     // Prepare result expr lists.
     for (int i = 0; i < _child_expr_lists.size(); ++i) {
-        RETURN_IF_ERROR(VExpr::prepare(_child_expr_lists[i], state, child(i)->row_desc(),
-                                       expr_mem_tracker()));
+        RETURN_IF_ERROR(VExpr::prepare(_child_expr_lists[i], state, child(i)->row_desc()));
     }
 
     for (auto ctx : _child_expr_lists[0]) {
@@ -236,8 +232,7 @@ void VSetOperationNode::hash_table_init() {
 //build a hash table from child(0)
 Status VSetOperationNode::hash_table_build(RuntimeState* state) {
     RETURN_IF_ERROR(child(0)->open(state));
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB(
-            "Vec Set Operation Node, while constructing the hash table");
+    SCOPED_UPDATE_MEM_EXCEED_CALL_BACK("Vec Set Operation Node, while constructing the hash table");
     Block block;
     MutableBlock mutable_block(child(0)->row_desc().tuple_descriptors());
 
@@ -252,7 +247,6 @@ Status VSetOperationNode::hash_table_build(RuntimeState* state) {
                                        child(0)->get_next_span(), eos);
 
         size_t allocated_bytes = block.allocated_bytes();
-        _hash_table_mem_tracker->consume(allocated_bytes);
         _mem_used += allocated_bytes;
 
         if (block.rows() != 0) {
diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h
index 9b770838b5..ba4eba3013 100644
--- a/be/src/vec/exec/vset_operation_node.h
+++ b/be/src/vec/exec/vset_operation_node.h
@@ -90,8 +90,6 @@ protected:
     RuntimeProfile::Counter* _build_timer; // time to build hash table
     RuntimeProfile::Counter* _probe_timer; // time to probe
 
-    std::shared_ptr<MemTracker> _hash_table_mem_tracker;
-
     template 
     friend struct HashTableBuild;
     template 
diff --git a/be/src/vec/exec/vsort_exec_exprs.cpp b/be/src/vec/exec/vsort_exec_exprs.cpp
index 5813e5121f..67981b8e31 100644
--- a/be/src/vec/exec/vsort_exec_exprs.cpp
+++ b/be/src/vec/exec/vsort_exec_exprs.cpp
@@ -46,14 +46,11 @@ Status VSortExecExprs::init(const std::vector& lhs_ordering_expr_
 }
 
 Status VSortExecExprs::prepare(RuntimeState* state, const RowDescriptor& child_row_desc,
-                               const RowDescriptor& output_row_desc,
-                               const std::shared_ptr<MemTracker>& expr_mem_tracker) {
+                               const RowDescriptor& output_row_desc) {
     if (_materialize_tuple) {
-        RETURN_IF_ERROR(VExpr::prepare(_sort_tuple_slot_expr_ctxs, state, child_row_desc,
-                                       expr_mem_tracker));
+        RETURN_IF_ERROR(VExpr::prepare(_sort_tuple_slot_expr_ctxs, state, child_row_desc));
     }
-    RETURN_IF_ERROR(
-            VExpr::prepare(_lhs_ordering_expr_ctxs, state, output_row_desc, expr_mem_tracker));
+    RETURN_IF_ERROR(VExpr::prepare(_lhs_ordering_expr_ctxs, state, output_row_desc));
     return Status::OK();
 }
 
diff --git a/be/src/vec/exec/vsort_exec_exprs.h b/be/src/vec/exec/vsort_exec_exprs.h
index 8e471a5475..5e5b044d16 100644
--- a/be/src/vec/exec/vsort_exec_exprs.h
+++ b/be/src/vec/exec/vsort_exec_exprs.h
@@ -45,8 +45,7 @@ public:
 
     // prepare all expressions used for sorting and tuple materialization.
     Status prepare(RuntimeState* state, const RowDescriptor& child_row_desc,
-                   const RowDescriptor& output_row_desc,
-                   const std::shared_ptr<MemTracker>& mem_tracker);
+                   const RowDescriptor& output_row_desc);
 
     // open all expressions used for sorting and tuple materialization.
     Status open(RuntimeState* state);
diff --git a/be/src/vec/exec/vsort_node.cpp b/be/src/vec/exec/vsort_node.cpp
index 56b93bc675..73618b0b26 100644
--- a/be/src/vec/exec/vsort_node.cpp
+++ b/be/src/vec/exec/vsort_node.cpp
@@ -42,18 +42,16 @@ Status VSortNode::prepare(RuntimeState* state) {
     SCOPED_TIMER(_runtime_profile->total_time_counter());
     _runtime_profile->add_info_string("TOP-N", _limit == -1 ? "false" : "true");
     RETURN_IF_ERROR(ExecNode::prepare(state));
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
-    _block_mem_tracker = MemTracker::create_virtual_tracker(-1, "VSortNode:Block", mem_tracker());
-    RETURN_IF_ERROR(_vsort_exec_exprs.prepare(state, child(0)->row_desc(), _row_descriptor,
-                                              expr_mem_tracker()));
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
+    RETURN_IF_ERROR(_vsort_exec_exprs.prepare(state, child(0)->row_desc(), _row_descriptor));
     return Status::OK();
 }
 
 Status VSortNode::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VSortNode::open");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
     RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     RETURN_IF_ERROR(_vsort_exec_exprs.open(state));
     RETURN_IF_CANCELLED(state);
     RETURN_IF_ERROR(state->check_query_state("vsort, while open."));
@@ -79,7 +77,7 @@ Status VSortNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos)
 Status VSortNode::get_next(RuntimeState* state, Block* block, bool* eos) {
     INIT_AND_SCOPE_GET_NEXT_SPAN(state->get_tracer(), _get_next_span, "VSortNode::get_next");
     SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
 
     auto status = Status::OK();
     if (_sorted_blocks.empty()) {
@@ -108,7 +106,6 @@ Status VSortNode::close(RuntimeState* state) {
         return Status::OK();
     }
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VSortNode::close");
-    _block_mem_tracker->release(_total_mem_usage);
     _vsort_exec_exprs.close(state);
     return ExecNode::close(state);
 }
@@ -165,7 +162,6 @@ Status VSortNode::sort_input(RuntimeState* state) {
                 _sorted_blocks.emplace_back(std::move(block));
             }
 
-            _block_mem_tracker->consume(mem_usage);
             RETURN_IF_CANCELLED(state);
             RETURN_IF_ERROR(state->check_query_state("vsort, while sorting input."));
         }
diff --git a/be/src/vec/exec/vsort_node.h b/be/src/vec/exec/vsort_node.h
index f224938bd9..f67326afa6 100644
--- a/be/src/vec/exec/vsort_node.h
+++ b/be/src/vec/exec/vsort_node.h
@@ -84,8 +84,6 @@ private:
     // only valid in TOP-N node
     uint64_t _num_rows_in_block = 0;
     std::priority_queue _block_priority_queue;
-
-    std::shared_ptr<MemTracker> _block_mem_tracker;
 };
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/exec/vtable_function_node.cpp b/be/src/vec/exec/vtable_function_node.cpp
index 0ac4795b3d..54551bdbce 100644
--- a/be/src/vec/exec/vtable_function_node.cpp
+++ b/be/src/vec/exec/vtable_function_node.cpp
@@ -56,7 +56,7 @@ Status VTableFunctionNode::init(const TPlanNode& tnode, RuntimeState* state) {
 Status VTableFunctionNode::prepare(RuntimeState* state) {
     SCOPED_TIMER(_runtime_profile->total_time_counter());
     RETURN_IF_ERROR(TableFunctionNode::prepare(state));
-    RETURN_IF_ERROR(VExpr::prepare(_vfn_ctxs, state, _row_descriptor, expr_mem_tracker()));
+    RETURN_IF_ERROR(VExpr::prepare(_vfn_ctxs, state, _row_descriptor));
 
     // get current all output slots
     for (const auto& tuple_desc : this->row_desc().tuple_descriptors()) {
diff --git a/be/src/vec/exec/vunion_node.cpp b/be/src/vec/exec/vunion_node.cpp
index 4123b2d9cc..940b5521ca 100644
--- a/be/src/vec/exec/vunion_node.cpp
+++ b/be/src/vec/exec/vunion_node.cpp
@@ -67,13 +67,12 @@ Status VUnionNode::prepare(RuntimeState* state) {
             ADD_TIMER(_runtime_profile, "MaterializeExprsEvaluateTimer");
     // Prepare const expr lists.
     for (const std::vector& exprs : _const_expr_lists) {
-        RETURN_IF_ERROR(VExpr::prepare(exprs, state, row_desc(), expr_mem_tracker()));
+        RETURN_IF_ERROR(VExpr::prepare(exprs, state, row_desc()));
     }
 
     // Prepare result expr lists.
     for (int i = 0; i < _child_expr_lists.size(); ++i) {
-        RETURN_IF_ERROR(VExpr::prepare(_child_expr_lists[i], state, child(i)->row_desc(),
-                                       expr_mem_tracker()));
+        RETURN_IF_ERROR(VExpr::prepare(_child_expr_lists[i], state, child(i)->row_desc()));
     }
     return Status::OK();
 }
diff --git a/be/src/vec/exprs/vectorized_agg_fn.cpp b/be/src/vec/exprs/vectorized_agg_fn.cpp
index db6fe29b63..6882788e2c 100644
--- a/be/src/vec/exprs/vectorized_agg_fn.cpp
+++ b/be/src/vec/exprs/vectorized_agg_fn.cpp
@@ -68,15 +68,14 @@ Status AggFnEvaluator::create(ObjectPool* pool, const TExpr& desc, AggFnEvaluato
 
 Status AggFnEvaluator::prepare(RuntimeState* state, const RowDescriptor& desc, MemPool* pool,
                                const SlotDescriptor* intermediate_slot_desc,
-                               const SlotDescriptor* output_slot_desc,
-                               const std::shared_ptr<MemTracker>& mem_tracker) {
+                               const SlotDescriptor* output_slot_desc) {
     DCHECK(pool != nullptr);
     DCHECK(intermediate_slot_desc != nullptr);
     DCHECK(_intermediate_slot_desc == nullptr);
     _output_slot_desc = output_slot_desc;
     _intermediate_slot_desc = intermediate_slot_desc;
 
-    Status status = VExpr::prepare(_input_exprs_ctxs, state, desc, mem_tracker);
+    Status status = VExpr::prepare(_input_exprs_ctxs, state, desc);
     RETURN_IF_ERROR(status);
 
     std::vector child_expr_name;
diff --git a/be/src/vec/exprs/vectorized_agg_fn.h b/be/src/vec/exprs/vectorized_agg_fn.h
index 9a1dbafdcc..8a4f777355 100644
--- a/be/src/vec/exprs/vectorized_agg_fn.h
+++ b/be/src/vec/exprs/vectorized_agg_fn.h
@@ -33,8 +33,7 @@ public:
 
     Status prepare(RuntimeState* state, const RowDescriptor& desc, MemPool* pool,
                    const SlotDescriptor* intermediate_slot_desc,
-                   const SlotDescriptor* output_slot_desc,
-                   const std::shared_ptr<MemTracker>& mem_tracker);
+                   const SlotDescriptor* output_slot_desc);
 
     void set_timer(RuntimeProfile::Counter* exec_timer, RuntimeProfile::Counter* merge_timer,
                    RuntimeProfile::Counter* expr_timer) {
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index c2da03f0a0..909feb324f 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -224,9 +224,9 @@ Status VExpr::create_expr_trees(ObjectPool* pool, const std::vector& ctxs, RuntimeState* state,
-                      const RowDescriptor& row_desc, const std::shared_ptr<MemTracker>& tracker) {
+                      const RowDescriptor& row_desc) {
     for (int i = 0; i < ctxs.size(); ++i) {
-        RETURN_IF_ERROR(ctxs[i]->prepare(state, row_desc, tracker));
+        RETURN_IF_ERROR(ctxs[i]->prepare(state, row_desc));
     }
     return Status::OK();
 }
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h
index 7d4fd5c9f0..b6921753e8 100644
--- a/be/src/vec/exprs/vexpr.h
+++ b/be/src/vec/exprs/vexpr.h
@@ -98,8 +98,7 @@ public:
                                     std::vector* ctxs);
 
     static Status prepare(const std::vector& ctxs, RuntimeState* state,
-                          const RowDescriptor& row_desc,
-                          const std::shared_ptr<MemTracker>& tracker);
+                          const RowDescriptor& row_desc);
 
     static Status open(const std::vector& ctxs, RuntimeState* state);
 
diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp
index 00d63173c8..dfb4223741 100644
--- a/be/src/vec/exprs/vexpr_context.cpp
+++ b/be/src/vec/exprs/vexpr_context.cpp
@@ -45,16 +45,9 @@ doris::Status VExprContext::execute(doris::vectorized::Block* block, int* result
 }
 
 doris::Status VExprContext::prepare(doris::RuntimeState* state,
-                                    const doris::RowDescriptor& row_desc,
-                                    const std::shared_ptr<MemTracker>& tracker) {
+                                    const doris::RowDescriptor& row_desc) {
     _prepared = true;
-    if (!tracker) {
-        _mem_tracker = tls_ctx()->_thread_mem_tracker_mgr->mem_tracker();
-    } else {
-        _mem_tracker = tracker;
-    }
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
-    _pool.reset(new MemPool(_mem_tracker.get()));
+    _pool.reset(new MemPool());
     return _root->prepare(state, row_desc, this);
 }
 
@@ -63,7 +56,6 @@ doris::Status VExprContext::open(doris::RuntimeState* state) {
     if (_opened) {
         return Status::OK();
     }
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
     _opened = true;
     // Fragment-local state is only initialized for original contexts. Clones inherit the
     // original's fragment state and only need to have thread-local state initialized.
@@ -92,10 +84,9 @@ doris::Status VExprContext::clone(RuntimeState* state, VExprContext** new_ctx) {
     DCHECK(_prepared);
     DCHECK(_opened);
     DCHECK(*new_ctx == nullptr);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
 
     *new_ctx = state->obj_pool()->add(new VExprContext(_root));
-    (*new_ctx)->_pool.reset(new MemPool(_pool->mem_tracker()));
+    (*new_ctx)->_pool.reset(new MemPool());
     for (auto& _fn_context : _fn_contexts) {
         (*new_ctx)->_fn_contexts.push_back(_fn_context->impl()->clone((*new_ctx)->_pool.get()));
     }
@@ -103,7 +94,6 @@ doris::Status VExprContext::clone(RuntimeState* state, VExprContext** new_ctx) {
     (*new_ctx)->_is_clone = true;
     (*new_ctx)->_prepared = true;
     (*new_ctx)->_opened = true;
-    (*new_ctx)->_mem_tracker = _mem_tracker;
 
     return _root->open(state, *new_ctx, FunctionContext::THREAD_LOCAL);
 }
diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h
index bfa4468916..fb31925136 100644
--- a/be/src/vec/exprs/vexpr_context.h
+++ b/be/src/vec/exprs/vexpr_context.h
@@ -28,8 +28,7 @@ class VExprContext {
 public:
     VExprContext(VExpr* expr);
     ~VExprContext();
-    Status prepare(RuntimeState* state, const RowDescriptor& row_desc,
-                   const std::shared_ptr<MemTracker>& tracker = nullptr);
+    Status prepare(RuntimeState* state, const RowDescriptor& row_desc);
     Status open(RuntimeState* state);
     void close(RuntimeState* state);
     Status clone(RuntimeState* state, VExprContext** new_ctx);
@@ -88,9 +87,7 @@ private:
     /// and owned by this VExprContext.
     std::vector<FunctionContext*> _fn_contexts;
 
-    std::shared_ptr<MemTracker> _mem_tracker;
-
-    /// Pool backing fn_contexts_. Counts against the runtime state's UDF mem tracker.
+    /// Pool backing fn_contexts_.
     std::unique_ptr<MemPool> _pool;
 
     int _last_result_column_id;
diff --git a/be/src/vec/runtime/vdata_stream_recvr.cpp b/be/src/vec/runtime/vdata_stream_recvr.cpp
index d7f6c62a42..19393b4499 100644
--- a/be/src/vec/runtime/vdata_stream_recvr.cpp
+++ b/be/src/vec/runtime/vdata_stream_recvr.cpp
@@ -18,7 +18,7 @@
 #include "vec/runtime/vdata_stream_recvr.h"
 
 #include "gen_cpp/data.pb.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/thread_context.h"
 #include "util/uid_util.h"
 #include "vec/core/block.h"
@@ -92,7 +92,7 @@ void VDataStreamRecvr::SenderQueue::add_block(const PBlock& pblock, int be_numbe
                                               ::google::protobuf::Closure** done) {
     // Avoid a deadlock when SenderQueue::cancel() is called from the tcmalloc hook;
     // memory is limited via DataStreamRecvr::exceeds_limit instead.
-    STOP_CHECK_LIMIT_THREAD_LOCAL_MEM_TRACKER();
+    STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
     std::lock_guard<std::mutex> l(_lock);
     if (_is_cancelled) {
         return;
@@ -126,7 +126,6 @@ void VDataStreamRecvr::SenderQueue::add_block(const PBlock& pblock, int be_numbe
         SCOPED_TIMER(_recvr->_deserialize_row_batch_timer);
         block = new Block(pblock);
     }
-    _recvr->_block_mem_tracker->consume(block->bytes());
 
     VLOG_ROW << "added #rows=" << block->rows() << " batch_size=" << block_byte_size << "\n";
     _block_queue.emplace_back(block_byte_size, block);
@@ -145,7 +144,7 @@ void VDataStreamRecvr::SenderQueue::add_block(const PBlock& pblock, int be_numbe
 void VDataStreamRecvr::SenderQueue::add_block(Block* block, bool use_move) {
     // Avoid a deadlock when SenderQueue::cancel() is called from the tcmalloc hook;
     // memory is limited via DataStreamRecvr::exceeds_limit instead.
-    STOP_CHECK_LIMIT_THREAD_LOCAL_MEM_TRACKER();
+    STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
     std::unique_lock<std::mutex> l(_lock);
     if (_is_cancelled) {
         return;
@@ -167,7 +166,6 @@ void VDataStreamRecvr::SenderQueue::add_block(Block* block, bool use_move) {
 
     size_t block_size = nblock->bytes();
     _block_queue.emplace_back(block_size, nblock);
-    _recvr->_block_mem_tracker->consume(nblock->bytes());
     _data_arrival_cv.notify_one();
 
     if (_recvr->exceeds_limit(block_size)) {
@@ -264,12 +262,9 @@ VDataStreamRecvr::VDataStreamRecvr(
           _num_buffered_bytes(0),
           _profile(profile),
           _sub_plan_query_statistics_recvr(sub_plan_query_statistics_recvr) {
-    _mem_tracker =
-            MemTracker::create_tracker(-1, "VDataStreamRecvr:" + print_id(_fragment_instance_id),
-                                       nullptr, MemTrackerLevel::VERBOSE, _profile);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
-    _block_mem_tracker = MemTracker::create_virtual_tracker(
-            -1, "VDataStreamRecvr:block:" + print_id(_fragment_instance_id), _mem_tracker);
+    _mem_tracker = std::make_unique<MemTracker>(
+            "VDataStreamRecvr:" + print_id(_fragment_instance_id), nullptr, _profile);
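+    // Attach _mem_tracker to this scope so that allocations below are charged to it
+    // through the thread-local tracker manager.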
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
 
     // Create one queue per sender if is_merging is true.
     int num_queues = is_merging ? num_senders : 1;
@@ -292,7 +287,6 @@ VDataStreamRecvr::VDataStreamRecvr(
 
 VDataStreamRecvr::~VDataStreamRecvr() {
     DCHECK(_mgr == nullptr) << "Must call close()";
-    MemTracker::memory_leak_check(_block_mem_tracker.get(), false);
 }
 
 Status VDataStreamRecvr::create_merger(const std::vector<VExprContext*>& ordering_expr,
@@ -300,7 +294,7 @@ Status VDataStreamRecvr::create_merger(const std::vector<VExprContext*>& orderin
                                        const std::vector<bool>& nulls_first, size_t batch_size,
                                        int64_t limit, size_t offset) {
     DCHECK(_is_merging);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     std::vector<BlockSupplier> child_block_suppliers;
     // Create the merger that will produce a single stream of sorted rows.
     _merger.reset(new VSortedRunMerger(ordering_expr, is_asc_order, nulls_first, batch_size, limit,
@@ -316,19 +310,19 @@ Status VDataStreamRecvr::create_merger(const std::vector& orderin
 
 void VDataStreamRecvr::add_block(const PBlock& pblock, int sender_id, int be_number,
                                  int64_t packet_seq, ::google::protobuf::Closure** done) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     int use_sender_id = _is_merging ? sender_id : 0;
     _sender_queues[use_sender_id]->add_block(pblock, be_number, packet_seq, done);
 }
 
 void VDataStreamRecvr::add_block(Block* block, int sender_id, bool use_move) {
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     int use_sender_id = _is_merging ? sender_id : 0;
     _sender_queues[use_sender_id]->add_block(block, use_move);
 }
 
 Status VDataStreamRecvr::get_next(Block* block, bool* eos) {
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     if (!_is_merging) {
         Block* res = nullptr;
         RETURN_IF_ERROR(_sender_queues[0]->get_batch(&res));
@@ -342,11 +336,6 @@ Status VDataStreamRecvr::get_next(Block* block, bool* eos) {
         RETURN_IF_ERROR(_merger->get_next(block, eos));
     }
 
-    if (LIKELY(_block_mem_tracker->consumption() >= block->bytes())) {
-        _block_mem_tracker->release(block->bytes());
-    } else {
-        _block_mem_tracker->release(_block_mem_tracker->consumption());
-    }
     return Status::OK();
 }
 
@@ -375,7 +364,6 @@ void VDataStreamRecvr::close() {
     _mgr = nullptr;
 
     _merger.reset();
-    _block_mem_tracker->release(_block_mem_tracker->consumption());
 }
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/runtime/vdata_stream_recvr.h b/be/src/vec/runtime/vdata_stream_recvr.h
index 9c18cb6baa..bedd18bbce 100644
--- a/be/src/vec/runtime/vdata_stream_recvr.h
+++ b/be/src/vec/runtime/vdata_stream_recvr.h
@@ -73,7 +73,6 @@ public:
     const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; }
     PlanNodeId dest_node_id() const { return _dest_node_id; }
     const RowDescriptor& row_desc() const { return _row_desc; }
-    const std::shared_ptr& mem_tracker() const { return _mem_tracker; }
 
     void add_sub_plan_statistics(const PQueryStatistics& statistics, int sender_id) {
         _sub_plan_query_statistics_recvr->insert(statistics, sender_id);
@@ -116,8 +115,7 @@ private:
     bool _is_closed;
 
     std::atomic<int> _num_buffered_bytes;
-    std::shared_ptr<MemTracker> _mem_tracker;
-    std::shared_ptr<MemTracker> _block_mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
     std::vector<SenderQueue*> _sender_queues;
 
     std::unique_ptr<VSortedRunMerger> _merger;
diff --git a/be/src/vec/runtime/vpartition_info.cpp b/be/src/vec/runtime/vpartition_info.cpp
index 115227dd69..bca6f52f88 100644
--- a/be/src/vec/runtime/vpartition_info.cpp
+++ b/be/src/vec/runtime/vpartition_info.cpp
@@ -33,10 +33,9 @@ Status VPartitionInfo::from_thrift(ObjectPool* pool, const TRangePartition& t_pa
     return Status::OK();
 }
 
-Status VPartitionInfo::prepare(RuntimeState* state, const RowDescriptor& row_desc,
-                               const std::shared_ptr<MemTracker>& mem_tracker) {
+Status VPartitionInfo::prepare(RuntimeState* state, const RowDescriptor& row_desc) {
     if (_distributed_expr_ctxs.size() > 0) {
-        RETURN_IF_ERROR(VExpr::prepare(_distributed_expr_ctxs, state, row_desc, mem_tracker));
+        RETURN_IF_ERROR(VExpr::prepare(_distributed_expr_ctxs, state, row_desc));
     }
     return Status::OK();
 }
diff --git a/be/src/vec/runtime/vpartition_info.h b/be/src/vec/runtime/vpartition_info.h
index 61ab6ad5b0..f6a98b2b67 100644
--- a/be/src/vec/runtime/vpartition_info.h
+++ b/be/src/vec/runtime/vpartition_info.h
@@ -31,8 +31,7 @@ public:
     static Status from_thrift(ObjectPool* pool, const TRangePartition& t_partition,
                               VPartitionInfo* partition);
 
-    Status prepare(RuntimeState* state, const RowDescriptor& row_desc,
-                   const std::shared_ptr<MemTracker>& mem_tracker);
+    Status prepare(RuntimeState* state, const RowDescriptor& row_desc);
 
     Status open(RuntimeState* state);
 
diff --git a/be/src/vec/sink/vdata_stream_sender.cpp b/be/src/vec/sink/vdata_stream_sender.cpp
index f759664895..c378b3c33d 100644
--- a/be/src/vec/sink/vdata_stream_sender.cpp
+++ b/be/src/vec/sink/vdata_stream_sender.cpp
@@ -25,7 +25,7 @@
 #include "runtime/client_cache.h"
 #include "runtime/dpp_sink_internal.h"
 #include "runtime/exec_env.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/runtime_state.h"
 #include "runtime/thread_context.h"
 #include "util/proto_util.h"
@@ -396,10 +396,9 @@ Status VDataStreamSender::prepare(RuntimeState* state) {
                                     _dest_node_id, instances);
     _profile = _pool->add(new RuntimeProfile(std::move(title)));
     SCOPED_TIMER(_profile->total_time_counter());
-    _mem_tracker = MemTracker::create_tracker(
-            -1, "VDataStreamSender:" + print_id(state->fragment_instance_id()),
-            state->instance_mem_tracker(), MemTrackerLevel::VERBOSE, _profile);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    _mem_tracker = std::make_unique<MemTracker>(
+            "VDataStreamSender:" + print_id(state->fragment_instance_id()), nullptr, _profile);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
 
     if (_part_type == TPartitionType::UNPARTITIONED || _part_type == TPartitionType::RANDOM) {
         std::random_device rd;
@@ -407,11 +406,11 @@ Status VDataStreamSender::prepare(RuntimeState* state) {
         shuffle(_channels.begin(), _channels.end(), g);
     } else if (_part_type == TPartitionType::HASH_PARTITIONED ||
                _part_type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED) {
-        RETURN_IF_ERROR(VExpr::prepare(_partition_expr_ctxs, state, _row_desc, _expr_mem_tracker));
+        RETURN_IF_ERROR(VExpr::prepare(_partition_expr_ctxs, state, _row_desc));
     } else {
-        RETURN_IF_ERROR(VExpr::prepare(_partition_expr_ctxs, state, _row_desc, _expr_mem_tracker));
+        RETURN_IF_ERROR(VExpr::prepare(_partition_expr_ctxs, state, _row_desc));
         for (auto iter : _partition_infos) {
-            RETURN_IF_ERROR(iter->prepare(state, _row_desc, _expr_mem_tracker));
+            RETURN_IF_ERROR(iter->prepare(state, _row_desc));
         }
     }
 
@@ -434,7 +433,7 @@ Status VDataStreamSender::prepare(RuntimeState* state) {
 Status VDataStreamSender::open(RuntimeState* state) {
     START_AND_SCOPE_SPAN(state->get_tracer(), span, "VDataStreamSender::open");
     DCHECK(state != nullptr);
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     RETURN_IF_ERROR(VExpr::open(_partition_expr_ctxs, state));
     for (auto iter : _partition_infos) {
         RETURN_IF_ERROR(iter->open(state));
@@ -449,7 +448,7 @@ Status VDataStreamSender::send(RuntimeState* state, RowBatch* batch) {
 Status VDataStreamSender::send(RuntimeState* state, Block* block) {
     INIT_AND_SCOPE_SEND_SPAN(state->get_tracer(), _send_span, "VDataStreamSender::send")
     SCOPED_TIMER(_profile->total_time_counter());
-    SCOPED_SWITCH_TASK_THREAD_LOCAL_EXISTED_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     if (_part_type == TPartitionType::UNPARTITIONED || _channels.size() == 1) {
         // 1. serialize the block (only needed when this is not a local exchange)
         // 2. send block
diff --git a/be/src/vec/sink/vdata_stream_sender.h b/be/src/vec/sink/vdata_stream_sender.h
index ce8a59711d..421a0daf3f 100644
--- a/be/src/vec/sink/vdata_stream_sender.h
+++ b/be/src/vec/sink/vdata_stream_sender.h
@@ -135,7 +135,7 @@ protected:
     RuntimeProfile::Counter* _uncompressed_bytes_counter;
     RuntimeProfile::Counter* _ignore_rows;
 
-    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemTracker> _mem_tracker;
 
     // Throughput per total time spent in sender
     RuntimeProfile::Counter* _overall_throughput;
diff --git a/be/src/vec/sink/vmysql_table_sink.cpp b/be/src/vec/sink/vmysql_table_sink.cpp
index 863c127120..49d95f27ec 100644
--- a/be/src/vec/sink/vmysql_table_sink.cpp
+++ b/be/src/vec/sink/vmysql_table_sink.cpp
@@ -19,7 +19,6 @@
 
 #include 
 
-#include "runtime/mem_tracker.h"
 #include "runtime/runtime_state.h"
 #include "util/debug_util.h"
 #include "util/runtime_profile.h"
@@ -29,10 +28,7 @@ namespace doris {
 namespace vectorized {
 VMysqlTableSink::VMysqlTableSink(ObjectPool* pool, const RowDescriptor& row_desc,
                                  const std::vector<TExpr>& t_exprs)
-        : _pool(pool),
-          _row_desc(row_desc),
-          _t_output_expr(t_exprs),
-          _mem_tracker(MemTracker::create_tracker(-1, "VMysqlTableSink")) {
+        : _pool(pool), _row_desc(row_desc), _t_output_expr(t_exprs) {
     _name = "VMysqlTableSink";
 }
 
@@ -58,7 +54,7 @@ Status VMysqlTableSink::init(const TDataSink& t_sink) {
 Status VMysqlTableSink::prepare(RuntimeState* state) {
     RETURN_IF_ERROR(DataSink::prepare(state));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(VExpr::prepare(_output_expr_ctxs, state, _row_desc, _mem_tracker));
+    RETURN_IF_ERROR(VExpr::prepare(_output_expr_ctxs, state, _row_desc));
     std::stringstream title;
     title << "VMysqlTableSink (frag_id=" << state->fragment_instance_id() << ")";
     // create profile
diff --git a/be/src/vec/sink/vmysql_table_sink.h b/be/src/vec/sink/vmysql_table_sink.h
index 73eef3fda0..58c7dfce13 100644
--- a/be/src/vec/sink/vmysql_table_sink.h
+++ b/be/src/vec/sink/vmysql_table_sink.h
@@ -28,7 +28,6 @@ class TExpr;
 class TMysqlTableSink;
 class RuntimeState;
 class RuntimeProfile;
-class MemTracker;
 namespace vectorized {
 
 class VExprContext;
@@ -68,7 +67,6 @@ private:
     VMysqlTableWriter* _writer;
 
     RuntimeProfile* _profile;
-    std::shared_ptr<MemTracker> _mem_tracker;
 };
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/sink/vodbc_table_sink.cpp b/be/src/vec/sink/vodbc_table_sink.cpp
index c92245ec5c..04fdef8e27 100644
--- a/be/src/vec/sink/vodbc_table_sink.cpp
+++ b/be/src/vec/sink/vodbc_table_sink.cpp
@@ -19,7 +19,6 @@
 
 #include 
 
-#include "runtime/mem_tracker.h"
 #include "runtime/runtime_state.h"
 #include "util/debug_util.h"
 #include "util/runtime_profile.h"
@@ -31,10 +30,7 @@ namespace vectorized {
 
 VOdbcTableSink::VOdbcTableSink(ObjectPool* pool, const RowDescriptor& row_desc,
                                const std::vector<TExpr>& t_exprs)
-        : _pool(pool),
-          _row_desc(row_desc),
-          _t_output_expr(t_exprs),
-          _mem_tracker(MemTracker::create_tracker(-1, "VOdbcTableSink")) {
+        : _pool(pool), _row_desc(row_desc), _t_output_expr(t_exprs) {
     _name = "VOdbcTableSink";
 }
 
@@ -54,7 +50,7 @@ Status VOdbcTableSink::init(const TDataSink& t_sink) {
 Status VOdbcTableSink::prepare(RuntimeState* state) {
     RETURN_IF_ERROR(DataSink::prepare(state));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(VExpr::prepare(_output_expr_ctxs, state, _row_desc, _mem_tracker));
+    RETURN_IF_ERROR(VExpr::prepare(_output_expr_ctxs, state, _row_desc));
     std::stringstream title;
     title << "VOdbcTableSink (frag_id=" << state->fragment_instance_id() << ")";
     // create profile
diff --git a/be/src/vec/sink/vodbc_table_sink.h b/be/src/vec/sink/vodbc_table_sink.h
index 75c5348327..786b3cfb57 100644
--- a/be/src/vec/sink/vodbc_table_sink.h
+++ b/be/src/vec/sink/vodbc_table_sink.h
@@ -27,7 +27,6 @@ class RowDescriptor;
 class TExpr;
 class RuntimeState;
 class RuntimeProfile;
-class MemTracker;
 namespace vectorized {
 
 // This class is a sinker, which put input data to odbc table
@@ -60,7 +59,6 @@ private:
     std::vector<VExprContext*> _output_expr_ctxs;
 
     RuntimeProfile* _profile;
-    std::shared_ptr<MemTracker> _mem_tracker;
 
     ODBCConnectorParam _odbc_param;
     std::string _odbc_tbl;
diff --git a/be/src/vec/sink/vresult_file_sink.cpp b/be/src/vec/sink/vresult_file_sink.cpp
index 4ee8ab0293..cbb8d8810c 100644
--- a/be/src/vec/sink/vresult_file_sink.cpp
+++ b/be/src/vec/sink/vresult_file_sink.cpp
@@ -86,7 +86,7 @@ Status VResultFileSink::prepare_exprs(RuntimeState* state) {
     RETURN_IF_ERROR(
             VExpr::create_expr_trees(state->obj_pool(), _t_output_expr, &_output_vexpr_ctxs));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(VExpr::prepare(_output_vexpr_ctxs, state, _row_desc, _expr_mem_tracker));
+    RETURN_IF_ERROR(VExpr::prepare(_output_vexpr_ctxs, state, _row_desc));
     return Status::OK();
 }
 
diff --git a/be/src/vec/sink/vresult_sink.cpp b/be/src/vec/sink/vresult_sink.cpp
index e8d6572509..9fe8cf8b66 100644
--- a/be/src/vec/sink/vresult_sink.cpp
+++ b/be/src/vec/sink/vresult_sink.cpp
@@ -47,7 +47,7 @@ Status VResultSink::prepare_exprs(RuntimeState* state) {
     RETURN_IF_ERROR(
             VExpr::create_expr_trees(state->obj_pool(), _t_output_expr, &_output_vexpr_ctxs));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(VExpr::prepare(_output_vexpr_ctxs, state, _row_desc, _expr_mem_tracker));
+    RETURN_IF_ERROR(VExpr::prepare(_output_vexpr_ctxs, state, _row_desc));
     return Status::OK();
 }
 Status VResultSink::prepare(RuntimeState* state) {
@@ -91,7 +91,7 @@ Status VResultSink::send(RuntimeState* state, Block* block) {
     INIT_AND_SCOPE_SEND_SPAN(state->get_tracer(), _send_span, "VResultSink::send");
     // Memory consumed while sending the results is not checked against the query memory limit,
     // so the query is not cancelled if the limit is reached after the result has already been produced.
-    STOP_CHECK_LIMIT_THREAD_LOCAL_MEM_TRACKER();
+    STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
     return _writer->append_block(*block);
 }
 
diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp
index a7a20fb36f..048395eec5 100644
--- a/be/src/vec/sink/vtablet_sink.cpp
+++ b/be/src/vec/sink/vtablet_sink.cpp
@@ -159,7 +159,9 @@ Status VNodeChannel::add_row(const BlockRow& block_row, int64_t tablet_id) {
     // It's fine to do a fake add_row() and return OK, because we will check _cancelled in the next add_row() or mark_close().
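+    // Back off while the pending queue is too large or the thread-local limiter tracker
+    // reports that a memory limit has been exceeded.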
     while (!_cancelled &&
            (_pending_batches_bytes > _max_pending_batches_bytes ||
-            _parent->_mem_tracker->any_limit_exceeded()) &&
+            thread_context()
+                    ->_thread_mem_tracker_mgr->limiter_mem_tracker()
+                    ->any_limit_exceeded()) &&
            _pending_batches_num > 0) {
         SCOPED_ATOMIC_TIMER(&_mem_exceeded_block_ns);
         std::this_thread::sleep_for(std::chrono::milliseconds(10));
@@ -215,7 +217,7 @@ int VNodeChannel::try_send_and_fetch_status(RuntimeState* state,
 }
 
 void VNodeChannel::try_send_block(RuntimeState* state) {
-    SCOPED_ATTACH_TASK_THREAD(state, _node_channel_tracker);
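+    // The send thread now attaches only to the task-level tracker; the per-node-channel
+    // tracker argument is gone.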
+    SCOPED_ATTACH_TASK(state);
     SCOPED_ATOMIC_TIMER(&_actual_consume_ns);
     AddBlockReq send_block;
     {
@@ -363,8 +365,7 @@ Status VOlapTableSink::init(const TDataSink& sink) {
 Status VOlapTableSink::prepare(RuntimeState* state) {
     RETURN_IF_ERROR(OlapTableSink::prepare(state));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(vectorized::VExpr::prepare(_output_vexpr_ctxs, state, _input_row_desc,
-                                               _expr_mem_tracker));
+    RETURN_IF_ERROR(vectorized::VExpr::prepare(_output_vexpr_ctxs, state, _input_row_desc));
     return Status::OK();
 }
 
@@ -384,7 +385,7 @@ size_t VOlapTableSink::get_pending_bytes() const {
 }
 Status VOlapTableSink::send(RuntimeState* state, vectorized::Block* input_block) {
     INIT_AND_SCOPE_SEND_SPAN(state->get_tracer(), _send_span, "VOlapTableSink::send");
-    SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
+    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     Status status = Status::OK();
 
     auto rows = input_block->rows();
diff --git a/be/test/exec/broker_scan_node_test.cpp b/be/test/exec/broker_scan_node_test.cpp
index bc7c91dff3..8b8e376b34 100644
--- a/be/test/exec/broker_scan_node_test.cpp
+++ b/be/test/exec/broker_scan_node_test.cpp
@@ -40,7 +40,7 @@ class BrokerScanNodeTest : public testing::Test {
 public:
     BrokerScanNodeTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
     }
     void init();
     static void SetUpTestCase() {
diff --git a/be/test/exec/broker_scanner_test.cpp b/be/test/exec/broker_scanner_test.cpp
index 0a65982058..d750370aff 100644
--- a/be/test/exec/broker_scanner_test.cpp
+++ b/be/test/exec/broker_scanner_test.cpp
@@ -29,7 +29,6 @@
 #include "gen_cpp/PlanNodes_types.h"
 #include "io/local_file_reader.h"
 #include "runtime/descriptors.h"
-#include "runtime/mem_tracker.h"
 #include "runtime/runtime_state.h"
 #include "runtime/tuple.h"
 #include "runtime/user_function_cache.h"
@@ -38,10 +37,10 @@ namespace doris {
 
 class BrokerScannerTest : public testing::Test {
 public:
-    BrokerScannerTest() : _tracker(new MemTracker()), _runtime_state(TQueryGlobals()) {
+    BrokerScannerTest() : _runtime_state(TQueryGlobals()) {
         init();
         _profile = _runtime_state.runtime_profile();
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
     }
     void init();
 
@@ -59,7 +58,6 @@ private:
     void init_desc_table();
     void init_params();
 
-    std::shared_ptr<MemTracker> _tracker;
     RuntimeState _runtime_state;
     RuntimeProfile* _profile;
     ObjectPool _obj_pool;
@@ -363,7 +361,7 @@ TEST_F(BrokerScannerTest, normal) {
     auto st = scanner.open();
     EXPECT_TRUE(st.ok());
 
-    MemPool tuple_pool(_tracker.get());
+    MemPool tuple_pool;
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool fill_tuple;
     bool eof = false;
@@ -424,7 +422,7 @@ TEST_F(BrokerScannerTest, normal2) {
     auto st = scanner.open();
     EXPECT_TRUE(st.ok());
 
-    MemPool tuple_pool(_tracker.get());
+    MemPool tuple_pool;
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool fill_tuple;
     bool eof = false;
@@ -479,7 +477,7 @@ TEST_F(BrokerScannerTest, normal3) {
     auto st = scanner.open();
     EXPECT_TRUE(st.ok());
 
-    MemPool tuple_pool(_tracker.get());
+    MemPool tuple_pool;
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool fill_tuple;
     bool eof = false;
@@ -536,7 +534,7 @@ TEST_F(BrokerScannerTest, normal4) {
     auto st = scanner.open();
     EXPECT_TRUE(st.ok());
 
-    MemPool tuple_pool(_tracker.get());
+    MemPool tuple_pool;
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool fill_tuple;
     bool eof = false;
@@ -569,7 +567,7 @@ TEST_F(BrokerScannerTest, normal5) {
     auto st = scanner.open();
     EXPECT_TRUE(st.ok());
 
-    MemPool tuple_pool(_tracker.get());
+    MemPool tuple_pool;
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool fill_tuple;
     bool eof = false;
@@ -595,7 +593,7 @@ TEST_F(BrokerScannerTest, normal6) {
     auto st = scanner.open();
     EXPECT_TRUE(st.ok());
 
-    MemPool tuple_pool(_tracker.get());
+    MemPool tuple_pool;
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool fill_tuple;
     bool eof = false;
@@ -628,7 +626,7 @@ TEST_F(BrokerScannerTest, normal7) {
     auto st = scanner.open();
     EXPECT_TRUE(st.ok());
 
-    MemPool tuple_pool(_tracker.get());
+    MemPool tuple_pool;
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool fill_tuple;
     bool eof = false;
@@ -654,7 +652,7 @@ TEST_F(BrokerScannerTest, normal8) {
     auto st = scanner.open();
     EXPECT_TRUE(st.ok());
 
-    MemPool tuple_pool(_tracker.get());
+    MemPool tuple_pool;
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool fill_tuple;
     bool eof = false;
@@ -687,7 +685,7 @@ TEST_F(BrokerScannerTest, normal9) {
     auto st = scanner.open();
     EXPECT_TRUE(st.ok());
 
-    MemPool tuple_pool(_tracker.get());
+    MemPool tuple_pool;
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool fill_tuple;
     bool eof = false;
@@ -717,7 +715,7 @@ TEST_F(BrokerScannerTest, multi_bytes_1) {
     auto st = scanner.open();
     EXPECT_TRUE(st.ok());
 
-    MemPool tuple_pool(_tracker.get());
+    MemPool tuple_pool;
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool fill_tuple;
     bool eof = false;
diff --git a/be/test/exec/es_http_scan_node_test.cpp b/be/test/exec/es_http_scan_node_test.cpp
index fdcbb89426..8dc0e71b79 100644
--- a/be/test/exec/es_http_scan_node_test.cpp
+++ b/be/test/exec/es_http_scan_node_test.cpp
@@ -40,7 +40,7 @@ namespace doris {
 class EsHttpScanNodeTest : public testing::Test {
 public:
     EsHttpScanNodeTest() : _runtime_state(TQueryGlobals()) {
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
         TDescriptorTable t_desc_table;
 
         // table descriptors
diff --git a/be/test/exec/es_predicate_test.cpp b/be/test/exec/es_predicate_test.cpp
index 966b0c0eff..8cf009b991 100644
--- a/be/test/exec/es_predicate_test.cpp
+++ b/be/test/exec/es_predicate_test.cpp
@@ -31,7 +31,7 @@
 #include "rapidjson/rapidjson.h"
 #include "rapidjson/stringbuffer.h"
 #include "rapidjson/writer.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/primitive_type.h"
 #include "runtime/runtime_state.h"
 #include "runtime/string_value.h"
@@ -43,7 +43,7 @@ class RuntimeState;
 class EsPredicateTest : public testing::Test {
 public:
     EsPredicateTest() : _runtime_state(TQueryGlobals()) {
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
         TDescriptorTable t_desc_table;
 
         // table descriptors
diff --git a/be/test/exec/hash_table_test.cpp b/be/test/exec/hash_table_test.cpp
index c94fcceb77..2a5e8f62d6 100644
--- a/be/test/exec/hash_table_test.cpp
+++ b/be/test/exec/hash_table_test.cpp
@@ -33,7 +33,7 @@
 #include "exprs/slot_ref.h"
 #include "runtime/exec_env.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/runtime_state.h"
 #include "runtime/string_value.h"
 #include "runtime/test_env.h"
@@ -47,9 +47,7 @@ namespace doris {
 class HashTableTest : public testing::Test {
 public:
     HashTableTest() {
-        _tracker = MemTracker::create_tracker(-1, "root");
-        _pool_tracker = MemTracker::create_tracker(-1, "mem-pool", _tracker);
-        _mem_pool.reset(new MemPool(_pool_tracker.get()));
+        _mem_pool.reset(new MemPool());
         _state = _pool.add(new RuntimeState(TQueryGlobals()));
         _state->init_instance_mem_tracker();
         _state->_exec_env = ExecEnv::GetInstance();
@@ -57,8 +55,6 @@ public:
 
 protected:
     RuntimeState* _state;
-    std::shared_ptr<MemTracker> _tracker;
-    std::shared_ptr<MemTracker> _pool_tracker;
     ObjectPool _pool;
     std::shared_ptr<MemPool> _mem_pool;
     std::vector<ExprContext*> _build_expr;
@@ -71,12 +67,12 @@ protected:
 
         auto build_slot_ref = _pool.add(new SlotRef(int_desc, 0));
         _build_expr.push_back(_pool.add(new ExprContext(build_slot_ref)));
-        status = Expr::prepare(_build_expr, _state, desc, _tracker);
+        status = Expr::prepare(_build_expr, _state, desc);
         EXPECT_TRUE(status.ok());
 
         auto probe_slot_ref = _pool.add(new SlotRef(int_desc, 0));
         _probe_expr.push_back(_pool.add(new ExprContext(probe_slot_ref)));
-        status = Expr::prepare(_probe_expr, _state, desc, _tracker);
+        status = Expr::prepare(_probe_expr, _state, desc);
         EXPECT_TRUE(status.ok());
     }
 
@@ -195,9 +191,6 @@ TEST_F(HashTableTest, SetupTest) {
 // testing for probe rows that are both there and not.
 // The hash table is rehashed a few times and the scans/finds are tested again.
 TEST_F(HashTableTest, BasicTest) {
-    std::shared_ptr<MemTracker> hash_table_tracker =
-            MemTracker::create_tracker(-1, "hash-table-basic-tracker", _tracker);
-
     TupleRow* build_rows[5];
     TupleRow* scan_rows[5] = {0};
 
@@ -219,7 +212,7 @@ TEST_F(HashTableTest, BasicTest) {
     int initial_seed = 1;
     int64_t num_buckets = 4;
     HashTable hash_table(_build_expr, _probe_expr, 1, false, is_null_safe, initial_seed,
-                         hash_table_tracker, num_buckets);
+                         num_buckets);
 
     for (int i = 0; i < 5; ++i) {
         hash_table.insert(build_rows[i]);
@@ -259,14 +252,11 @@ TEST_F(HashTableTest, BasicTest) {
 
 // This test makes sure we can scan ranges of buckets
 TEST_F(HashTableTest, ScanTest) {
-    std::shared_ptr<MemTracker> hash_table_tracker =
-            MemTracker::create_tracker(-1, "hash-table-scan-tracker", _tracker);
-
     std::vector<bool> is_null_safe = {false};
     int initial_seed = 1;
     int64_t num_buckets = 4;
     HashTable hash_table(_build_expr, _probe_expr, 1, false, is_null_safe, initial_seed,
-                         hash_table_tracker, num_buckets);
+                         num_buckets);
     // Add 1 row with val 1, 2 with val 2, etc
     std::vector<TupleRow*> build_rows;
     ProbeTestData probe_rows[15];
@@ -313,14 +303,13 @@ TEST_F(HashTableTest, GrowTableTest) {
     int num_to_add = LOOP_LESS_OR_MORE(2, 4);
     int expected_size = 0;
 
-    std::shared_ptr<MemTracker> mem_tracker =
-            MemTracker::create_tracker(1024 * 1024, "hash-table-grow-tracker", _tracker);
+    int mem_limit = 1024 * 1024;
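+    // The plain MemTracker no longer carries its own limit, so the test passes mem_limit
+    // explicitly to limit_exceeded() below.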
     std::vector<bool> is_null_safe = {false};
     int initial_seed = 1;
     int64_t num_buckets = 4;
     HashTable hash_table(_build_expr, _probe_expr, 1, false, is_null_safe, initial_seed,
-                         mem_tracker, num_buckets);
-    EXPECT_FALSE(mem_tracker->limit_exceeded());
+                         num_buckets);
+    EXPECT_FALSE(hash_table.mem_tracker()->limit_exceeded(mem_limit));
 
     for (int i = 0; i < LOOP_LESS_OR_MORE(1, 20); ++i) {
         for (int j = 0; j < num_to_add; ++build_row_val, ++j) {
@@ -331,9 +320,10 @@ TEST_F(HashTableTest, GrowTableTest) {
         num_to_add *= 2;
         EXPECT_EQ(hash_table.size(), expected_size);
     }
-    LOG(INFO) << "consume:" << mem_tracker->consumption() << ",expected_size:" << expected_size;
+    LOG(INFO) << "consume:" << hash_table.mem_tracker()->consumption()
+              << ",expected_size:" << expected_size;
 
-    EXPECT_EQ(LOOP_LESS_OR_MORE(0, 1), mem_tracker->limit_exceeded());
+    EXPECT_EQ(LOOP_LESS_OR_MORE(0, 1), hash_table.mem_tracker()->limit_exceeded(mem_limit));
 
     // Validate that we can find the entries
     for (int i = 0; i < expected_size * 5; i += 100000) {
@@ -354,13 +344,11 @@ TEST_F(HashTableTest, GrowTableTest) {
 TEST_F(HashTableTest, GrowTableTest2) {
     int build_row_val = 0;
 
-    std::shared_ptr<MemTracker> mem_tracker =
-            MemTracker::create_tracker(1024 * 1024 * 1024, "hash-table-grow2-tracker", _tracker);
     std::vector<bool> is_null_safe = {false};
     int initial_seed = 1;
     int64_t num_buckets = 4;
     HashTable hash_table(_build_expr, _probe_expr, 1, false, is_null_safe, initial_seed,
-                         mem_tracker, num_buckets);
+                         num_buckets);
 
     LOG(INFO) << time(nullptr);
 
diff --git a/be/test/exec/json_scanner_test.cpp b/be/test/exec/json_scanner_test.cpp
index 744746f7b7..5daa4ef19a 100644
--- a/be/test/exec/json_scanner_test.cpp
+++ b/be/test/exec/json_scanner_test.cpp
@@ -42,7 +42,7 @@ class JsonScannerTest : public testing::Test {
 public:
     JsonScannerTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
         _runtime_state._exec_env = ExecEnv::GetInstance();
     }
     void init();
diff --git a/be/test/exec/json_scanner_with_jsonpath_test.cpp b/be/test/exec/json_scanner_with_jsonpath_test.cpp
index 0394de1035..578cd10443 100644
--- a/be/test/exec/json_scanner_with_jsonpath_test.cpp
+++ b/be/test/exec/json_scanner_with_jsonpath_test.cpp
@@ -41,7 +41,7 @@ class JsonScannerWithJsonPathTest : public testing::Test {
 public:
     JsonScannerWithJsonPathTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
         _runtime_state._exec_env = ExecEnv::GetInstance();
     }
     void init();
diff --git a/be/test/exec/multi_bytes_separator_test.cpp b/be/test/exec/multi_bytes_separator_test.cpp
index 614d9f279f..d514d3b538 100644
--- a/be/test/exec/multi_bytes_separator_test.cpp
+++ b/be/test/exec/multi_bytes_separator_test.cpp
@@ -28,7 +28,7 @@
 #include "gen_cpp/PlanNodes_types.h"
 #include "io/local_file_reader.h"
 #include "runtime/descriptors.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/runtime_state.h"
 #include "runtime/tuple.h"
 #include "runtime/user_function_cache.h"
diff --git a/be/test/exec/orc_scanner_test.cpp b/be/test/exec/orc_scanner_test.cpp
index cd1023e692..bcc4d79e53 100644
--- a/be/test/exec/orc_scanner_test.cpp
+++ b/be/test/exec/orc_scanner_test.cpp
@@ -44,7 +44,7 @@ class OrcScannerTest : public testing::Test {
 public:
     OrcScannerTest() : _runtime_state(TQueryGlobals()) {
         _profile = _runtime_state.runtime_profile();
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
     }
 
     static void SetUpTestCase() {
@@ -411,8 +411,7 @@ TEST_F(OrcScannerTest, normal) {
                        &_counter);
     EXPECT_TRUE(scanner.open().ok());
 
-    auto tracker = std::make_shared<MemTracker>();
-    MemPool tuple_pool(tracker.get());
+    MemPool tuple_pool;
 
     Tuple* tuple = (Tuple*)tuple_pool.allocate(_desc_tbl->get_tuple_descriptor(1)->byte_size());
     bool eof = false;
@@ -535,8 +534,7 @@ TEST_F(OrcScannerTest, normal2) {
                        &_counter);
     EXPECT_TRUE(scanner.open().ok());
 
-    auto tracker = std::make_shared<MemTracker>();
-    MemPool tuple_pool(tracker.get());
+    MemPool tuple_pool;
 
     Tuple* tuple = (Tuple*)tuple_pool.allocate(_desc_tbl->get_tuple_descriptor(1)->byte_size());
     bool eof = false;
@@ -885,8 +883,7 @@ TEST_F(OrcScannerTest, normal3) {
                        &_counter);
     EXPECT_TRUE(scanner.open().ok());
 
-    auto tracker = std::make_shared<MemTracker>();
-    MemPool tuple_pool(tracker.get());
+    MemPool tuple_pool;
 
     Tuple* tuple = (Tuple*)tuple_pool.allocate(_desc_tbl->get_tuple_descriptor(1)->byte_size());
     bool eof = false;
diff --git a/be/test/exec/parquet_scanner_test.cpp b/be/test/exec/parquet_scanner_test.cpp
index 5b1020224e..35d0f6e359 100644
--- a/be/test/exec/parquet_scanner_test.cpp
+++ b/be/test/exec/parquet_scanner_test.cpp
@@ -40,7 +40,7 @@ class ParquetScannerTest : public testing::Test {
 public:
     ParquetScannerTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
     }
     void init();
     static void SetUpTestCase() {
diff --git a/be/test/exec/tablet_info_test.cpp b/be/test/exec/tablet_info_test.cpp
index 423737d86e..93144476c6 100644
--- a/be/test/exec/tablet_info_test.cpp
+++ b/be/test/exec/tablet_info_test.cpp
@@ -20,7 +20,7 @@
 #include 
 
 #include "runtime/descriptor_helper.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/row_batch.h"
 #include "runtime/tuple_row.h"
 
diff --git a/be/test/exec/tablet_sink_test.cpp b/be/test/exec/tablet_sink_test.cpp
index ff33954e7e..812313750f 100644
--- a/be/test/exec/tablet_sink_test.cpp
+++ b/be/test/exec/tablet_sink_test.cpp
@@ -26,6 +26,7 @@
 #include "runtime/decimalv2_value.h"
 #include "runtime/descriptor_helper.h"
 #include "runtime/exec_env.h"
+#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/result_queue_mgr.h"
 #include "runtime/row_batch.h"
 #include "runtime/runtime_state.h"
@@ -57,7 +58,7 @@ public:
         _env->_internal_client_cache = new BrpcClientCache<PBackendService_Stub>();
         _env->_function_client_cache = new BrpcClientCache<PFunctionService_Stub>();
         _env->_buffer_reservation = new ReservationTracker();
-        _env->_task_pool_mem_tracker_registry.reset(new MemTrackerTaskPool());
+        _env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool();
         ThreadPoolBuilder("SendBatchThreadPool")
                 .set_min_threads(1)
                 .set_max_threads(5)
@@ -74,6 +75,7 @@ public:
         SAFE_DELETE(_env->_master_info);
         SAFE_DELETE(_env->_thread_mgr);
         SAFE_DELETE(_env->_buffer_reservation);
+        SAFE_DELETE(_env->_task_pool_mem_tracker_registry);
         if (_server) {
             _server->Stop(100);
             _server->Join();
@@ -383,8 +385,6 @@ TEST_F(OlapTableSinkTest, normal) {
     query_options.batch_size = 1;
     RuntimeState state(fragment_id, query_options, TQueryGlobals(), _env);
     state.init_mem_trackers(TUniqueId());
-    // state._query_mem_tracker.reset(new MemTracker());
-    // state._instance_mem_tracker.reset(new MemTracker(-1, "test", state._query_mem_tracker.get()));
 
     ObjectPool obj_pool;
     TDescriptorTable tdesc_tbl;
diff --git a/be/test/olap/aggregate_func_test.cpp b/be/test/olap/aggregate_func_test.cpp
index 42a80eee8f..687048419b 100644
--- a/be/test/olap/aggregate_func_test.cpp
+++ b/be/test/olap/aggregate_func_test.cpp
@@ -23,7 +23,6 @@
 #include "olap/decimal12.h"
 #include "olap/uint24.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 
 namespace doris {
 
@@ -39,8 +38,7 @@ void test_min() {
     static const size_t kValSize = sizeof(CppType) + 1; // '1' represents the leading bool flag.
     char buf[64];
 
-    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
-    std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
+    std::unique_ptr<MemPool> mem_pool(new MemPool());
     ObjectPool agg_object_pool;
     const AggregateInfo* agg = get_aggregate_info(OLAP_FIELD_AGGREGATION_MIN, field_type);
 
@@ -115,8 +113,7 @@ void test_max() {
 
     char buf[64];
 
-    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
-    std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
+    std::unique_ptr<MemPool> mem_pool(new MemPool());
     ObjectPool agg_object_pool;
     const AggregateInfo* agg = get_aggregate_info(OLAP_FIELD_AGGREGATION_MAX, field_type);
 
@@ -191,8 +188,7 @@ void test_sum() {
     char buf[64];
     RowCursorCell dst(buf);
 
-    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
-    std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
+    std::unique_ptr<MemPool> mem_pool(new MemPool());
     ObjectPool agg_object_pool;
     const AggregateInfo* agg = get_aggregate_info(OLAP_FIELD_AGGREGATION_SUM, field_type);
 
@@ -266,8 +262,7 @@ void test_replace() {
     char buf[64];
     RowCursorCell dst(buf);
 
-    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
-    std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
+    std::unique_ptr<MemPool> mem_pool(new MemPool());
     ObjectPool agg_object_pool;
     const AggregateInfo* agg = get_aggregate_info(OLAP_FIELD_AGGREGATION_REPLACE, field_type);
 
@@ -325,8 +320,7 @@ void test_replace_string() {
     dst_slice->data = nullptr;
     dst_slice->size = 0;
 
-    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
-    std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
+    std::unique_ptr<MemPool> mem_pool(new MemPool());
     ObjectPool agg_object_pool;
     const AggregateInfo* agg = get_aggregate_info(OLAP_FIELD_AGGREGATION_REPLACE, field_type);
 
diff --git a/be/test/olap/block_column_predicate_test.cpp b/be/test/olap/block_column_predicate_test.cpp
index a027196c0c..2d488e0791 100644
--- a/be/test/olap/block_column_predicate_test.cpp
+++ b/be/test/olap/block_column_predicate_test.cpp
@@ -34,10 +34,7 @@ namespace doris {
 
 class BlockColumnPredicateTest : public testing::Test {
 public:
-    BlockColumnPredicateTest() {
-        _mem_tracker.reset(new MemTracker(-1));
-        _mem_pool.reset(new MemPool(_mem_tracker.get()));
-    }
+    BlockColumnPredicateTest() { _mem_pool.reset(new MemPool()); }
 
     ~BlockColumnPredicateTest() = default;
 
@@ -65,7 +62,6 @@ public:
         _row_block.reset(new RowBlockV2(schema, size));
     }
 
-    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr<MemPool> _mem_pool;
     std::unique_ptr<RowBlockV2> _row_block;
 };
diff --git a/be/test/olap/bloom_filter_column_predicate_test.cpp b/be/test/olap/bloom_filter_column_predicate_test.cpp
index c42ab8d6cd..3d697ecfe3 100644
--- a/be/test/olap/bloom_filter_column_predicate_test.cpp
+++ b/be/test/olap/bloom_filter_column_predicate_test.cpp
@@ -37,10 +37,7 @@ namespace doris {
 
 class TestBloomFilterColumnPredicate : public testing::Test {
 public:
-    TestBloomFilterColumnPredicate() : _row_block(nullptr) {
-        _mem_tracker.reset(new MemTracker(-1));
-        _mem_pool.reset(new MemPool(_mem_tracker.get()));
-    }
+    TestBloomFilterColumnPredicate() : _row_block(nullptr) { _mem_pool.reset(new MemPool()); }
 
     ~TestBloomFilterColumnPredicate() {}
 
@@ -69,7 +66,6 @@ public:
         _row_block.reset(new RowBlockV2(schema, size));
     }
 
-    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr<MemPool> _mem_pool;
     std::unique_ptr<RowBlockV2> _row_block;
 };
diff --git a/be/test/olap/column_vector_test.cpp b/be/test/olap/column_vector_test.cpp
index 19a28088d8..35ba271f4b 100644
--- a/be/test/olap/column_vector_test.cpp
+++ b/be/test/olap/column_vector_test.cpp
@@ -24,20 +24,18 @@
 #include "olap/types.cpp"
 #include "runtime/collection_value.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 
 namespace doris {
 
 class ColumnVectorTest : public testing::Test {
 public:
-    ColumnVectorTest() : _pool(&_tracker) {}
+    ColumnVectorTest() : _pool() {}
 
 protected:
     void SetUp() {}
     void TearDown() {}
 
 private:
-    MemTracker _tracker;
     MemPool _pool;
 };
 
diff --git a/be/test/olap/comparison_predicate_test.cpp b/be/test/olap/comparison_predicate_test.cpp
index 23538dd6fc..6a59c4abb3 100644
--- a/be/test/olap/comparison_predicate_test.cpp
+++ b/be/test/olap/comparison_predicate_test.cpp
@@ -88,10 +88,7 @@ static std::string to_datetime_string(uint64_t& datetime_value) {
 #define TEST_PREDICATE_DEFINITION(CLASS_NAME)                                                     \
     class CLASS_NAME : public testing::Test {                                                     \
     public:                                                                                       \
-        CLASS_NAME() {                                                                            \
-            _mem_tracker.reset(new MemTracker(-1));                                               \
-            _mem_pool.reset(new MemPool(_mem_tracker.get()));                                     \
-        }                                                                                         \
+        CLASS_NAME() { _mem_pool.reset(new MemPool()); }                                          \
         ~CLASS_NAME() {}                                                                          \
         void SetTabletSchema(std::string name, const std::string& type,                           \
                              const std::string& aggregation, uint32_t length, bool is_allow_null, \
@@ -116,7 +113,6 @@ static std::string to_datetime_string(uint64_t& datetime_value) {
             Schema schema(*tablet_schema);                                                        \
             _row_block.reset(new RowBlockV2(schema, size));                                       \
         }                                                                                         \
-        std::shared_ptr<MemTracker> _mem_tracker;                                        \
         std::unique_ptr<MemPool> _mem_pool;                                               \
         std::unique_ptr<RowBlockV2> _row_block;                                           \
     };
diff --git a/be/test/olap/delta_writer_test.cpp b/be/test/olap/delta_writer_test.cpp
index 4e5e05e600..83502b24b6 100644
--- a/be/test/olap/delta_writer_test.cpp
+++ b/be/test/olap/delta_writer_test.cpp
@@ -34,7 +34,6 @@
 #include "runtime/descriptor_helper.h"
 #include "runtime/exec_env.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "runtime/tuple.h"
 #include "util/file_utils.h"
 #include "util/logging.h"
@@ -433,8 +432,7 @@ TEST_F(TestDeltaWriter, write) {
     DeltaWriter::open(&write_req, &delta_writer);
     EXPECT_NE(delta_writer, nullptr);
 
-    auto tracker = std::make_shared<MemTracker>();
-    MemPool pool(tracker.get());
+    MemPool pool;
     // Tuple 1
     {
         Tuple* tuple = reinterpret_cast<Tuple*>(pool.allocate(tuple_desc->byte_size()));
@@ -554,8 +552,7 @@ TEST_F(TestDeltaWriter, vec_write) {
     DeltaWriter::open(&write_req, &delta_writer, true);
     ASSERT_NE(delta_writer, nullptr);
 
-    auto tracker = std::make_shared<MemTracker>();
-    MemPool pool(tracker.get());
+    MemPool pool;
 
     vectorized::Block block;
     for (const auto& slot_desc : tuple_desc->slots()) {
@@ -701,8 +698,7 @@ TEST_F(TestDeltaWriter, sequence_col) {
     DeltaWriter::open(&write_req, &delta_writer);
     EXPECT_NE(delta_writer, nullptr);
 
-    MemTracker tracker;
-    MemPool pool(&tracker);
+    MemPool pool;
     // Tuple 1
     {
         Tuple* tuple = reinterpret_cast<Tuple*>(pool.allocate(tuple_desc->byte_size()));
@@ -769,8 +765,7 @@ TEST_F(TestDeltaWriter, vec_sequence_col) {
     DeltaWriter::open(&write_req, &delta_writer, true);
     ASSERT_NE(delta_writer, nullptr);
 
-    MemTracker tracker;
-    MemPool pool(&tracker);
+    MemPool pool;
 
     vectorized::Block block;
     for (const auto& slot_desc : tuple_desc->slots()) {
diff --git a/be/test/olap/engine_storage_migration_task_test.cpp b/be/test/olap/engine_storage_migration_task_test.cpp
index 5eaf26392d..927b257d43 100644
--- a/be/test/olap/engine_storage_migration_task_test.cpp
+++ b/be/test/olap/engine_storage_migration_task_test.cpp
@@ -35,7 +35,6 @@
 #include "runtime/descriptor_helper.h"
 #include "runtime/exec_env.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "runtime/tuple.h"
 #include "util/file_utils.h"
 #include "util/logging.h"
@@ -176,8 +175,7 @@ TEST_F(TestEngineStorageMigrationTask, write_and_migration) {
     DeltaWriter::open(&write_req, &delta_writer);
     EXPECT_NE(delta_writer, nullptr);
 
-    MemTracker tracker;
-    MemPool pool(&tracker);
+    MemPool pool;
     // Tuple 1
     {
         Tuple* tuple = reinterpret_cast<Tuple*>(pool.allocate(tuple_desc->byte_size()));
diff --git a/be/test/olap/in_list_predicate_test.cpp b/be/test/olap/in_list_predicate_test.cpp
index b3f463bdd0..73a77654d1 100644
--- a/be/test/olap/in_list_predicate_test.cpp
+++ b/be/test/olap/in_list_predicate_test.cpp
@@ -102,10 +102,7 @@ static std::string to_datetime_string(uint64_t& datetime_value) {
 
 class TestInListPredicate : public testing::Test {
 public:
-    TestInListPredicate() : _row_block(nullptr) {
-        _mem_tracker.reset(new MemTracker(-1));
-        _mem_pool.reset(new MemPool(_mem_tracker.get()));
-    }
+    TestInListPredicate() : _row_block(nullptr) { _mem_pool.reset(new MemPool()); }
 
     ~TestInListPredicate() {}
 
@@ -134,7 +131,6 @@ public:
         _row_block.reset(new RowBlockV2(*_schema, size));
     }
 
-    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr<MemPool> _mem_pool;
     std::unique_ptr<RowBlockV2> _row_block;
     std::unique_ptr<Schema> _schema;
diff --git a/be/test/olap/key_coder_test.cpp b/be/test/olap/key_coder_test.cpp
index 96f6210f21..746da2a1a6 100644
--- a/be/test/olap/key_coder_test.cpp
+++ b/be/test/olap/key_coder_test.cpp
@@ -23,18 +23,16 @@
 #include 
 
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "util/debug_util.h"
 
 namespace doris {
 
 class KeyCoderTest : public testing::Test {
 public:
-    KeyCoderTest() : _tracker(new MemTracker()), _pool(_tracker.get()) {}
+    KeyCoderTest() : _pool() {}
     virtual ~KeyCoderTest() {}
 
 private:
-    std::shared_ptr<MemTracker> _tracker;
     MemPool _pool;
 };
 
diff --git a/be/test/olap/lru_cache_test.cpp b/be/test/olap/lru_cache_test.cpp
index b7e18cc01e..0ec5b98a63 100644
--- a/be/test/olap/lru_cache_test.cpp
+++ b/be/test/olap/lru_cache_test.cpp
@@ -221,7 +221,10 @@ static void deleter(const CacheKey& key, void* v) {}
 static void insert_LRUCache(LRUCache& cache, const CacheKey& key, int value,
                             CachePriority priority) {
     uint32_t hash = key.hash(key.data(), key.size(), 0);
-    cache.release(cache.insert(key, hash, EncodeValue(value), value, &deleter, priority));
+    static std::unique_ptr<MemTracker> lru_cache_tracker =
+            std::make_unique<MemTracker>(-1, "TestLruCache");
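+    // LRUCache::insert() now takes the MemTracker that cached values are charged against.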
+    cache.release(cache.insert(key, hash, EncodeValue(value), value, &deleter,
+                               lru_cache_tracker.get(), priority));
 }
 
 TEST_F(CacheTest, Usage) {
diff --git a/be/test/olap/null_predicate_test.cpp b/be/test/olap/null_predicate_test.cpp
index fdfa168ea6..978b5a6dbe 100644
--- a/be/test/olap/null_predicate_test.cpp
+++ b/be/test/olap/null_predicate_test.cpp
@@ -55,10 +55,7 @@ static uint32_t to_date_v2_timestamp(const char* date_string) {
 
 class TestNullPredicate : public testing::Test {
 public:
-    TestNullPredicate() : _row_block(nullptr) {
-        _mem_tracker.reset(new MemTracker(-1));
-        _mem_pool.reset(new MemPool(_mem_tracker.get()));
-    }
+    TestNullPredicate() : _row_block(nullptr) { _mem_pool.reset(new MemPool()); }
 
     ~TestNullPredicate() {}
 
@@ -87,7 +84,6 @@ public:
         _row_block.reset(new RowBlockV2(*_schema, size));
     }
 
-    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr<MemPool> _mem_pool;
     std::unique_ptr<RowBlockV2> _row_block;
     std::unique_ptr<Schema> _schema;
diff --git a/be/test/olap/row_block_v2_test.cpp b/be/test/olap/row_block_v2_test.cpp
index a0e7fec4f6..0283026959 100644
--- a/be/test/olap/row_block_v2_test.cpp
+++ b/be/test/olap/row_block_v2_test.cpp
@@ -89,8 +89,7 @@ TEST_F(TestRowBlockV2, test_convert) {
     block_info.row_num = 1024;
     block_info.null_supported = true;
     output_block.init(block_info);
-    auto tracker = std::make_shared<MemTracker>();
-    MemPool pool(tracker.get());
+    MemPool pool;
     for (int i = 0; i < input_block.capacity(); ++i) {
         RowBlockRow row = input_block.row(i);
 
diff --git a/be/test/olap/row_cursor_test.cpp b/be/test/olap/row_cursor_test.cpp
index 96722c77d7..fe30e0544d 100644
--- a/be/test/olap/row_cursor_test.cpp
+++ b/be/test/olap/row_cursor_test.cpp
@@ -24,7 +24,6 @@
 #include "olap/schema.h"
 #include "olap/tablet_schema.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "util/logging.h"
 #include "util/types.h"
 
@@ -262,16 +261,12 @@ void set_tablet_schema_for_cmp_and_aggregate(TabletSchema* tablet_schema) {
 
 class TestRowCursor : public testing::Test {
 public:
-    TestRowCursor() {
-        _mem_tracker.reset(new MemTracker(-1));
-        _mem_pool.reset(new MemPool(_mem_tracker.get()));
-    }
+    TestRowCursor() { _mem_pool.reset(new MemPool()); }
 
     virtual void SetUp() {}
 
     virtual void TearDown() {}
 
-    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr<MemPool> _mem_pool;
 };
 
@@ -486,8 +481,7 @@ TEST_F(TestRowCursor, AggregateWithoutNull) {
     left.set_field_content(4, reinterpret_cast<char*>(&l_decimal), _mem_pool.get());
     left.set_field_content(5, reinterpret_cast<char*>(&l_varchar), _mem_pool.get());
 
-    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
-    std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
+    std::unique_ptr<MemPool> mem_pool(new MemPool());
     ObjectPool agg_object_pool;
     init_row_with_others(&row, left, mem_pool.get(), &agg_object_pool);
 
@@ -547,8 +541,7 @@ TEST_F(TestRowCursor, AggregateWithNull) {
     left.set_null(4);
     left.set_field_content(5, reinterpret_cast<char*>(&l_varchar), _mem_pool.get());
 
-    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
-    std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
+    std::unique_ptr<MemPool> mem_pool(new MemPool());
     ObjectPool agg_object_pool;
     init_row_with_others(&row, left, mem_pool.get(), &agg_object_pool);
 
diff --git a/be/test/olap/rowset/beta_rowset_test.cpp b/be/test/olap/rowset/beta_rowset_test.cpp
index 7406985ca1..d0b4a785f6 100644
--- a/be/test/olap/rowset/beta_rowset_test.cpp
+++ b/be/test/olap/rowset/beta_rowset_test.cpp
@@ -38,7 +38,7 @@
 #include "olap/utils.h"
 #include "runtime/exec_env.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "util/file_utils.h"
 #include "util/slice.h"
 
@@ -192,7 +192,7 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) {
         // k2 := k1 * 10
         // k3 := 4096 * i + rid
         for (int i = 0; i < num_segments; ++i) {
-            MemPool mem_pool("BetaRowsetTest");
+            MemPool mem_pool;
             for (int rid = 0; rid < rows_per_segment; ++rid) {
                 uint32_t k1 = rid * 10 + i;
                 uint32_t k2 = k1 * 10;
diff --git a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
index bb3493efd6..84ffd9b82a 100644
--- a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
@@ -30,7 +30,6 @@
 #include "olap/rowset/segment_v2/page_decoder.h"
 #include "olap/types.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "testutil/test_util.h"
 #include "util/debug_util.h"
 
@@ -96,8 +95,7 @@ public:
         EXPECT_EQ(slices.size(), page_decoder.count());
 
         //check values
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
         auto type_info = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
         size_t size = slices.size();
         std::unique_ptr<ColumnVectorBatch> cvb;
@@ -201,8 +199,7 @@ public:
             EXPECT_TRUE(status.ok());
 
             //check values
-            auto tracker = std::make_shared<MemTracker>();
-            MemPool pool(tracker.get());
+            MemPool pool;
             auto type_info = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
             std::unique_ptr<ColumnVectorBatch> cvb;
             ColumnVectorBatch::create(1, false, type_info, nullptr, &cvb);
diff --git a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
index 1da1db55e1..825c43cf48 100644
--- a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
@@ -28,7 +28,6 @@
 #include "olap/rowset/segment_v2/page_decoder.h"
 #include "olap/types.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 
 namespace doris {
 namespace segment_v2 {
@@ -71,8 +70,7 @@ public:
         EXPECT_TRUE(status.ok());
 
         //test1
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
         size_t size = 3;
         std::unique_ptr<ColumnVectorBatch> cvb;
         ColumnVectorBatch::create(size, true, get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR),
diff --git a/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp
index b1ee537668..e84106d936 100644
--- a/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp
@@ -28,7 +28,6 @@
 #include "olap/rowset/segment_v2/page_decoder.h"
 #include "olap/types.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "util/debug_util.h"
 
 namespace doris {
@@ -74,8 +73,7 @@ public:
         EXPECT_EQ(slices.size(), page_decoder->count());
 
         //check values
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
         auto type_info = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
         size_t size = slices.size();
         std::unique_ptr<ColumnVectorBatch> cvb;
diff --git a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp
index 99419d8b14..48b5dc7ec3 100644
--- a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp
@@ -26,7 +26,6 @@
 #include "olap/rowset/segment_v2/page_builder.h"
 #include "olap/rowset/segment_v2/page_decoder.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "util/logging.h"
 
 using doris::segment_v2::PageBuilderOptions;
@@ -40,8 +39,7 @@ public:
 
     template <FieldType type, class PageDecoderType>
     void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
         std::unique_ptr<ColumnVectorBatch> cvb;
         ColumnVectorBatch::create(1, true, get_scalar_type_info(type), nullptr, &cvb);
         ColumnBlock block(cvb.get(), &pool);
@@ -85,8 +83,7 @@ public:
         EXPECT_TRUE(status.ok());
         EXPECT_EQ(0, page_decoder.current_index());
 
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
 
         std::unique_ptr<ColumnVectorBatch> cvb;
         ColumnVectorBatch::create(size, false, get_scalar_type_info(Type), nullptr, &cvb);
diff --git a/be/test/olap/rowset/segment_v2/bloom_filter_page_test.cpp b/be/test/olap/rowset/segment_v2/bloom_filter_page_test.cpp
index 997eb84f19..7e78fab5cc 100644
--- a/be/test/olap/rowset/segment_v2/bloom_filter_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/bloom_filter_page_test.cpp
@@ -26,7 +26,6 @@
 #include "olap/rowset/segment_v2/page_builder.h"
 #include "olap/rowset/segment_v2/page_decoder.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "util/logging.h"
 
 using doris::segment_v2::PageBuilderOptions;
@@ -64,8 +63,7 @@ public:
         status = bf_page_decoder.seek_to_position_in_page(0);
         EXPECT_TRUE(status.ok());
 
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
+        Slice* values = reinterpret_cast<Slice*>(pool.allocate(sizeof(Slice)));
         ColumnBlock block(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values, nullptr, 2,
                           &pool);
diff --git a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
index 0355ea80ff..21dc7b693b 100644
--- a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
+++ b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
@@ -31,7 +31,6 @@
 #include "olap/tablet_schema_helper.h"
 #include "olap/types.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "testutil/test_util.h"
 #include "util/file_utils.h"
 #include "vec/core/types.h"
@@ -51,7 +50,7 @@ static const std::string TEST_DIR = "./ut_dir/column_reader_writer_test";
 
 class ColumnReaderWriterTest : public testing::Test {
 public:
-    ColumnReaderWriterTest() : _tracker(new MemTracker()), _pool(_tracker.get()) {}
+    ColumnReaderWriterTest() : _pool() {}
     ~ColumnReaderWriterTest() override = default;
 
 protected:
@@ -70,7 +69,6 @@ protected:
     }
 
 private:
-    std::shared_ptr<MemTracker> _tracker;
     MemPool _pool;
 };
 
@@ -155,8 +153,7 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows,
             st = iter->seek_to_first();
             EXPECT_TRUE(st.ok()) << st.to_string();
 
-            auto tracker = std::make_shared<MemTracker>();
-            MemPool pool(tracker.get());
+            MemPool pool;
             std::unique_ptr<ColumnVectorBatch> cvb;
             ColumnVectorBatch::create(0, true, type_info, nullptr, &cvb);
             cvb->resize(1024);
@@ -207,8 +204,7 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows,
             st = iter->init(iter_opts);
             EXPECT_TRUE(st.ok());
 
-            auto tracker = std::make_shared<MemTracker>();
-            MemPool pool(tracker.get());
+            MemPool pool;
             std::unique_ptr<ColumnVectorBatch> cvb;
             ColumnVectorBatch::create(0, true, type_info, nullptr, &cvb);
             cvb->resize(1024);
@@ -332,8 +328,7 @@ void test_array_nullable_data(CollectionValue* src_data, uint8_t* src_is_null, i
             st = iter->seek_to_first();
             EXPECT_TRUE(st.ok()) << st.to_string();
 
-            MemTracker tracker;
-            MemPool pool(&tracker);
+            MemPool pool;
             std::unique_ptr<ColumnVectorBatch> cvb;
             ColumnVectorBatch::create(0, true, type_info.get(), field, &cvb);
             cvb->resize(1024);
@@ -359,8 +354,7 @@ void test_array_nullable_data(CollectionValue* src_data, uint8_t* src_is_null, i
         }
         // seek read
         {
-            MemTracker tracker;
-            MemPool pool(&tracker);
+            MemPool pool;
             std::unique_ptr<ColumnVectorBatch> cvb;
             ColumnVectorBatch::create(0, true, type_info.get(), field, &cvb);
             cvb->resize(1024);
@@ -468,8 +462,7 @@ void test_read_default_value(string value, void* result) {
             st = iter.seek_to_first();
             EXPECT_TRUE(st.ok()) << st.to_string();
 
-            auto tracker = std::make_shared<MemTracker>();
-            MemPool pool(tracker.get());
+            MemPool pool;
             std::unique_ptr<ColumnVectorBatch> cvb;
             ColumnVectorBatch::create(0, true, scalar_type_info, nullptr, &cvb);
             cvb->resize(1024);
@@ -499,8 +492,7 @@ void test_read_default_value(string value, void* result) {
         }
 
         {
-            auto tracker = std::make_shared<MemTracker>();
-            MemPool pool(tracker.get());
+            MemPool pool;
             std::unique_ptr<ColumnVectorBatch> cvb;
             ColumnVectorBatch::create(0, true, scalar_type_info, nullptr, &cvb);
             cvb->resize(1024);
diff --git a/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp b/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp
index de7fce79ac..eef1b26511 100644
--- a/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp
@@ -24,7 +24,6 @@
 #include "olap/rowset/segment_v2/options.h"
 #include "runtime/large_int_value.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 
 using doris::segment_v2::PageBuilderOptions;
 using doris::segment_v2::PageDecoderOptions;
@@ -34,8 +33,7 @@ class FrameOfReferencePageTest : public testing::Test {
 public:
     template <FieldType type, class PageDecoderType>
     void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
         std::unique_ptr<ColumnVectorBatch> cvb;
         ColumnVectorBatch::create(1, true, get_scalar_type_info(type), nullptr, &cvb);
         ColumnBlock block(cvb.get(), &pool);
@@ -66,8 +64,7 @@ public:
         EXPECT_EQ(0, for_page_decoder.current_index());
         EXPECT_EQ(size, for_page_decoder.count());
 
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
         std::unique_ptr<ColumnVectorBatch> cvb;
         ColumnVectorBatch::create(size, true, get_scalar_type_info(Type), nullptr, &cvb);
         ColumnBlock block(cvb.get(), &pool);
diff --git a/be/test/olap/rowset/segment_v2/plain_page_test.cpp b/be/test/olap/rowset/segment_v2/plain_page_test.cpp
index 1f14d0ee58..4cd3e47ba9 100644
--- a/be/test/olap/rowset/segment_v2/plain_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/plain_page_test.cpp
@@ -27,7 +27,6 @@
 #include "olap/rowset/segment_v2/page_decoder.h"
 #include "olap/types.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 
 namespace doris {
 namespace segment_v2 {
@@ -46,8 +45,7 @@ public:
 
     template <FieldType type, class PageDecoderType>
     void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
         std::unique_ptr<ColumnVectorBatch> cvb;
         ColumnVectorBatch::create(1, true, get_scalar_type_info(type), nullptr, &cvb);
         ColumnBlock block(cvb.get(), &pool);
@@ -85,8 +83,7 @@ public:
 
         EXPECT_EQ(0, page_decoder.current_index());
 
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
 
         std::unique_ptr<ColumnVectorBatch> cvb;
         ColumnVectorBatch::create(size, true, get_scalar_type_info(Type), nullptr, &cvb);
diff --git a/be/test/olap/rowset/segment_v2/rle_page_test.cpp b/be/test/olap/rowset/segment_v2/rle_page_test.cpp
index 70c14b3f8e..c2970534db 100644
--- a/be/test/olap/rowset/segment_v2/rle_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/rle_page_test.cpp
@@ -25,7 +25,6 @@
 #include "olap/rowset/segment_v2/page_builder.h"
 #include "olap/rowset/segment_v2/page_decoder.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "util/logging.h"
 
 using doris::segment_v2::PageBuilderOptions;
@@ -39,8 +38,7 @@ public:
 
     template <FieldType type, class PageDecoderType>
     void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
         std::unique_ptr<ColumnVectorBatch> cvb;
         ColumnVectorBatch::create(1, true, get_scalar_type_info(type), nullptr, &cvb);
         ColumnBlock block(cvb.get(), &pool);
@@ -77,8 +75,7 @@ public:
         EXPECT_EQ(0, rle_page_decoder.current_index());
         EXPECT_EQ(size, rle_page_decoder.count());
 
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
         std::unique_ptr<ColumnVectorBatch> cvb;
         ColumnVectorBatch::create(size, true, get_scalar_type_info(Type), nullptr, &cvb);
         ColumnBlock block(cvb.get(), &pool);
diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp
index a3db6cd628..58a50a2c80 100644
--- a/be/test/olap/rowset/segment_v2/segment_test.cpp
+++ b/be/test/olap/rowset/segment_v2/segment_test.cpp
@@ -36,11 +36,11 @@
 #include "olap/row_cursor.h"
 #include "olap/rowset/segment_v2/segment_iterator.h"
 #include "olap/rowset/segment_v2/segment_writer.h"
+#include "olap/storage_engine.h"
 #include "olap/tablet_schema.h"
 #include "olap/tablet_schema_helper.h"
 #include "olap/types.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "testutil/test_util.h"
 #include "util/file_utils.h"
 
@@ -71,6 +71,8 @@ static bool column_contains_index(ColumnMetaPB column_meta, ColumnIndexTypePB ty
     return false;
 }
 
+static StorageEngine* k_engine = nullptr;
+
 class SegmentReaderWriterTest : public ::testing::Test {
 protected:
     void SetUp() override {
@@ -78,12 +80,21 @@ protected:
             EXPECT_TRUE(FileUtils::remove_all(kSegmentDir).ok());
         }
         EXPECT_TRUE(FileUtils::create_dir(kSegmentDir).ok());
+
+        doris::EngineOptions options;
+        k_engine = new StorageEngine(options);
+        StorageEngine::_s_instance = k_engine;
     }
 
     void TearDown() override {
         if (FileUtils::check_exist(kSegmentDir)) {
             EXPECT_TRUE(FileUtils::remove_all(kSegmentDir).ok());
         }
+        if (k_engine != nullptr) {
+            k_engine->stop();
+            delete k_engine;
+            k_engine = nullptr;
+        }
     }
 
     TabletSchema create_schema(const std::vector<TabletColumn>& columns,
@@ -775,8 +786,7 @@ TEST_F(SegmentReaderWriterTest, TestDefaultValueColumn) {
 
 TEST_F(SegmentReaderWriterTest, TestStringDict) {
     size_t num_rows_per_block = 10;
-    auto tracker = std::make_shared<MemTracker>();
-    MemPool pool(tracker.get());
+    MemPool pool;
 
     std::shared_ptr<TabletSchema> tablet_schema(new TabletSchema());
     tablet_schema->_num_columns = 4;
diff --git a/be/test/olap/skiplist_test.cpp b/be/test/olap/skiplist_test.cpp
index c0999a76fa..7447b0aa7c 100644
--- a/be/test/olap/skiplist_test.cpp
+++ b/be/test/olap/skiplist_test.cpp
@@ -24,7 +24,6 @@
 
 #include "olap/schema.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "testutil/test_util.h"
 #include "util/hash_util.hpp"
 #include "util/priority_thread_pool.hpp"
@@ -50,8 +49,7 @@ struct TestComparator {
 class SkipTest : public testing::Test {};
 
 TEST_F(SkipTest, Empty) {
-    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
-    std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
+    std::unique_ptr<MemPool> mem_pool(new MemPool());
 
     TestComparator* cmp = new TestComparator();
     SkipList<Key, TestComparator> list(cmp, mem_pool.get(), false);
@@ -69,8 +67,7 @@ TEST_F(SkipTest, Empty) {
 }
 
 TEST_F(SkipTest, InsertAndLookup) {
-    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
-    std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
+    std::unique_ptr<MemPool> mem_pool(new MemPool());
 
     const int N = 2000;
     const int R = 5000;
@@ -151,8 +148,7 @@ TEST_F(SkipTest, InsertAndLookup) {
 
 // Only non-DUP model will use Find() and InsertWithHint().
 TEST_F(SkipTest, InsertWithHintNoneDupModel) {
-    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
-    std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
+    std::unique_ptr<MemPool> mem_pool(new MemPool());
 
     const int N = 2000;
     const int R = 5000;
@@ -258,7 +254,6 @@ private:
     // Current state of the test
     State _current;
 
-    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr<MemPool> _mem_pool;
     std::shared_ptr<TestComparator> _comparator;
     // SkipList is not protected by _mu.  We just use a single writer
@@ -267,8 +262,7 @@ private:
 
 public:
     ConcurrentTest()
-            : _mem_tracker(new MemTracker(-1)),
-              _mem_pool(new MemPool(_mem_tracker.get())),
+            : _mem_pool(new MemPool()),
               _comparator(new TestComparator()),
               _list(_comparator.get(), _mem_pool.get(), false) {}
 
diff --git a/be/test/olap/storage_types_test.cpp b/be/test/olap/storage_types_test.cpp
index 663a9b3d07..0319326d4d 100644
--- a/be/test/olap/storage_types_test.cpp
+++ b/be/test/olap/storage_types_test.cpp
@@ -20,7 +20,6 @@
 #include "olap/field.h"
 #include "olap/types.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "util/slice.h"
 
 namespace doris {
@@ -39,8 +38,7 @@ void common_test(typename TypeTraits::CppType src_val) {
     EXPECT_EQ(sizeof(src_val), type->size());
     {
         typename TypeTraits::CppType dst_val;
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
         type->deep_copy((char*)&dst_val, (char*)&src_val, &pool);
         EXPECT_TRUE(type->equal((char*)&src_val, (char*)&dst_val));
         EXPECT_EQ(0, type->cmp((char*)&src_val, (char*)&dst_val));
@@ -79,8 +77,7 @@ void test_char(Slice src_val) {
     {
         char buf[64];
         Slice dst_val(buf, sizeof(buf));
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
         type->deep_copy((char*)&dst_val, (char*)&src_val, &pool);
         EXPECT_TRUE(type->equal((char*)&src_val, (char*)&dst_val));
         EXPECT_EQ(0, type->cmp((char*)&src_val, (char*)&dst_val));
@@ -163,8 +160,7 @@ void common_test_array(CollectionValue src_val) {
 
     { // test deep copy
         CollectionValue dst_val;
-        auto tracker = std::make_shared<MemTracker>();
-        MemPool pool(tracker.get());
+        MemPool pool;
         array_type->deep_copy((char*)&dst_val, (char*)&src_val, &pool);
         EXPECT_TRUE(array_type->equal((char*)&src_val, (char*)&dst_val));
         EXPECT_EQ(0, array_type->cmp((char*)&src_val, (char*)&dst_val));
diff --git a/be/test/olap/tablet_clone_test.cpp b/be/test/olap/tablet_clone_test.cpp
index 80d0963ba7..64490d0011 100644
--- a/be/test/olap/tablet_clone_test.cpp
+++ b/be/test/olap/tablet_clone_test.cpp
@@ -162,8 +162,7 @@ TEST_F(TabletCloneTest, convert_rowset_ids_has_file_in_s3) {
     DeltaWriter::open(&write_req, &delta_writer);
     ASSERT_NE(delta_writer, nullptr);
 
-    MemTracker tracker;
-    MemPool pool(&tracker);
+    MemPool pool;
     // Tuple 1
     {
         Tuple* tuple = reinterpret_cast<Tuple*>(pool.allocate(tuple_desc->byte_size()));
diff --git a/be/test/olap/tablet_cooldown_test.cpp b/be/test/olap/tablet_cooldown_test.cpp
index a191fd0fcf..d4d8acbbb0 100644
--- a/be/test/olap/tablet_cooldown_test.cpp
+++ b/be/test/olap/tablet_cooldown_test.cpp
@@ -163,8 +163,7 @@ TEST_F(TabletCooldownTest, normal) {
     DeltaWriter::open(&write_req, &delta_writer);
     ASSERT_NE(delta_writer, nullptr);
 
-    MemTracker tracker;
-    MemPool pool(&tracker);
+    MemPool pool;
     // Tuple 1
     {
         Tuple* tuple = reinterpret_cast<Tuple*>(pool.allocate(tuple_desc->byte_size()));
diff --git a/be/test/olap/tablet_test.cpp b/be/test/olap/tablet_test.cpp
index e5240b27d3..2ca5d4450d 100644
--- a/be/test/olap/tablet_test.cpp
+++ b/be/test/olap/tablet_test.cpp
@@ -23,6 +23,7 @@
 
 #include "olap/olap_define.h"
 #include "olap/rowset/beta_rowset.h"
+#include "olap/storage_engine.h"
 #include "olap/storage_policy_mgr.h"
 #include "olap/tablet_meta.h"
 #include "testutil/mock_rowset.h"
@@ -34,11 +35,13 @@ namespace doris {
 
 using RowsetMetaSharedContainerPtr = std::shared_ptr<std::vector<RowsetMetaSharedPtr>>;
 
+static StorageEngine* k_engine = nullptr;
+
 class TestTablet : public testing::Test {
 public:
     virtual ~TestTablet() {}
 
-    virtual void SetUp() {
+    void SetUp() override {
         _tablet_meta = new_tablet_meta(TTabletSchema());
         _json_rowset_meta = R"({
             "rowset_id": 540081,
@@ -88,9 +91,19 @@ public:
                 }]
             }
         })";
+
+        doris::EngineOptions options;
+        k_engine = new StorageEngine(options);
+        StorageEngine::_s_instance = k_engine;
     }
 
-    virtual void TearDown() {}
+    void TearDown() override {
+        if (k_engine != nullptr) {
+            k_engine->stop();
+            delete k_engine;
+            k_engine = nullptr;
+        }
+    }
 
     TabletMetaSharedPtr new_tablet_meta(TTabletSchema schema, bool enable_merge_on_write = false) {
         return static_cast<TabletMetaSharedPtr>(
diff --git a/be/test/runtime/array_test.cpp b/be/test/runtime/array_test.cpp
index 0c60965cb8..43294006ab 100644
--- a/be/test/runtime/array_test.cpp
+++ b/be/test/runtime/array_test.cpp
@@ -39,7 +39,6 @@
 #include "runtime/collection_value.h"
 #include "runtime/descriptors.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "runtime/primitive_type.h"
 #include "runtime/raw_value.h"
 #include "testutil/array_utils.h"
@@ -153,9 +152,7 @@ void validate(const Field* field, const CollectionValue* expect, const Collectio
 
 class ArrayTest : public ::testing::Test {
 public:
-    ArrayTest()
-            : _mem_tracker(new MemTracker(MAX_MEMORY_BYTES, "ArrayTest")),
-              _mem_pool(new MemPool(_mem_tracker.get())) {}
+    ArrayTest() : _mem_pool(new MemPool()) {}
 
     template 
     void test(const ColumnPB& column_pb, const std::vector& literal_arrays) {
@@ -457,7 +454,6 @@ private:
 private:
     static constexpr size_t MAX_MEMORY_BYTES = 1024 * 1024;
     static const std::string TEST_DIR;
-    std::unique_ptr<MemTracker> _mem_tracker;
     std::unique_ptr<MemPool> _mem_pool;
     ObjectPool _object_pool;
 };
diff --git a/be/test/runtime/buffered_block_mgr2_test.cpp b/be/test/runtime/buffered_block_mgr2_test.cpp
index f553a7502b..9aaadeeea3 100644
--- a/be/test/runtime/buffered_block_mgr2_test.cpp
+++ b/be/test/runtime/buffered_block_mgr2_test.cpp
@@ -65,15 +65,11 @@ class BufferedBlockMgrTest : public ::testing::Test {
 protected:
     const static int _block_size = 1024;
 
-    virtual void SetUp() {
-        _test_env.reset(new TestEnv());
-        _client_tracker.reset(new MemTracker(-1));
-    }
+    virtual void SetUp() { _test_env.reset(new TestEnv()); }
 
     virtual void TearDown() {
         TearDownMgrs();
         _test_env.reset();
-        _client_tracker.reset();
 
         // Tests modify permissions, so make sure we can delete if they didn't clean up.
         for (int i = 0; i < _created_tmp_dirs.size(); ++i) {
@@ -147,26 +143,23 @@ protected:
     }
 
     BufferedBlockMgr2* CreateMgrAndClient(int64_t query_id, int max_buffers, int block_size,
-                                          int reserved_blocks,
-                                          const std::shared_ptr<MemTracker>& tracker,
-                                          BufferedBlockMgr2::Client** client) {
+                                          int reserved_blocks, BufferedBlockMgr2::Client** client) {
         RuntimeState* state = nullptr;
         BufferedBlockMgr2* mgr = CreateMgr(query_id, max_buffers, block_size, &state);
-        EXPECT_TRUE(mgr->register_client(reserved_blocks, tracker, state, client).ok());
+        EXPECT_TRUE(mgr->register_client(reserved_blocks, state, client).ok());
         EXPECT_TRUE(client != nullptr);
         return mgr;
     }
 
     void CreateMgrsAndClients(int64_t start_query_id, int num_mgrs, int buffers_per_mgr,
                               int block_size, int reserved_blocks_per_client,
-                              const std::shared_ptr<MemTracker>& tracker,
                               std::vector<BufferedBlockMgr2*>* mgrs,
                               std::vector<BufferedBlockMgr2::Client*>* clients) {
         for (int i = 0; i < num_mgrs; ++i) {
             BufferedBlockMgr2::Client* client;
             BufferedBlockMgr2* mgr =
                     CreateMgrAndClient(start_query_id + i, buffers_per_mgr, _block_size,
-                                       reserved_blocks_per_client, tracker, &client);
+                                       reserved_blocks_per_client, &client);
             mgrs->push_back(mgr);
             clients->push_back(client);
         }
@@ -176,7 +169,6 @@ protected:
     void TearDownMgrs() {
         // Freeing all block managers should clean up all consumed memory.
         _test_env->tear_down_query_states();
-        EXPECT_EQ(_test_env->block_mgr_parent_tracker()->consumption(), 0);
     }
 
     void AllocateBlocks(BufferedBlockMgr2* block_mgr, BufferedBlockMgr2::Client* client,
@@ -270,8 +262,8 @@ protected:
         int max_num_blocks = 5;
         BufferedBlockMgr2* block_mgr = nullptr;
         BufferedBlockMgr2::Client* client;
-        block_mgr = CreateMgrAndClient(0, max_num_blocks, block_size, 0, _client_tracker, &client);
-        EXPECT_EQ(_test_env->block_mgr_parent_tracker()->consumption(), 0);
+        block_mgr = CreateMgrAndClient(0, max_num_blocks, block_size, 0, &client);
+        EXPECT_EQ(block_mgr->mem_tracker()->consumption(), 0);
 
         // Allocate blocks until max_num_blocks, they should all succeed and memory
         // usage should go up.
@@ -314,7 +306,8 @@ protected:
         int max_num_buffers = 5;
         BufferedBlockMgr2* block_mgr = nullptr;
         BufferedBlockMgr2::Client* client = nullptr;
-        block_mgr = CreateMgrAndClient(0, max_num_buffers, block_size, 0, _client_tracker, &client);
+        block_mgr = CreateMgrAndClient(0, max_num_buffers, block_size, 0, &client);
 
         // Check counters.
         RuntimeProfile* profile = block_mgr->profile();
@@ -396,7 +389,7 @@ protected:
         ApiFunction api_function;
 
         BufferedBlockMgr2::Client* client;
-        Status status = block_mgr->register_client(0, _client_tracker, state, &client);
+        Status status = block_mgr->register_client(0, state, &client);
         EXPECT_TRUE(status.ok());
         EXPECT_TRUE(client != nullptr);
 
@@ -543,9 +536,9 @@ protected:
         for (int i = 0; i < iters; ++i) {
             LOG(WARNING) << "CreateDestroyThread thread " << index << " begin " << i << std::endl;
             std::shared_ptr<BufferedBlockMgr2> mgr;
-            Status status = BufferedBlockMgr2::create(
-                    state, _test_env->block_mgr_parent_tracker(), state->runtime_profile(),
-                    _test_env->tmp_file_mgr(), _block_size * num_buffers, _block_size, &mgr);
+            Status status = BufferedBlockMgr2::create(state, -1, state->runtime_profile(),
+                                                      _test_env->tmp_file_mgr(),
+                                                      _block_size * num_buffers, _block_size, &mgr);
             LOG(WARNING) << "CreateDestroyThread thread " << index << " end " << i << std::endl;
         }
     }
@@ -567,7 +560,6 @@ protected:
     }
 
     std::unique_ptr _test_env;
-    std::shared_ptr<MemTracker> _client_tracker;
     std::vector<std::string> _created_tmp_dirs;
 };
 
@@ -583,8 +575,8 @@ TEST_F(BufferedBlockMgrTest, GetNewBlockSmallBlocks) {
     int max_num_blocks = 3;
     BufferedBlockMgr2* block_mgr;
     BufferedBlockMgr2::Client* client;
-    block_mgr = CreateMgrAndClient(0, max_num_blocks, block_size, 0, _client_tracker, &client);
-    EXPECT_EQ(0, _test_env->block_mgr_parent_tracker()->consumption());
+    block_mgr = CreateMgrAndClient(0, max_num_blocks, block_size, 0, &client);
+    EXPECT_EQ(0, block_mgr->mem_tracker()->consumption());
 
     std::vector<BufferedBlockMgr2::Block*> blocks;
 
@@ -593,8 +585,8 @@ TEST_F(BufferedBlockMgrTest, GetNewBlockSmallBlocks) {
     EXPECT_TRUE(block_mgr->get_new_block(client, nullptr, &new_block, 128).ok());
     EXPECT_TRUE(new_block != nullptr);
     EXPECT_EQ(block_mgr->bytes_allocated(), 0);
-    EXPECT_EQ(_test_env->block_mgr_parent_tracker()->consumption(), 0);
-    EXPECT_EQ(_client_tracker->consumption(), 128);
+    EXPECT_EQ(block_mgr->mem_tracker()->consumption(), 0);
+    EXPECT_EQ(block_mgr->get_tracker(client)->consumption(), 128);
     EXPECT_TRUE(new_block->is_pinned());
     EXPECT_EQ(new_block->bytes_remaining(), 128);
     EXPECT_TRUE(new_block->buffer() != nullptr);
@@ -604,8 +596,8 @@ TEST_F(BufferedBlockMgrTest, GetNewBlockSmallBlocks) {
     EXPECT_TRUE(block_mgr->get_new_block(client, nullptr, &new_block).ok());
     EXPECT_TRUE(new_block != nullptr);
     EXPECT_EQ(block_mgr->bytes_allocated(), block_mgr->max_block_size());
-    EXPECT_EQ(_test_env->block_mgr_parent_tracker()->consumption(), block_mgr->max_block_size());
-    EXPECT_EQ(_client_tracker->consumption(), 128 + block_mgr->max_block_size());
+    EXPECT_EQ(block_mgr->mem_tracker()->consumption(), block_mgr->max_block_size());
+    EXPECT_EQ(block_mgr->get_tracker(client)->consumption(), 128 + block_mgr->max_block_size());
     EXPECT_TRUE(new_block->is_pinned());
     EXPECT_EQ(new_block->bytes_remaining(), block_mgr->max_block_size());
     EXPECT_TRUE(new_block->buffer() != nullptr);
@@ -615,8 +607,9 @@ TEST_F(BufferedBlockMgrTest, GetNewBlockSmallBlocks) {
     EXPECT_TRUE(block_mgr->get_new_block(client, nullptr, &new_block, 512).ok());
     EXPECT_TRUE(new_block != nullptr);
     EXPECT_EQ(block_mgr->bytes_allocated(), block_mgr->max_block_size());
-    EXPECT_EQ(_test_env->block_mgr_parent_tracker()->consumption(), block_mgr->max_block_size());
-    EXPECT_EQ(_client_tracker->consumption(), 128 + 512 + block_mgr->max_block_size());
+    EXPECT_EQ(block_mgr->mem_tracker()->consumption(), block_mgr->max_block_size());
+    EXPECT_EQ(block_mgr->get_tracker(client)->consumption(),
+              128 + 512 + block_mgr->max_block_size());
     EXPECT_TRUE(new_block->is_pinned());
     EXPECT_EQ(new_block->bytes_remaining(), 512);
     EXPECT_TRUE(new_block->buffer() != nullptr);
@@ -643,7 +636,7 @@ TEST_F(BufferedBlockMgrTest, Pin) {
     const int block_size = 1024;
     BufferedBlockMgr2* block_mgr;
     BufferedBlockMgr2::Client* client;
-    block_mgr = CreateMgrAndClient(0, max_num_blocks, block_size, 0, _client_tracker, &client);
+    block_mgr = CreateMgrAndClient(0, max_num_blocks, block_size, 0, &client);
 
     std::vector<BufferedBlockMgr2::Block*> blocks;
     AllocateBlocks(block_mgr, client, max_num_blocks, &blocks);
@@ -697,7 +690,7 @@ TEST_F(BufferedBlockMgrTest, Deletion) {
     const int block_size = 1024;
     BufferedBlockMgr2* block_mgr;
     BufferedBlockMgr2::Client* client;
-    block_mgr = CreateMgrAndClient(0, max_num_buffers, block_size, 0, _client_tracker, &client);
+    block_mgr = CreateMgrAndClient(0, max_num_buffers, block_size, 0, &client);
 
     // Check counters.
     RuntimeProfile* profile = block_mgr->profile();
@@ -723,8 +716,7 @@ TEST_F(BufferedBlockMgrTest, Deletion) {
 TEST_F(BufferedBlockMgrTest, DeleteSingleBlocks) {
     int max_num_buffers = 16;
     BufferedBlockMgr2::Client* client;
-    BufferedBlockMgr2* block_mgr =
-            CreateMgrAndClient(0, max_num_buffers, _block_size, 0, _client_tracker, &client);
+    BufferedBlockMgr2* block_mgr = CreateMgrAndClient(0, max_num_buffers, _block_size, 0, &client);
 
     // Pinned I/O block.
     BufferedBlockMgr2::Block* new_block;
@@ -733,16 +725,16 @@ TEST_F(BufferedBlockMgrTest, DeleteSingleBlocks) {
     EXPECT_TRUE(new_block->is_pinned());
     EXPECT_TRUE(new_block->is_max_size());
     new_block->del();
-    EXPECT_TRUE(_client_tracker->consumption() == 0);
+    EXPECT_TRUE(block_mgr->get_tracker(client)->consumption() == 0);
 
     // Pinned non-I/O block.
     int small_block_size = 128;
     EXPECT_TRUE(block_mgr->get_new_block(client, nullptr, &new_block, small_block_size).ok());
     EXPECT_TRUE(new_block != nullptr);
     EXPECT_TRUE(new_block->is_pinned());
-    EXPECT_EQ(small_block_size, _client_tracker->consumption());
+    EXPECT_EQ(small_block_size, block_mgr->get_tracker(client)->consumption());
     new_block->del();
-    EXPECT_EQ(0, _client_tracker->consumption());
+    EXPECT_EQ(0, block_mgr->get_tracker(client)->consumption());
 
     // Unpinned I/O block - delete after written to disk.
     EXPECT_TRUE(block_mgr->get_new_block(client, nullptr, &new_block).ok());
@@ -753,7 +745,7 @@ TEST_F(BufferedBlockMgrTest, DeleteSingleBlocks) {
     EXPECT_FALSE(new_block->is_pinned());
     WaitForWrites(block_mgr);
     new_block->del();
-    EXPECT_TRUE(_client_tracker->consumption() == 0);
+    EXPECT_TRUE(block_mgr->get_tracker(client)->consumption() == 0);
 
     // Unpinned I/O block - delete before written to disk.
     EXPECT_TRUE(block_mgr->get_new_block(client, nullptr, &new_block).ok());
@@ -764,7 +756,7 @@ TEST_F(BufferedBlockMgrTest, DeleteSingleBlocks) {
     EXPECT_FALSE(new_block->is_pinned());
     new_block->del();
     WaitForWrites(block_mgr);
-    EXPECT_TRUE(_client_tracker->consumption() == 0);
+    EXPECT_TRUE(block_mgr->get_tracker(client)->consumption() == 0);
 
     TearDownMgrs();
 }
@@ -775,7 +767,7 @@ TEST_F(BufferedBlockMgrTest, Close) {
     const int block_size = 1024;
     BufferedBlockMgr2* block_mgr;
     BufferedBlockMgr2::Client* client;
-    block_mgr = CreateMgrAndClient(0, max_num_buffers, block_size, 0, _client_tracker, &client);
+    block_mgr = CreateMgrAndClient(0, max_num_buffers, block_size, 0, &client);
 
     std::vector<BufferedBlockMgr2::Block*> blocks;
     AllocateBlocks(block_mgr, client, max_num_buffers, &blocks);
@@ -816,7 +808,7 @@ TEST_F(BufferedBlockMgrTest, WriteError) {
     const int block_size = 1024;
     BufferedBlockMgr2* block_mgr;
     BufferedBlockMgr2::Client* client;
-    block_mgr = CreateMgrAndClient(0, max_num_buffers, block_size, 0, _client_tracker, &client);
+    block_mgr = CreateMgrAndClient(0, max_num_buffers, block_size, 0, &client);
 
     std::vector<BufferedBlockMgr2::Block*> blocks;
     AllocateBlocks(block_mgr, client, max_num_buffers, &blocks);
@@ -859,8 +851,7 @@ TEST_F(BufferedBlockMgrTest, TmpFileAllocateError) {
     Status status;
     int max_num_buffers = 2;
     BufferedBlockMgr2::Client* client;
-    BufferedBlockMgr2* block_mgr =
-            CreateMgrAndClient(0, max_num_buffers, _block_size, 0, _client_tracker, &client);
+    BufferedBlockMgr2* block_mgr = CreateMgrAndClient(0, max_num_buffers, _block_size, 0, &client);
 
     std::vector<BufferedBlockMgr2::Block*> blocks;
     AllocateBlocks(block_mgr, client, max_num_buffers, &blocks);
@@ -894,8 +885,7 @@ TEST_F(BufferedBlockMgrTest, DISABLED_WriteErrorBlacklist) {
     int blocks_per_mgr = MAX_NUM_BLOCKS / NUM_BLOCK_MGRS;
     std::vector<BufferedBlockMgr2*> block_mgrs;
     std::vector<BufferedBlockMgr2::Client*> clients;
-    CreateMgrsAndClients(0, NUM_BLOCK_MGRS, blocks_per_mgr, _block_size, 0, _client_tracker,
-                         &block_mgrs, &clients);
+    CreateMgrsAndClients(0, NUM_BLOCK_MGRS, blocks_per_mgr, _block_size, 0, &block_mgrs, &clients);
 
     // Allocate files for all 2x2 combinations by unpinning blocks.
     std::vector<std::vector<BufferedBlockMgr2::Block*>> blocks;
@@ -954,7 +944,7 @@ TEST_F(BufferedBlockMgrTest, DISABLED_WriteErrorBlacklist) {
     // A new block manager should only use the good dir for backing storage.
     BufferedBlockMgr2::Client* new_client;
     BufferedBlockMgr2* new_block_mgr =
-            CreateMgrAndClient(9999, blocks_per_mgr, _block_size, 0, _client_tracker, &new_client);
+            CreateMgrAndClient(9999, blocks_per_mgr, _block_size, 0, &new_client);
     std::vector<BufferedBlockMgr2::Block*> new_mgr_blocks;
     AllocateBlocks(new_block_mgr, new_client, blocks_per_mgr, &new_mgr_blocks);
     UnpinBlocks(new_mgr_blocks);
@@ -977,8 +967,7 @@ TEST_F(BufferedBlockMgrTest, AllocationErrorHandling) {
     // std::vector runtime_states;
     std::vector<BufferedBlockMgr2*> block_mgrs;
     std::vector<BufferedBlockMgr2::Client*> clients;
-    CreateMgrsAndClients(0, num_block_mgrs, blocks_per_mgr, _block_size, 0, _client_tracker,
-                         &block_mgrs, &clients);
+    CreateMgrsAndClients(0, num_block_mgrs, blocks_per_mgr, _block_size, 0, &block_mgrs, &clients);
 
     // Allocate files for all 2x2 combinations by unpinning blocks.
     std::vector<std::vector<BufferedBlockMgr2::Block*>> blocks;
@@ -1015,8 +1004,7 @@ TEST_F(BufferedBlockMgrTest, NoDirsAllocationError) {
     std::vector<std::string> tmp_dirs = InitMultipleTmpDirs(2);
     int max_num_buffers = 2;
     BufferedBlockMgr2::Client* client;
-    BufferedBlockMgr2* block_mgr =
-            CreateMgrAndClient(0, max_num_buffers, _block_size, 0, _client_tracker, &client);
+    BufferedBlockMgr2* block_mgr = CreateMgrAndClient(0, max_num_buffers, _block_size, 0, &client);
     std::vector<BufferedBlockMgr2::Block*> blocks;
     AllocateBlocks(block_mgr, client, max_num_buffers, &blocks);
     for (int i = 0; i < tmp_dirs.size(); ++i) {
@@ -1040,10 +1028,10 @@ TEST_F(BufferedBlockMgrTest, MultipleClients) {
 
     BufferedBlockMgr2::Client* client1 = nullptr;
     BufferedBlockMgr2::Client* client2 = nullptr;
-    status = block_mgr->register_client(client1_buffers, _client_tracker, runtime_state, &client1);
+    status = block_mgr->register_client(client1_buffers, runtime_state, &client1);
     EXPECT_TRUE(status.ok());
     EXPECT_TRUE(client1 != nullptr);
-    status = block_mgr->register_client(client2_buffers, _client_tracker, runtime_state, &client2);
+    status = block_mgr->register_client(client2_buffers, runtime_state, &client2);
     EXPECT_TRUE(status.ok());
     EXPECT_TRUE(client2 != nullptr);
 
@@ -1151,10 +1139,10 @@ TEST_F(BufferedBlockMgrTest, MultipleClientsExtraBuffers) {
     BufferedBlockMgr2::Client* client1 = nullptr;
     BufferedBlockMgr2::Client* client2 = nullptr;
     BufferedBlockMgr2::Block* block = nullptr;
-    status = block_mgr->register_client(client1_buffers, _client_tracker, runtime_state, &client1);
+    status = block_mgr->register_client(client1_buffers, runtime_state, &client1);
     EXPECT_TRUE(status.ok());
     EXPECT_TRUE(client1 != nullptr);
-    status = block_mgr->register_client(client2_buffers, _client_tracker, runtime_state, &client2);
+    status = block_mgr->register_client(client2_buffers, runtime_state, &client2);
     EXPECT_TRUE(status.ok());
     EXPECT_TRUE(client2 != nullptr);
 
@@ -1198,10 +1186,10 @@ TEST_F(BufferedBlockMgrTest, ClientOversubscription) {
     BufferedBlockMgr2::Client* client1 = nullptr;
     BufferedBlockMgr2::Client* client2 = nullptr;
     BufferedBlockMgr2::Block* block = nullptr;
-    status = block_mgr->register_client(client1_buffers, _client_tracker, runtime_state, &client1);
+    status = block_mgr->register_client(client1_buffers, runtime_state, &client1);
     EXPECT_TRUE(status.ok());
     EXPECT_TRUE(client1 != nullptr);
-    status = block_mgr->register_client(client2_buffers, _client_tracker, runtime_state, &client2);
+    status = block_mgr->register_client(client2_buffers, runtime_state, &client2);
     EXPECT_TRUE(status.ok());
     EXPECT_TRUE(client2 != nullptr);
 
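The buffered_block_mgr2_test hunks above all make the same two substitutions: register_client loses its tracker argument, and per-client consumption is read back through the block manager instead of through an externally owned MemTracker. A small sketch of the updated calls as the tests now exercise them (block_mgr, state and client stand for the objects the fixture already creates; this is not a complete test):

    BufferedBlockMgr2::Client* client = nullptr;
    EXPECT_TRUE(block_mgr->register_client(/*reserved_blocks=*/0, state, &client).ok());
    // Per-client and per-manager accounting are queried via the manager itself.
    EXPECT_EQ(0, block_mgr->get_tracker(client)->consumption());
    EXPECT_EQ(0, block_mgr->mem_tracker()->consumption());
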
diff --git a/be/test/runtime/buffered_tuple_stream2_test.cpp b/be/test/runtime/buffered_tuple_stream2_test.cpp
index 7ae670c787..5ef517f407 100644
--- a/be/test/runtime/buffered_tuple_stream2_test.cpp
+++ b/be/test/runtime/buffered_tuple_stream2_test.cpp
@@ -60,7 +60,7 @@ static const int NUM_STRINGS = sizeof(STRINGS) / sizeof(StringValue);
 
 class SimpleTupleStreamTest : public testing::Test {
 public:
-    SimpleTupleStreamTest() : _tracker(new MemTracker(-1)) {}
+    SimpleTupleStreamTest() {}
     // A null dtor to pass codestyle check
     ~SimpleTupleStreamTest() {}
 
@@ -68,7 +68,7 @@ protected:
     virtual void SetUp() {
         _test_env.reset(new TestEnv());
         create_descriptors();
-        _mem_pool.reset(new MemPool(_tracker.get()));
+        _mem_pool.reset(new MemPool());
     }
 
     virtual void create_descriptors() {
@@ -99,8 +99,7 @@ protected:
     void InitBlockMgr(int64_t limit, int block_size) {
         Status status = _test_env->create_query_state(0, limit, block_size, &_runtime_state);
         EXPECT_TRUE(status.ok());
-        status = _runtime_state->block_mgr2()->register_client(0, _tracker, _runtime_state,
-                                                               &_client);
+        status = _runtime_state->block_mgr2()->register_client(0, _runtime_state, &_client);
         EXPECT_TRUE(status.ok());
     }
 
@@ -209,7 +208,7 @@ protected:
     void ReadValues(BufferedTupleStream2* stream, RowDescriptor* desc, std::vector* results,
                     int num_batches = -1) {
         bool eos = false;
-        RowBatch batch(*desc, BATCH_SIZE, _tracker.get());
+        RowBatch batch(*desc, BATCH_SIZE);
         int batches_read = 0;
         do {
             batch.reset();
@@ -354,7 +353,6 @@ protected:
     RuntimeState* _runtime_state;
     BufferedBlockMgr2::Client* _client;
 
-    std::shared_ptr<MemTracker> _tracker;
     ObjectPool _pool;
     RowDescriptor* _int_desc;
     RowDescriptor* _string_desc;
@@ -784,7 +782,7 @@ TEST_F(ArrayTupleStreamTest, TestArrayDeepCopy) {
     array_len_index = 0;
     bool eos = false;
     int rows_read = 0;
-    RowBatch batch(*_array_desc, BATCH_SIZE, _tracker.get());
+    RowBatch batch(*_array_desc, BATCH_SIZE);
     do {
         batch.reset();
         EXPECT_TRUE(stream.get_next(&batch, &eos).ok());
diff --git a/be/test/runtime/data_stream_test.cpp b/be/test/runtime/data_stream_test.cpp
index b0a8de6529..00ebc6636f 100644
--- a/be/test/runtime/data_stream_test.cpp
+++ b/be/test/runtime/data_stream_test.cpp
@@ -113,10 +113,7 @@ private:
 
 class DataStreamTest : public testing::Test {
 protected:
-    DataStreamTest()
-            : _limit(new MemTracker(-1)),
-              _runtime_state(TUniqueId(), TQueryOptions(), "", &_exec_env),
-              _next_val(0) {
+    DataStreamTest() : _runtime_state(TUniqueId(), TQueryOptions(), "", &_exec_env), _next_val(0) {
         _exec_env.init_for_tests();
         _runtime_state.init_mem_trackers(TUniqueId());
     }
@@ -200,8 +197,6 @@ protected:
     static const int NUM_BATCHES = TOTAL_DATA_SIZE / BATCH_CAPACITY / PER_ROW_DATA;
 
     ObjectPool _obj_pool;
-    std::shared_ptr<MemTracker> _limit;
-    std::shared_ptr<MemTracker> _tracker;
     DescriptorTbl* _desc_tbl;
     const RowDescriptor* _row_desc;
     TupleRowComparator* _less_than;
@@ -324,8 +319,8 @@ protected:
         SlotRef* rhs_slot = _obj_pool.add(new SlotRef(expr_node));
         _rhs_slot_ctx = _obj_pool.add(new ExprContext(rhs_slot));
 
-        _lhs_slot_ctx->prepare(&_runtime_state, *_row_desc, _tracker.get());
-        _rhs_slot_ctx->prepare(&_runtime_state, *_row_desc, _tracker.get());
+        _lhs_slot_ctx->prepare(&_runtime_state, *_row_desc);
+        _rhs_slot_ctx->prepare(&_runtime_state, *_row_desc);
         _lhs_slot_ctx->open(nullptr);
         _rhs_slot_ctx->open(nullptr);
         SortExecExprs* sort_exprs = _obj_pool.add(new SortExecExprs());
@@ -337,7 +332,7 @@ protected:
 
     // Create _batch, but don't fill it with data yet. Assumes we created _row_desc.
     RowBatch* create_row_batch() {
-        RowBatch* batch = new RowBatch(*_row_desc, BATCH_CAPACITY, _limit.get());
+        RowBatch* batch = new RowBatch(*_row_desc, BATCH_CAPACITY);
         int64_t* tuple_mem =
                 reinterpret_cast<int64_t*>(batch->tuple_data_pool()->allocate(BATCH_CAPACITY * 8));
         bzero(tuple_mem, BATCH_CAPACITY * 8);
@@ -424,7 +419,7 @@ protected:
         if (info->status.is_cancelled()) {
             return;
         }
-        RowBatch batch(*_row_desc, 1024, _limit.get());
+        RowBatch batch(*_row_desc, 1024);
         VLOG_QUERY << "start reading merging";
         bool eos = false;
         while (!(info->status = info->stream_recvr->get_next(&batch, &eos)).is_cancelled()) {
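The data_stream_test changes follow the same shape: RowBatch is now constructed from just the row descriptor and capacity, and ExprContext::prepare no longer takes a tracker parameter. Roughly, using the fixture's own members (not a complete test):

    RowBatch batch(*_row_desc, 1024);                    // MemTracker argument removed
    _lhs_slot_ctx->prepare(&_runtime_state, *_row_desc); // tracker parameter dropped
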
diff --git a/be/test/runtime/disk_io_mgr_test.cpp b/be/test/runtime/disk_io_mgr_test.cpp
index 36c5cfda9c..a1ae91711c 100644
--- a/be/test/runtime/disk_io_mgr_test.cpp
+++ b/be/test/runtime/disk_io_mgr_test.cpp
@@ -191,7 +191,6 @@ protected:
 // by reading the data back via a separate IoMgr instance. All writes are expected to
 // complete successfully.
 TEST_F(DiskIoMgrTest, SingleWriter) {
-    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     _num_ranges_written = 0;
     string tmp_file = "/tmp/disk_io_mgr_test.txt";
     int num_ranges = 100;
@@ -204,20 +203,19 @@ TEST_F(DiskIoMgrTest, SingleWriter) {
     }
 
     std::unique_ptr<DiskIoMgr> read_io_mgr(new DiskIoMgr(1, 1, 1, 10));
-    std::shared_ptr<MemTracker> reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
-    Status status = read_io_mgr->init(reader_mem_tracker);
+    Status status = read_io_mgr->init(LARGE_MEM_LIMIT);
     EXPECT_TRUE(status.ok());
     DiskIoMgr::RequestContext* reader;
-    status = read_io_mgr->register_context(&reader, reader_mem_tracker);
+    status = read_io_mgr->register_context(&reader);
     EXPECT_TRUE(status.ok());
     for (int num_threads_per_disk = 1; num_threads_per_disk <= 5; ++num_threads_per_disk) {
         for (int num_disks = 1; num_disks <= 5; num_disks += 2) {
             _pool.reset(new ObjectPool);
             DiskIoMgr io_mgr(num_disks, num_threads_per_disk, 1, 10);
-            status = io_mgr.init(mem_tracker);
+            status = io_mgr.init(LARGE_MEM_LIMIT);
             EXPECT_TRUE(status.ok());
             DiskIoMgr::RequestContext* writer;
-            io_mgr.register_context(&writer, mem_tracker);
+            io_mgr.register_context(&writer);
             for (int i = 0; i < num_ranges; ++i) {
                 int32_t* data = _pool->add(new int32_t);
                 *data = rand();
@@ -251,11 +249,10 @@ TEST_F(DiskIoMgrTest, SingleWriter) {
 // Perform invalid writes (e.g. non-existent file, negative offset) and validate
 // that an error status is returned via the write callback.
 TEST_F(DiskIoMgrTest, InvalidWrite) {
-    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     _num_ranges_written = 0;
     string tmp_file = "/tmp/non-existent.txt";
     DiskIoMgr io_mgr(1, 1, 1, 10);
-    Status status = io_mgr.init(mem_tracker);
+    Status status = io_mgr.init(LARGE_MEM_LIMIT);
     EXPECT_TRUE(status.ok());
     DiskIoMgr::RequestContext* writer;
     status = io_mgr.register_context(&writer);
@@ -307,7 +304,6 @@ TEST_F(DiskIoMgrTest, InvalidWrite) {
 // add_write_range() is expected to succeed before the cancel and fail after it.
 // The writes themselves may finish with status cancelled or ok.
 TEST_F(DiskIoMgrTest, SingleWriterCancel) {
-    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     _num_ranges_written = 0;
     string tmp_file = "/tmp/disk_io_mgr_test.txt";
     int num_ranges = 100;
@@ -321,19 +317,18 @@ TEST_F(DiskIoMgrTest, SingleWriterCancel) {
     }
 
     std::unique_ptr<DiskIoMgr> read_io_mgr(new DiskIoMgr(1, 1, 1, 10));
-    std::shared_ptr<MemTracker> reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
-    Status status = read_io_mgr->init(reader_mem_tracker);
+    Status status = read_io_mgr->init(LARGE_MEM_LIMIT);
     EXPECT_TRUE(status.ok());
     DiskIoMgr::RequestContext* reader;
-    status = read_io_mgr->register_context(&reader, reader_mem_tracker);
+    status = read_io_mgr->register_context(&reader);
     EXPECT_TRUE(status.ok());
     for (int num_threads_per_disk = 1; num_threads_per_disk <= 5; ++num_threads_per_disk) {
         for (int num_disks = 1; num_disks <= 5; num_disks += 2) {
             _pool.reset(new ObjectPool);
             DiskIoMgr io_mgr(num_disks, num_threads_per_disk, 1, 10);
-            status = io_mgr.init(mem_tracker);
+            status = io_mgr.init(LARGE_MEM_LIMIT);
             DiskIoMgr::RequestContext* writer;
-            io_mgr.register_context(&writer, mem_tracker);
+            io_mgr.register_context(&writer);
             Status validate_status = Status::OK();
             for (int i = 0; i < num_ranges; ++i) {
                 if (i == num_ranges_before_cancel) {
@@ -373,7 +368,6 @@ TEST_F(DiskIoMgrTest, SingleWriterCancel) {
 // Basic test with a single reader, testing multiple threads, disks and a different
 // number of buffers.
 TEST_F(DiskIoMgrTest, SingleReader) {
-    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const char* tmp_file = "/tmp/disk_io_mgr_test.txt";
     const char* data = "abcdefghijklm";
     int len = strlen(data);
@@ -398,11 +392,10 @@ TEST_F(DiskIoMgrTest, SingleReader) {
                     }
                     DiskIoMgr io_mgr(num_disks, num_threads_per_disk, 1, 1);
 
-                    Status status = io_mgr.init(mem_tracker);
+                    Status status = io_mgr.init(LARGE_MEM_LIMIT);
                     EXPECT_TRUE(status.ok());
-                    std::shared_ptr<MemTracker> reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
                     DiskIoMgr::RequestContext* reader;
-                    status = io_mgr.register_context(&reader, reader_mem_tracker);
+                    status = io_mgr.register_context(&reader);
                     EXPECT_TRUE(status.ok());
 
                     std::vector<DiskIoMgr::ScanRange*> ranges;
@@ -424,17 +417,15 @@ TEST_F(DiskIoMgrTest, SingleReader) {
 
                     EXPECT_EQ(num_ranges_processed, ranges.size());
                     io_mgr.unregister_context(reader);
-                    EXPECT_EQ(reader_mem_tracker->consumption(), 0);
                 }
             }
         }
     }
-    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 
 // This test issues adding additional scan ranges while there are some still in flight.
 TEST_F(DiskIoMgrTest, AddScanRangeTest) {
-    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const char* tmp_file = "/tmp/disk_io_mgr_test.txt";
     const char* data = "abcdefghijklm";
     int len = strlen(data);
@@ -455,11 +446,10 @@ TEST_F(DiskIoMgrTest, AddScanRangeTest) {
                 if (++iters % 5000 == 0) LOG(ERROR) << "Starting iteration " << iters;
                 DiskIoMgr io_mgr(num_disks, num_threads_per_disk, 1, 1);
 
-                Status status = io_mgr.init(mem_tracker);
+                Status status = io_mgr.init(LARGE_MEM_LIMIT);
                 EXPECT_TRUE(status.ok());
-                std::shared_ptr<MemTracker> reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
                 DiskIoMgr::RequestContext* reader;
-                status = io_mgr.register_context(&reader, reader_mem_tracker);
+                status = io_mgr.register_context(&reader);
                 EXPECT_TRUE(status.ok());
 
                 std::vector<DiskIoMgr::ScanRange*> ranges_first_half;
@@ -499,18 +489,16 @@ TEST_F(DiskIoMgrTest, AddScanRangeTest) {
                 threads.join_all();
                 EXPECT_EQ(num_ranges_processed, len);
                 io_mgr.unregister_context(reader);
-                EXPECT_EQ(reader_mem_tracker->consumption(), 0);
             }
         }
     }
-    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 
 // Test to make sure that sync reads and async reads work together
 // Note: this test is constructed so the number of buffers is greater than the
 // number of scan ranges.
 TEST_F(DiskIoMgrTest, SyncReadTest) {
-    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const char* tmp_file = "/tmp/disk_io_mgr_test.txt";
     const char* data = "abcdefghijklm";
     int len = strlen(data);
@@ -534,11 +522,10 @@ TEST_F(DiskIoMgrTest, SyncReadTest) {
                 }
                 DiskIoMgr io_mgr(num_disks, num_threads_per_disk, MIN_BUFFER_SIZE, MAX_BUFFER_SIZE);
 
-                Status status = io_mgr.init(mem_tracker);
+                Status status = io_mgr.init(LARGE_MEM_LIMIT);
                 EXPECT_TRUE(status.ok());
-                std::shared_ptr<MemTracker> reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
                 DiskIoMgr::RequestContext* reader;
-                status = io_mgr.register_context(&reader, reader_mem_tracker);
+                status = io_mgr.register_context(&reader);
                 EXPECT_TRUE(status.ok());
 
                 DiskIoMgr::ScanRange* complete_range =
@@ -578,16 +565,14 @@ TEST_F(DiskIoMgrTest, SyncReadTest) {
 
                 EXPECT_EQ(num_ranges_processed, ranges.size());
                 io_mgr.unregister_context(reader);
-                EXPECT_EQ(reader_mem_tracker->consumption(), 0);
             }
         }
     }
-    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 
 // Tests a single reader cancelling half way through scan ranges.
 TEST_F(DiskIoMgrTest, SingleReaderCancel) {
-    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const char* tmp_file = "/tmp/disk_io_mgr_test.txt";
     const char* data = "abcdefghijklm";
     int len = strlen(data);
@@ -608,11 +593,10 @@ TEST_F(DiskIoMgrTest, SingleReaderCancel) {
                 if (++iters % 5000 == 0) LOG(ERROR) << "Starting iteration " << iters;
                 DiskIoMgr io_mgr(num_disks, num_threads_per_disk, 1, 1);
 
-                Status status = io_mgr.init(mem_tracker);
+                Status status = io_mgr.init(LARGE_MEM_LIMIT);
                 EXPECT_TRUE(status.ok());
-                std::shared_ptr<MemTracker> reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
                 DiskIoMgr::RequestContext* reader;
-                status = io_mgr.register_context(&reader, reader_mem_tracker);
+                status = io_mgr.register_context(&reader);
                 EXPECT_TRUE(status.ok());
 
                 std::vector<DiskIoMgr::ScanRange*> ranges;
@@ -647,11 +631,10 @@ TEST_F(DiskIoMgrTest, SingleReaderCancel) {
                 threads.join_all();
                 EXPECT_TRUE(io_mgr.context_status(reader).is_cancelled());
                 io_mgr.unregister_context(reader);
-                EXPECT_EQ(reader_mem_tracker->consumption(), 0);
             }
         }
     }
-    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 
 // Test when the reader goes over the mem limit
@@ -676,15 +659,12 @@ TEST_F(DiskIoMgrTest, MemTrackers) {
             LOG(ERROR) << "Starting iteration " << iters;
         }
 
-        std::shared_ptr<MemTracker> mem_tracker(
-                new MemTracker(mem_limit_num_buffers * MAX_BUFFER_SIZE));
         DiskIoMgr io_mgr(1, 1, MIN_BUFFER_SIZE, MAX_BUFFER_SIZE);
 
-        Status status = io_mgr.init(mem_tracker);
+        Status status = io_mgr.init(LARGE_MEM_LIMIT);
         EXPECT_TRUE(status.ok());
-        std::shared_ptr<MemTracker> reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
         DiskIoMgr::RequestContext* reader;
-        status = io_mgr.register_context(&reader, reader_mem_tracker);
+        status = io_mgr.register_context(&reader);
         EXPECT_TRUE(status.ok());
 
         std::vector<DiskIoMgr::ScanRange*> ranges;
@@ -729,7 +709,6 @@ TEST_F(DiskIoMgrTest, MemTrackers) {
 
         EXPECT_TRUE(io_mgr.context_status(reader).is_mem_limit_exceeded());
         io_mgr.unregister_context(reader);
-        EXPECT_EQ(reader_mem_tracker->consumption(), 0);
     }
 }
 #if 0
@@ -738,7 +717,6 @@ TEST_F(DiskIoMgrTest, MemTrackers) {
 // only tests the fallback mechanism.
 // TODO: we can fake the cached read path without HDFS
 TEST_F(DiskIoMgrTest, CachedReads) {
-    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const char* tmp_file = "/tmp/disk_io_mgr_test.txt";
     const char* data = "abcdefghijklm";
     int len = strlen(data);
@@ -757,11 +735,10 @@ TEST_F(DiskIoMgrTest, CachedReads) {
         if (++iters % 5000 == 0) LOG(ERROR) << "Starting iteration " << iters;
         DiskIoMgr io_mgr(num_disks, 1, MIN_BUFFER_SIZE, MAX_BUFFER_SIZE);
 
-        Status status = io_mgr.init(mem_tracker);
+        Status status = io_mgr.init(LARGE_MEM_LIMIT);
         EXPECT_TRUE(status.ok());
-        std::shared_ptr<MemTracker> reader_mem_tracker(new MemTracker());
         DiskIoMgr::RequestContext* reader;
-        status = io_mgr.register_context(&reader, reader_mem_tracker);
+        status = io_mgr.register_context(&reader);
         EXPECT_TRUE(status.ok());
 
         DiskIoMgr::ScanRange* complete_range =
@@ -800,14 +777,12 @@ TEST_F(DiskIoMgrTest, CachedReads) {
 
         EXPECT_EQ(num_ranges_processed, ranges.size());
         io_mgr.unregister_context(reader);
-        EXPECT_EQ(reader_mem_tracker->consumption(), 0);
     }
-    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 #endif // end #if 0
 
 TEST_F(DiskIoMgrTest, MultipleReaderWriter) {
-    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const int ITERATIONS = 1;
     const char* data = "abcdefghijklmnopqrstuvwxyz";
     const int num_contexts = 5;
@@ -833,7 +808,7 @@ TEST_F(DiskIoMgrTest, MultipleReaderWriter) {
         for (int threads_per_disk = 1; threads_per_disk <= 5; ++threads_per_disk) {
             for (int num_disks = 1; num_disks <= 5; num_disks += 2) {
                 DiskIoMgr io_mgr(num_disks, threads_per_disk, MIN_BUFFER_SIZE, MAX_BUFFER_SIZE);
-                io_mgr.init(mem_tracker);
+                io_mgr.init(LARGE_MEM_LIMIT);
                 for (int file_index = 0; file_index < num_contexts; ++file_index) {
                     status = io_mgr.register_context(&contexts[file_index]);
                     EXPECT_TRUE(status.ok());
@@ -899,7 +874,6 @@ TEST_F(DiskIoMgrTest, MultipleReaderWriter) {
 
 // This test will test multiple concurrent reads each reading a different file.
 TEST_F(DiskIoMgrTest, MultipleReader) {
-    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const int NUM_READERS = 5;
     const int DATA_LEN = 50;
     const int ITERATIONS = 25;
@@ -953,7 +927,7 @@ TEST_F(DiskIoMgrTest, MultipleReader) {
                     if (++iters % 2500 == 0) LOG(ERROR) << "Starting iteration " << iters;
 
                     DiskIoMgr io_mgr(num_disks, threads_per_disk, MIN_BUFFER_SIZE, MAX_BUFFER_SIZE);
-                    Status status = io_mgr.init(mem_tracker);
+                    Status status = io_mgr.init(LARGE_MEM_LIMIT);
                     EXPECT_TRUE(status.ok());
 
                     for (int i = 0; i < NUM_READERS; ++i) {
@@ -988,7 +962,7 @@ TEST_F(DiskIoMgrTest, MultipleReader) {
             }
         }
     }
-    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 
 #if 0
@@ -1006,12 +980,11 @@ TEST_F(DiskIoMgrTest, Buffers) {
     // Test default min/max buffer size
     int min_buffer_size = 1024;
     int max_buffer_size = 8 * 1024 * 1024; // 8 MB
-    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(max_buffer_size * 2));
 
     DiskIoMgr io_mgr(1, 1, min_buffer_size, max_buffer_size);
-    Status status = io_mgr.init(mem_tracker);
+    Status status = io_mgr.init(max_buffer_size * 2);
     EXPECT_TRUE(status.ok());
-    EXPECT_EQ(mem_tracker->consumption(), 0);
+    EXPECT_EQ(io_mgr.mem_tracker()->consumption(), 0);
 
     // buffer length should be rounded up to min buffer size
     int64_t buffer_len = 1;
@@ -1019,7 +992,7 @@ TEST_F(DiskIoMgrTest, Buffers) {
     EXPECT_EQ(buffer_len, min_buffer_size);
     EXPECT_EQ(io_mgr._num_allocated_buffers, 1);
     io_mgr.return_free_buffer(buf, buffer_len);
-    EXPECT_EQ(mem_tracker->consumption(), min_buffer_size);
+    EXPECT_EQ(io_mgr.mem_tracker()->consumption(), min_buffer_size);
 
     // reuse buffer
     buffer_len = min_buffer_size;
@@ -1027,19 +1000,19 @@ TEST_F(DiskIoMgrTest, Buffers) {
     EXPECT_EQ(buffer_len, min_buffer_size);
     EXPECT_EQ(io_mgr._num_allocated_buffers, 1);
     io_mgr.return_free_buffer(buf, buffer_len);
-    EXPECT_EQ(mem_tracker->consumption(), min_buffer_size);
+    EXPECT_EQ(io_mgr.mem_tracker()->consumption(), min_buffer_size);
 
     // bump up to next buffer size
     buffer_len = min_buffer_size + 1;
     buf = io_mgr.get_free_buffer(&buffer_len);
     EXPECT_EQ(buffer_len, min_buffer_size * 2);
     EXPECT_EQ(io_mgr._num_allocated_buffers, 2);
-    EXPECT_EQ(mem_tracker->consumption(), min_buffer_size * 3);
+    EXPECT_EQ(io_mgr.mem_tracker()->consumption(), min_buffer_size * 3);
 
     // gc unused buffer
     io_mgr.gc_io_buffers();
     EXPECT_EQ(io_mgr._num_allocated_buffers, 1);
-    EXPECT_EQ(mem_tracker->consumption(), min_buffer_size * 2);
+    EXPECT_EQ(io_mgr.mem_tracker()->consumption(), min_buffer_size * 2);
 
     io_mgr.return_free_buffer(buf, buffer_len);
 
@@ -1049,17 +1022,16 @@ TEST_F(DiskIoMgrTest, Buffers) {
     EXPECT_EQ(buffer_len, max_buffer_size);
     EXPECT_EQ(io_mgr._num_allocated_buffers, 2);
     io_mgr.return_free_buffer(buf, buffer_len);
-    EXPECT_EQ(mem_tracker->consumption(), min_buffer_size * 2 + max_buffer_size);
+    EXPECT_EQ(io_mgr.mem_tracker()->consumption(), min_buffer_size * 2 + max_buffer_size);
 
     // gc buffers
     io_mgr.gc_io_buffers();
     EXPECT_EQ(io_mgr._num_allocated_buffers, 0);
-    EXPECT_EQ(mem_tracker->consumption(), 0);
+    EXPECT_EQ(io_mgr.mem_tracker()->consumption(), 0);
 }
 
 // IMPALA-2366: handle partial read where range goes past end of file.
 TEST_F(DiskIoMgrTest, PartialRead) {
-    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const char* tmp_file = "/tmp/disk_io_mgr_test.txt";
     const char* data = "the quick brown fox jumped over the lazy dog";
     int len = strlen(data);
@@ -1073,11 +1045,10 @@ TEST_F(DiskIoMgrTest, PartialRead) {
     _pool.reset(new ObjectPool);
     std::unique_ptr<DiskIoMgr> io_mgr(new DiskIoMgr(1, 1, read_len, read_len));
 
-    Status status = io_mgr->init(mem_tracker);
+    Status status = io_mgr->init(LARGE_MEM_LIMIT);
     EXPECT_TRUE(status.ok());
-    std::shared_ptr<MemTracker> reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     DiskIoMgr::RequestContext* reader;
-    status = io_mgr->register_context(&reader, reader_mem_tracker);
+    status = io_mgr->register_context(&reader);
     EXPECT_TRUE(status.ok());
 
     // We should not read past the end of file.
@@ -1093,8 +1064,7 @@ TEST_F(DiskIoMgrTest, PartialRead) {
     io_mgr->unregister_context(reader);
     _pool.reset();
     io_mgr.reset();
-    EXPECT_EQ(reader_mem_tracker->consumption(), 0);
-    EXPECT_EQ(mem_tracker->consumption(), 0);
+    EXPECT_EQ(io_mgr->mem_tracker()->consumption(), 0);
 }
 
 } // end namespace doris
diff --git a/be/test/runtime/load_channel_mgr_test.cpp b/be/test/runtime/load_channel_mgr_test.cpp
index e100dbb5e7..b6f62e9a92 100644
--- a/be/test/runtime/load_channel_mgr_test.cpp
+++ b/be/test/runtime/load_channel_mgr_test.cpp
@@ -30,7 +30,6 @@
 #include "runtime/descriptor_helper.h"
 #include "runtime/descriptors.h"
 #include "runtime/exec_env.h"
-#include "runtime/mem_tracker.h"
 #include "runtime/primitive_type.h"
 #include "runtime/row_batch.h"
 #include "runtime/tuple_row.h"
@@ -354,7 +353,6 @@ TEST_F(LoadChannelMgrTest, add_failed) {
     DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl);
     auto tuple_desc = desc_tbl->get_tuple_descriptor(0);
     RowDescriptor row_desc(*desc_tbl, {0}, {false});
-    auto tracker = std::make_shared<MemTracker>();
     PUniqueId load_id;
     load_id.set_hi(2);
     load_id.set_lo(3);
diff --git a/be/test/runtime/mem_limit_test.cpp b/be/test/runtime/mem_limit_test.cpp
index a5fe7bfaf8..b0a298ff8a 100644
--- a/be/test/runtime/mem_limit_test.cpp
+++ b/be/test/runtime/mem_limit_test.cpp
@@ -17,14 +17,15 @@
 
 #include <gtest/gtest.h>
 
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
+#include "runtime/memory/mem_tracker_limiter.h"
 #include "util/logging.h"
 #include "util/metrics.h"
 
 namespace doris {
 
 TEST(MemTrackerTest, SingleTrackerNoLimit) {
-    auto t = MemTracker::create_tracker();
+    auto t = std::make_unique<MemTracker>();
     EXPECT_FALSE(t->has_limit());
     t->consume(10);
     EXPECT_EQ(t->consumption(), 10);
@@ -37,7 +38,7 @@ TEST(MemTrackerTest, SingleTrackerNoLimit) {
 }
 
 TEST(MemTestTest, SingleTrackerWithLimit) {
-    auto t = MemTracker::create_tracker(11, "limit tracker");
+    auto t = std::make_unique(11, "limit tracker");
     EXPECT_TRUE(t->has_limit());
     t->consume(10);
     EXPECT_EQ(t->consumption(), 10);
@@ -52,9 +53,9 @@ TEST(MemTestTest, SingleTrackerWithLimit) {
 }
 
 TEST(MemTestTest, TrackerHierarchy) {
-    auto p = MemTracker::create_tracker(100);
-    auto c1 = MemTracker::create_tracker(80, "c1", p);
-    auto c2 = MemTracker::create_tracker(50, "c2", p);
+    auto p = std::make_unique<MemTrackerLimiter>(100);
+    auto c1 = std::make_unique<MemTrackerLimiter>(80, "c1", p.get());
+    auto c2 = std::make_unique<MemTrackerLimiter>(50, "c2", p.get());
 
     // everything below limits
     c1->consume(60);
@@ -95,9 +96,9 @@ TEST(MemTestTest, TrackerHierarchy) {
 }
 
 TEST(MemTestTest, TrackerHierarchyTryConsume) {
-    auto p = MemTracker::create_tracker(100);
-    auto c1 = MemTracker::create_tracker(80, "c1", p);
-    auto c2 = MemTracker::create_tracker(50, "c2", p);
+    auto p = std::make_unique<MemTrackerLimiter>(100);
+    auto c1 = std::make_unique<MemTrackerLimiter>(80, "c1", p.get());
+    auto c2 = std::make_unique<MemTrackerLimiter>(50, "c2", p.get());
 
     // everything below limits
     bool consumption = c1->try_consume(60).ok();
diff --git a/be/test/runtime/mem_pool_test.cpp b/be/test/runtime/mem_pool_test.cpp
index 4ec20cbd10..6cff27dce5 100644
--- a/be/test/runtime/mem_pool_test.cpp
+++ b/be/test/runtime/mem_pool_test.cpp
@@ -21,16 +21,14 @@
 
 #include <gtest/gtest.h>
 
-#include "runtime/mem_tracker.h"
 #include "util/logging.h"
 
 namespace doris {
 
 TEST(MemPoolTest, Basic) {
-    MemTracker tracker(-1);
-    MemPool p(&tracker);
-    MemPool p2(&tracker);
-    MemPool p3(&tracker);
+    MemPool p;
+    MemPool p2;
+    MemPool p3;
 
     // allocate a total of 24K in 32-byte pieces (for which we only request 25 bytes)
     for (int i = 0; i < 768; ++i) {
@@ -89,7 +87,7 @@ TEST(MemPoolTest, Basic) {
     EXPECT_EQ(256 * 1024, p2.total_reserved_bytes());
 
     {
-        MemPool p4(&tracker);
+        MemPool p4;
         p4.exchange_data(&p2);
         EXPECT_EQ(33 * 1024, p4.total_allocated_bytes());
         EXPECT_EQ(256 * 1024, p4.total_reserved_bytes());
@@ -101,8 +99,7 @@ TEST(MemPoolTest, Basic) {
 // remaining chunks are consistent if there were more than one used chunk and some
 // free chunks.
 TEST(MemPoolTest, Keep) {
-    MemTracker tracker(-1);
-    MemPool p(&tracker);
+    MemPool p;
     p.allocate(4 * 1024);
     p.allocate(8 * 1024);
     p.allocate(16 * 1024);
@@ -115,7 +112,7 @@ TEST(MemPoolTest, Keep) {
     p.allocate(4 * 1024);
     EXPECT_EQ(p.total_allocated_bytes(), (1 + 4) * 1024);
     EXPECT_EQ(p.total_reserved_bytes(), (4 + 8 + 16) * 1024);
-    MemPool p2(&tracker);
+    MemPool p2;
     p2.acquire_data(&p, true);
 
     {
@@ -134,8 +131,7 @@ TEST(MemPoolTest, MaxAllocation) {
     int64_t int_max_rounded = BitUtil::round_up(LARGE_ALLOC_SIZE, 8);
 
     // Allocate a single LARGE_ALLOC_SIZE chunk
-    MemTracker tracker(-1);
-    MemPool p1(&tracker);
+    MemPool p1;
     uint8_t* ptr = p1.allocate(LARGE_ALLOC_SIZE);
     EXPECT_TRUE(ptr != nullptr);
     EXPECT_EQ(int_max_rounded, p1.total_reserved_bytes());
@@ -143,7 +139,7 @@ TEST(MemPoolTest, MaxAllocation) {
     p1.free_all();
 
     // Allocate a small chunk (DEFAULT_INITIAL_CHUNK_SIZE) followed by an LARGE_ALLOC_SIZE chunk
-    MemPool p2(&tracker);
+    MemPool p2;
     p2.allocate(8);
     EXPECT_EQ(p2.total_reserved_bytes(), 4096);
     EXPECT_EQ(p2.total_allocated_bytes(), 8);
@@ -155,7 +151,7 @@ TEST(MemPoolTest, MaxAllocation) {
 
     // Allocate three LARGE_ALLOC_SIZE chunks followed by a small chunk followed by another LARGE_ALLOC_SIZE
     // chunk
-    MemPool p3(&tracker);
+    MemPool p3;
     p3.allocate(LARGE_ALLOC_SIZE);
     // Allocates new int_max_rounded * 2 chunk
     // NOTE: exceed MAX_CHUNK_SIZE limit, will not *2
diff --git a/be/test/runtime/memory_scratch_sink_test.cpp b/be/test/runtime/memory_scratch_sink_test.cpp
index 450b21ee6d..ef566559f9 100644
--- a/be/test/runtime/memory_scratch_sink_test.cpp
+++ b/be/test/runtime/memory_scratch_sink_test.cpp
@@ -33,7 +33,6 @@
 #include "gen_cpp/Types_types.h"
 #include "olap/options.h"
 #include "olap/row.h"
-#include "runtime/mem_tracker.h"
 #include "runtime/primitive_type.h"
 #include "runtime/row_batch.h"
 #include "runtime/runtime_state.h"
@@ -96,7 +95,6 @@ private:
     TPlanNode _tnode;
     RowDescriptor* _row_desc = nullptr;
     TMemoryScratchSink _tsink;
-    std::shared_ptr<MemTracker> _mem_tracker = nullptr;
     DescriptorTbl* _desc_tbl = nullptr;
     std::vector<TExpr> _exprs;
 };
@@ -113,9 +111,6 @@ void MemoryScratchSinkTest::init_runtime_state() {
     query_id.lo = 10;
     query_id.hi = 100;
     _state = new RuntimeState(query_id, query_options, TQueryGlobals(), _env->exec_env());
-    _state->init_instance_mem_tracker();
-    _mem_tracker =
-            MemTracker::create_tracker(-1, "MemoryScratchSinkTest", _state->instance_mem_tracker());
     _state->set_desc_tbl(_desc_tbl);
     _state->_load_dir = "./test_run/output/";
     _state->init_mem_trackers(TUniqueId());
diff --git a/be/test/runtime/string_buffer_test.cpp b/be/test/runtime/string_buffer_test.cpp
index 376d6fb7be..2064b282c9 100644
--- a/be/test/runtime/string_buffer_test.cpp
+++ b/be/test/runtime/string_buffer_test.cpp
@@ -22,7 +22,6 @@
 #include 
 
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 
 namespace doris {
 
@@ -36,8 +35,7 @@ void validate_string(const std::string& std_str, const StringBuffer& str) {
 }
 
 TEST(StringBufferTest, Basic) {
-    MemTracker tracker;
-    MemPool pool(&tracker);
+    MemPool pool;
     StringBuffer str(&pool);
     std::string std_str;
 
diff --git a/be/test/runtime/test_env.cc b/be/test/runtime/test_env.cc
index 8fdb6715ca..6892cafd31 100644
--- a/be/test/runtime/test_env.cc
+++ b/be/test/runtime/test_env.cc
@@ -24,19 +24,19 @@
 #include "olap/storage_engine.h"
 #include "runtime/fragment_mgr.h"
 #include "runtime/initial_reservations.h"
+#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/result_queue_mgr.h"
 #include "util/disk_info.h"
 #include "util/priority_thread_pool.hpp"
 
 namespace doris {
 
-TestEnv::TestEnv()
-        : _block_mgr_parent_tracker(MemTracker::create_tracker(-1, "BufferedBlockMgr2")) {
+TestEnv::TestEnv() {
     // Some code will use ExecEnv::GetInstance(), so init the global ExecEnv singleton
     _exec_env = ExecEnv::GetInstance();
     _exec_env->_thread_mgr = new ThreadResourceMgr(2);
     _exec_env->_buffer_reservation = new ReservationTracker();
-    _exec_env->_task_pool_mem_tracker_registry.reset(new MemTrackerTaskPool());
+    _exec_env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool();
     _exec_env->_disk_io_mgr = new DiskIoMgr(1, 1, 1, 10);
     _exec_env->disk_io_mgr()->init(-1);
     _exec_env->_scan_thread_pool = new PriorityThreadPool(1, 16);
@@ -63,6 +63,7 @@ TestEnv::~TestEnv() {
     SAFE_DELETE(_exec_env->_buffer_pool);
     SAFE_DELETE(_exec_env->_scan_thread_pool);
     SAFE_DELETE(_exec_env->_disk_io_mgr);
+    SAFE_DELETE(_exec_env->_task_pool_mem_tracker_registry);
     SAFE_DELETE(_exec_env->_buffer_reservation);
     SAFE_DELETE(_exec_env->_thread_mgr);
 
@@ -88,9 +89,8 @@ Status TestEnv::create_query_state(int64_t query_id, int max_buffers, int block_
     }
 
     std::shared_ptr<BufferedBlockMgr2> mgr;
-    RETURN_IF_ERROR(BufferedBlockMgr2::create(
-            *runtime_state, (*runtime_state)->runtime_profile(), _tmp_file_mgr.get(),
-            calculate_mem_tracker(max_buffers, block_size), block_size, &mgr));
+    RETURN_IF_ERROR(BufferedBlockMgr2::create(*runtime_state, (*runtime_state)->runtime_profile(),
+                                              _tmp_file_mgr.get(), block_size, &mgr));
     (*runtime_state)->set_block_mgr2(mgr);
     // (*runtime_state)->_block_mgr = mgr;
 
diff --git a/be/test/runtime/test_env.h b/be/test/runtime/test_env.h
index 5212efc4da..e47a7d595d 100644
--- a/be/test/runtime/test_env.h
+++ b/be/test/runtime/test_env.h
@@ -60,8 +60,6 @@ public:
     static int64_t calculate_mem_tracker(int max_buffers, int block_size);
 
     ExecEnv* exec_env() { return _exec_env; }
-    std::shared_ptr<MemTracker> block_mgr_parent_tracker() { return _block_mgr_parent_tracker; }
-    MemTracker* io_mgr_tracker() { return _io_mgr_tracker.get(); }
     TmpFileMgr* tmp_file_mgr() { return _tmp_file_mgr.get(); }
 
 private:
@@ -69,8 +67,6 @@ private:
     RuntimeState* create_runtime_state(int64_t query_id);
 
     ExecEnv* _exec_env;
-    std::shared_ptr<MemTracker> _block_mgr_parent_tracker;
-    std::shared_ptr<MemTracker> _io_mgr_tracker;
     std::shared_ptr<TmpFileMgr> _tmp_file_mgr;
 
     // Per-query states with associated block managers.
diff --git a/be/test/testutil/array_utils.cpp b/be/test/testutil/array_utils.cpp
index d1946a8559..834f8c6157 100644
--- a/be/test/testutil/array_utils.cpp
+++ b/be/test/testutil/array_utils.cpp
@@ -23,7 +23,7 @@
 #include "runtime/collection_value.h"
 #include "runtime/free_pool.hpp"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "udf/udf_internal.h"
 #include "util/array_parser.h"
 
diff --git a/be/test/testutil/function_utils.cpp b/be/test/testutil/function_utils.cpp
index 934e20c6d6..28aaeb2455 100644
--- a/be/test/testutil/function_utils.cpp
+++ b/be/test/testutil/function_utils.cpp
@@ -27,7 +27,7 @@ namespace doris {
 FunctionUtils::FunctionUtils() {
     doris_udf::FunctionContext::TypeDesc return_type;
     std::vector<doris_udf::FunctionContext::TypeDesc> arg_types;
-    _memory_pool = new MemPool("function util");
+    _memory_pool = new MemPool();
     _fn_ctx = FunctionContextImpl::create_context(_state, _memory_pool, return_type, arg_types, 0,
                                                   false);
 }
@@ -35,7 +35,7 @@ FunctionUtils::FunctionUtils(RuntimeState* state) {
     _state = state;
     doris_udf::FunctionContext::TypeDesc return_type;
     std::vector<doris_udf::FunctionContext::TypeDesc> arg_types;
-    _memory_pool = new MemPool("function util");
+    _memory_pool = new MemPool();
     _fn_ctx = FunctionContextImpl::create_context(_state, _memory_pool, return_type, arg_types, 0,
                                                   false);
 }
@@ -43,7 +43,7 @@ FunctionUtils::FunctionUtils(RuntimeState* state) {
 FunctionUtils::FunctionUtils(const doris_udf::FunctionContext::TypeDesc& return_type,
                              const std::vector<doris_udf::FunctionContext::TypeDesc>& arg_types,
                              int varargs_buffer_size) {
-    _memory_pool = new MemPool("function util");
+    _memory_pool = new MemPool();
     _fn_ctx = FunctionContextImpl::create_context(_state, _memory_pool, return_type, arg_types,
                                                   varargs_buffer_size, false);
 }
diff --git a/be/test/testutil/run_all_tests.cpp b/be/test/testutil/run_all_tests.cpp
index 842282e8c6..b84cc6c375 100644
--- a/be/test/testutil/run_all_tests.cpp
+++ b/be/test/testutil/run_all_tests.cpp
@@ -20,6 +20,8 @@
 #include "common/config.h"
 #include "olap/page_cache.h"
 #include "olap/segment_loader.h"
+#include "runtime/exec_env.h"
+#include "runtime/memory/mem_tracker_limiter.h"
 #include "service/backend_options.h"
 #include "util/cpu_info.h"
 #include "util/disk_info.h"
@@ -27,6 +29,9 @@
 #include "util/mem_info.h"
 
 int main(int argc, char** argv) {
+    doris::MemTrackerLimiter* process_mem_tracker = new doris::MemTrackerLimiter(-1, "Process");
+    doris::ExecEnv::GetInstance()->set_process_mem_tracker(process_mem_tracker);
+    doris::thread_context()->_thread_mem_tracker_mgr->init();
     doris::StoragePageCache::create_global_cache(1 << 30, 10);
     doris::SegmentLoader::create_global_instance(1000);
     std::string conf = std::string(getenv("DORIS_HOME")) + "/conf/be.conf";
diff --git a/be/test/tools/benchmark_tool.cpp b/be/test/tools/benchmark_tool.cpp
index 279305e72c..3b51659178 100644
--- a/be/test/tools/benchmark_tool.cpp
+++ b/be/test/tools/benchmark_tool.cpp
@@ -52,7 +52,6 @@
 #include "olap/tablet_schema_helper.h"
 #include "olap/types.h"
 #include "runtime/mem_pool.h"
-#include "runtime/mem_tracker.h"
 #include "testutil/test_util.h"
 #include "util/debug_util.h"
 #include "util/file_utils.h"
@@ -190,8 +189,7 @@ public:
             //check values
             size_t num = page_start_ids[slice_index + 1] - page_start_ids[slice_index];
 
-            auto tracker = std::make_shared<MemTracker>();
-            MemPool pool(tracker.get());
+            MemPool pool;
             const auto* type_info = get_scalar_type_info();
             std::unique_ptr<ColumnVectorBatch> cvb;
             ColumnVectorBatch::create(num, false, type_info, nullptr, &cvb);
@@ -269,9 +267,7 @@ private:
 class SegmentBenchmark : public BaseBenchmark {
 public:
     SegmentBenchmark(const std::string& name, int iterations, const std::string& column_type)
-            : BaseBenchmark(name, iterations),
-              _tracker(std::make_shared<MemTracker>()),
-              _pool(_tracker.get()) {
+            : BaseBenchmark(name, iterations), _pool() {
         if (FileUtils::check_exist(kSegmentDir)) {
             FileUtils::remove_all(kSegmentDir);
         }
@@ -280,9 +276,7 @@ public:
         init_schema(column_type);
     }
     SegmentBenchmark(const std::string& name, int iterations)
-            : BaseBenchmark(name, iterations),
-              _tracker(std::make_shared<MemTracker>()),
-              _pool(_tracker.get()) {
+            : BaseBenchmark(name, iterations), _pool() {
         if (FileUtils::check_exist(kSegmentDir)) {
             FileUtils::remove_all(kSegmentDir);
         }
@@ -405,7 +399,6 @@ private:
     }
 
 private:
-    std::shared_ptr<MemTracker> _tracker;
     MemPool _pool;
     TabletSchema _tablet_schema;
     std::shared_ptr<Schema> _schema;
diff --git a/be/test/util/array_parser_test.cpp b/be/test/util/array_parser_test.cpp
index e5e2564744..c88ef91753 100644
--- a/be/test/util/array_parser_test.cpp
+++ b/be/test/util/array_parser_test.cpp
@@ -22,7 +22,6 @@
 
 #include "olap/tablet_schema.h"
 #include "olap/types.h"
-#include "runtime/mem_tracker.h"
 #include "runtime/string_value.h"
 #include "testutil/array_utils.h"
 
@@ -52,8 +51,7 @@ static TypeInfoPtr get_type_info(const ColumnPB& column_pb) {
 
 static void test_array_parser(const ColumnPB& column_pb, const std::string& json,
                               const CollectionValue& expect) {
-    MemTracker tracker(1024 * 1024, "ArrayParserTest");
-    MemPool mem_pool(&tracker);
+    MemPool mem_pool;
     FunctionContext context;
     ArrayUtils::prepare_context(context, mem_pool, column_pb);
     CollectionValue actual;
@@ -175,8 +173,7 @@ TEST(ArrayParserTest, TestDecimalArray) {
 
 TEST(ArrayParserTest, TestFreePool) {
     auto column_pb = create_column_pb("ARRAY", "DECIMAL");
-    MemTracker tracker(1024 * 1024, "ArrayParserTest");
-    MemPool mem_pool(&tracker);
+    MemPool mem_pool;
     FunctionContext context;
     ArrayUtils::prepare_context(context, mem_pool, column_pb);
     int alignment = 1;
diff --git a/be/test/util/arrow/arrow_row_batch_test.cpp b/be/test/util/arrow/arrow_row_batch_test.cpp
index cf5bf79d32..4899758e00 100644
--- a/be/test/util/arrow/arrow_row_batch_test.cpp
+++ b/be/test/util/arrow/arrow_row_batch_test.cpp
@@ -32,7 +32,7 @@
 #include 
 
 #include "common/object_pool.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/row_batch.h"
 #include "util/debug_util.h"
 
diff --git a/be/test/util/arrow/arrow_work_flow_test.cpp b/be/test/util/arrow/arrow_work_flow_test.cpp
index f04cc56b6c..c160047de4 100644
--- a/be/test/util/arrow/arrow_work_flow_test.cpp
+++ b/be/test/util/arrow/arrow_work_flow_test.cpp
@@ -30,7 +30,7 @@
 #include "olap/row.h"
 #include "runtime/bufferpool/reservation_tracker.h"
 #include "runtime/exec_env.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/result_queue_mgr.h"
 #include "runtime/row_batch.h"
 #include "runtime/runtime_state.h"
@@ -69,6 +69,7 @@ protected:
             delete _exec_env->_result_queue_mgr;
             delete _exec_env->_thread_mgr;
             delete _exec_env->_buffer_reservation;
+            delete _exec_env->_task_pool_mem_tracker_registry;
         }
     }
 
@@ -83,7 +84,6 @@ private:
     TPlanNode _tnode;
     ExecEnv* _exec_env = nullptr;
     RuntimeState* _state = nullptr;
-    std::shared_ptr _mem_tracker;
 }; // end class ArrowWorkFlowTest
 
 void ArrowWorkFlowTest::init() {
@@ -96,7 +96,7 @@ void ArrowWorkFlowTest::init_runtime_state() {
     _exec_env->_result_queue_mgr = new ResultQueueMgr();
     _exec_env->_thread_mgr = new ThreadResourceMgr();
     _exec_env->_buffer_reservation = new ReservationTracker();
-    _exec_env->_task_pool_mem_tracker_registry.reset(new MemTrackerTaskPool());
+    _exec_env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool();
     _exec_env->_is_init = true;
     TQueryOptions query_options;
     query_options.batch_size = 1024;
@@ -105,8 +105,6 @@ void ArrowWorkFlowTest::init_runtime_state() {
     query_id.hi = 100;
     _state = new RuntimeState(query_id, query_options, TQueryGlobals(), _exec_env);
     _state->init_instance_mem_tracker();
-    _mem_tracker =
-            MemTracker::create_tracker(-1, "ArrowWorkFlowTest", _state->instance_mem_tracker());
     _state->set_desc_tbl(_desc_tbl);
     _state->_load_dir = "./test_run/output/";
     _state->init_mem_trackers(TUniqueId());
diff --git a/be/test/util/tuple_row_zorder_compare_test.cpp b/be/test/util/tuple_row_zorder_compare_test.cpp
index 4399b505f1..66d014087f 100644
--- a/be/test/util/tuple_row_zorder_compare_test.cpp
+++ b/be/test/util/tuple_row_zorder_compare_test.cpp
@@ -37,13 +37,9 @@ namespace doris {
 class TupleRowZOrderCompareTest : public testing::Test {
 public:
     ObjectPool _agg_buffer_pool;
-    std::unique_ptr<MemTracker> _mem_tracker;
     std::unique_ptr<MemPool> _buffer_mem_pool;
 
-    TupleRowZOrderCompareTest() {
-        _mem_tracker.reset(new MemTracker(-1));
-        _buffer_mem_pool.reset(new MemPool(_mem_tracker.get()));
-    }
+    TupleRowZOrderCompareTest() { _buffer_mem_pool.reset(new MemPool()); }
 
     ~TupleRowZOrderCompareTest() = default;
 
diff --git a/be/test/vec/exec/vbroker_scan_node_test.cpp b/be/test/vec/exec/vbroker_scan_node_test.cpp
index 719d5014ea..6cff164b10 100644
--- a/be/test/vec/exec/vbroker_scan_node_test.cpp
+++ b/be/test/vec/exec/vbroker_scan_node_test.cpp
@@ -31,7 +31,7 @@
 #include "gen_cpp/PlanNodes_types.h"
 #include "io/local_file_reader.h"
 #include "runtime/descriptors.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/primitive_type.h"
 #include "runtime/runtime_state.h"
 #include "runtime/user_function_cache.h"
@@ -45,7 +45,7 @@ class VBrokerScanNodeTest : public testing::Test {
 public:
     VBrokerScanNodeTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
         _runtime_state._query_options.enable_vectorized_engine = true;
     }
     void init();
diff --git a/be/test/vec/exec/vbroker_scanner_test.cpp b/be/test/vec/exec/vbroker_scanner_test.cpp
index 27c4643050..7824f10c30 100644
--- a/be/test/vec/exec/vbroker_scanner_test.cpp
+++ b/be/test/vec/exec/vbroker_scanner_test.cpp
@@ -28,7 +28,7 @@
 #include "gen_cpp/PlanNodes_types.h"
 #include "io/local_file_reader.h"
 #include "runtime/descriptors.h"
-#include "runtime/mem_tracker.h"
+#include "runtime/memory/mem_tracker.h"
 #include "runtime/runtime_state.h"
 #include "runtime/user_function_cache.h"
 
@@ -40,7 +40,7 @@ public:
     VBrokerScannerTest() : _runtime_state(TQueryGlobals()) {
         init();
         _profile = _runtime_state.runtime_profile();
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
 
         TUniqueId unique_id;
         TQueryOptions query_options;
diff --git a/be/test/vec/exec/vjson_scanner_test.cpp b/be/test/vec/exec/vjson_scanner_test.cpp
index b059faf010..90df1fd8a7 100644
--- a/be/test/vec/exec/vjson_scanner_test.cpp
+++ b/be/test/vec/exec/vjson_scanner_test.cpp
@@ -46,7 +46,7 @@ class VJsonScannerTest : public testing::Test {
 public:
     VJsonScannerTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
 
         TUniqueId unique_id;
         TQueryOptions query_options;
diff --git a/be/test/vec/exec/vorc_scanner_test.cpp b/be/test/vec/exec/vorc_scanner_test.cpp
index e6b3b2a96f..7e9aff95ba 100644
--- a/be/test/vec/exec/vorc_scanner_test.cpp
+++ b/be/test/vec/exec/vorc_scanner_test.cpp
@@ -46,7 +46,7 @@ class VOrcScannerTest : public testing::Test {
 public:
     VOrcScannerTest() : _runtime_state(TQueryGlobals()) {
         _profile = _runtime_state.runtime_profile();
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
         _runtime_state._query_options.enable_vectorized_engine = true;
     }
     ~VOrcScannerTest() {}
@@ -414,10 +414,6 @@ TEST_F(VOrcScannerTest, normal) {
                         &_counter);
     EXPECT_TRUE(scanner.open().ok());
 
-    //auto tracker = std::make_shared<MemTracker>();
-    //MemPool tuple_pool(tracker.get());
-
-    //Tuple* tuple = (Tuple*)tuple_pool.allocate(_desc_tbl->get_tuple_descriptor(1)->byte_size());
     vectorized::Block block;
     bool eof = false;
 
diff --git a/be/test/vec/exec/vparquet_scanner_test.cpp b/be/test/vec/exec/vparquet_scanner_test.cpp
index 6d3810cc73..bb1bb2c7f3 100644
--- a/be/test/vec/exec/vparquet_scanner_test.cpp
+++ b/be/test/vec/exec/vparquet_scanner_test.cpp
@@ -41,7 +41,7 @@ class VParquetScannerTest : public testing::Test {
 public:
     VParquetScannerTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        _runtime_state.init_instance_mem_tracker();
         _runtime_state._query_options.enable_vectorized_engine = true;
     }
     ~VParquetScannerTest() {}
diff --git a/be/test/vec/exec/vtablet_sink_test.cpp b/be/test/vec/exec/vtablet_sink_test.cpp
index 67ae97128d..39d98e38ad 100644
--- a/be/test/vec/exec/vtablet_sink_test.cpp
+++ b/be/test/vec/exec/vtablet_sink_test.cpp
@@ -29,6 +29,7 @@
 #include "runtime/decimalv2_value.h"
 #include "runtime/descriptor_helper.h"
 #include "runtime/exec_env.h"
+#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/result_queue_mgr.h"
 #include "runtime/runtime_state.h"
 #include "runtime/stream_load/load_stream_mgr.h"
@@ -59,7 +60,7 @@ public:
         _env->_internal_client_cache = new BrpcClientCache<PBackendService_Stub>();
         _env->_function_client_cache = new BrpcClientCache<PFunctionService_Stub>();
         _env->_buffer_reservation = new ReservationTracker();
-        _env->_task_pool_mem_tracker_registry.reset(new MemTrackerTaskPool());
+        _env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool();
         ThreadPoolBuilder("SendBatchThreadPool")
                 .set_min_threads(1)
                 .set_max_threads(5)
@@ -76,6 +77,7 @@ public:
         SAFE_DELETE(_env->_master_info);
         SAFE_DELETE(_env->_thread_mgr);
         SAFE_DELETE(_env->_buffer_reservation);
+        SAFE_DELETE(_env->_task_pool_mem_tracker_registry);
         if (_server) {
             _server->Stop(100);
             _server->Join();
diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp
index 4ece5fd792..473a8aa695 100644
--- a/be/test/vec/exprs/vexpr_test.cpp
+++ b/be/test/vec/exprs/vexpr_test.cpp
@@ -28,7 +28,6 @@
 #include "gen_cpp/Types_types.h"
 #include "runtime/exec_env.h"
 #include "runtime/large_int_value.h"
-#include "runtime/mem_tracker.h"
 #include "runtime/memory/chunk_allocator.h"
 #include "runtime/primitive_type.h"
 #include "runtime/row_batch.h"
@@ -72,8 +71,7 @@ TEST(TEST_VEXPR, ABSTEST) {
                                      doris::TQueryGlobals(), nullptr);
     runtime_stat.init_instance_mem_tracker();
     runtime_stat.set_desc_tbl(desc_tbl);
-    std::shared_ptr<doris::MemTracker> tracker = doris::MemTracker::create_tracker();
-    context->prepare(&runtime_stat, row_desc, tracker);
+    context->prepare(&runtime_stat, row_desc);
     context->open(&runtime_stat);
 
     auto block = row_batch.convert_to_vec_block();
@@ -118,8 +116,7 @@ TEST(TEST_VEXPR, ABSTEST2) {
     DescriptorTbl desc_tbl;
     desc_tbl._slot_desc_map[0] = tuple_desc->slots()[0];
     runtime_stat.set_desc_tbl(&desc_tbl);
-    std::shared_ptr<doris::MemTracker> tracker = doris::MemTracker::create_tracker();
-    context->prepare(&runtime_stat, row_desc, tracker);
+    context->prepare(&runtime_stat, row_desc);
     context->open(&runtime_stat);
 
     auto block = row_batch.convert_to_vec_block();
diff --git a/build-support/clang-format.sh b/build-support/clang-format.sh
index 005458372c..df054d4ea2 100755
--- a/build-support/clang-format.sh
+++ b/build-support/clang-format.sh
@@ -28,6 +28,7 @@ ROOT=`cd "$ROOT"; pwd`
 
 export DORIS_HOME=`cd "${ROOT}/.."; pwd`
 
-CLANG_FORMAT=${CLANG_FORMAT_BINARY:=$(which clang-format)}
+#CLANG_FORMAT=${CLANG_FORMAT_BINARY:=$(which clang-format)}
+CLANG_FORMAT=/mnt/disk1/liyifan/doris/ldb_toolchain/bin/clang-format
 
 python ${DORIS_HOME}/build-support/run_clang_format.py "--clang-format-executable" "${CLANG_FORMAT}" "-r" "--style" "file" "--inplace" "true" "--extensions" "c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx" "--exclude" "none" "be/src be/test"
diff --git a/docs/en/docs/admin-manual/config/be-config.md b/docs/en/docs/admin-manual/config/be-config.md
index 31291455ce..0e1aec9be4 100644
--- a/docs/en/docs/admin-manual/config/be-config.md
+++ b/docs/en/docs/admin-manual/config/be-config.md
@@ -1443,36 +1443,18 @@ The size of the buffer before flushing
   
 * Default: 3
 
-### `track_new_delete`
+### `enable_tcmalloc_hook`
 
 * Type: bool
 * Description: Whether to hook TCMalloc new/delete; currently the hook consumes/releases the thread-local (TLS) mem tracker.
 * Default: true
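
To make the hook's effect concrete, here is a minimal, self-contained sketch of the idea (not the Doris implementation; `tls_tracked_bytes`, `tracked_alloc`, and `tracked_free` are illustrative names): every allocation and free on a thread is reported to that thread's tracker, which is later folded into the query/process trackers.

```cpp
#include <cstddef>
#include <cstdlib>
#include <iostream>

// Stand-in for the thread-local mem tracker that the TCMalloc hook updates.
thread_local long long tls_tracked_bytes = 0;

void* tracked_alloc(std::size_t size) {
    tls_tracked_bytes += static_cast<long long>(size); // "consume" on allocation
    return std::malloc(size);
}

void tracked_free(void* ptr, std::size_t size) {
    tls_tracked_bytes -= static_cast<long long>(size); // "release" on free
    std::free(ptr);
}

int main() {
    void* p = tracked_alloc(1024);
    std::cout << "tracked: " << tls_tracked_bytes << " bytes\n"; // prints 1024
    tracked_free(p, 1024);
    std::cout << "tracked: " << tls_tracked_bytes << " bytes\n"; // prints 0
    return 0;
}
```

How often the per-thread value is flushed to the shared trackers is governed by `mem_tracker_consume_min_size_bytes` below.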
 
-### `mem_tracker_level`
-
-* Type: int16
-* Description: The level at which MemTracker is displayed on the Web page equal or lower than this level will be displayed on the Web page
-  ```
-    OVERVIEW = 0
-    TASK = 1
-    INSTANCE = 2
-    VERBOSE = 3
-  ```
-* Default: 0
-
 ### `mem_tracker_consume_min_size_bytes`
 
 * Type: int32
 * Description: The minimum size, in bytes, that the TCMalloc hook consumes/releases to a MemTracker at one time. Consume sizes smaller than this value keep accumulating, to avoid frequent calls to MemTracker consume/release. Decreasing this value increases the consume/release frequency; increasing it makes MemTracker statistics less accurate. In theory, a MemTracker's reported value can differ from the true value by up to (mem_tracker_consume_min_size_bytes * the number of BE threads in which that MemTracker is used).
 * Default: 1048576
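
A rough sketch of the batching this parameter controls (illustrative only, not the Doris code; `cached_consume`, `untracked_cache`, and `kConsumeMinSizeBytes` are made-up names): small deltas accumulate in a thread-local cache and are flushed to the shared tracker only once they cross the threshold, which is why a larger threshold means cheaper tracking but a larger possible per-thread error.

```cpp
#include <atomic>
#include <cstdint>
#include <iostream>

std::atomic<int64_t> shared_tracker{0};            // stand-in for the shared MemTracker
constexpr int64_t kConsumeMinSizeBytes = 1048576;  // plays the role of mem_tracker_consume_min_size_bytes
thread_local int64_t untracked_cache = 0;          // per-thread accumulated delta

void cached_consume(int64_t bytes) {
    untracked_cache += bytes;
    // Flush to the shared tracker only when the accumulated delta is large enough.
    if (untracked_cache >= kConsumeMinSizeBytes || untracked_cache <= -kConsumeMinSizeBytes) {
        shared_tracker.fetch_add(untracked_cache, std::memory_order_relaxed);
        untracked_cache = 0;
    }
}

int main() {
    cached_consume(512 * 1024);                 // below threshold: stays in the thread cache
    std::cout << shared_tracker.load() << "\n"; // prints 0
    cached_consume(512 * 1024);                 // reaches 1 MiB: flushed to the shared tracker
    std::cout << shared_tracker.load() << "\n"; // prints 1048576
    return 0;
}
```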
 
-### `memory_leak_detection`
-
-* Type: bool
-* Description: Whether to start memory leak detection, when MemTracker is a negative value, it is considered that a memory leak has occurred, but the actual MemTracker records inaccurately will also cause a negative value, so this feature is in the experimental stage.
-* Default: false
-
 ### `max_segment_num_per_rowset`
 
 * Type: int32
diff --git a/docs/zh-CN/docs/admin-manual/config/be-config.md b/docs/zh-CN/docs/admin-manual/config/be-config.md
index 11fa254a2d..fc7cc62b94 100644
--- a/docs/zh-CN/docs/admin-manual/config/be-config.md
+++ b/docs/zh-CN/docs/admin-manual/config/be-config.md
@@ -1462,36 +1462,18 @@ webserver默认工作线程数
   ```
 * 默认值: 3
 
-### `track_new_delete`
+### `enable_tcmalloc_hook`
 
 * 类型:bool
 * 描述:是否Hook TCmalloc new/delete,目前在Hook中统计thread local MemTracker。
 * 默认值:true
 
-### `mem_tracker_level`
-
-* 类型: int16
-* 描述: MemTracker在Web页面上展示的级别,等于或低于这个级别的MemTracker会在Web页面上展示
-  ```
-    OVERVIEW = 0
-    TASK = 1
-    INSTANCE = 2
-    VERBOSE = 3
-  ```
-* 默认值: 0
-
 ### `mem_tracker_consume_min_size_bytes`
 
 * 类型: int32
 * 描述: TCMalloc Hook consume/release MemTracker时的最小长度,小于该值的consume size会持续累加,避免频繁调用MemTracker的consume/release,减小该值会增加consume/release的频率,增大该值会导致MemTracker统计不准,理论上一个MemTracker的统计值与真实值相差 = (mem_tracker_consume_min_size_bytes * 这个MemTracker所在的BE线程数)。
 * 默认值: 1048576
 
-### `memory_leak_detection`
-
-* 类型: bool
-* 描述: 是否启动内存泄漏检测,当 MemTracker 为负值时认为发生了内存泄漏,但实际 MemTracker 记录不准确时也会导致负值,所以这个功能处于实验阶段。
-* 默认值: false
-
 ### `max_segment_num_per_rowset`
 
 * 类型: int32
diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy
index 6f419aea16..a4ebc7ea02 100644
--- a/regression-test/conf/regression-conf.groovy
+++ b/regression-test/conf/regression-conf.groovy
@@ -20,11 +20,11 @@
 // **Note**: default db will be create if not exist
 defaultDb = "regression_test"
 
-jdbcUrl = "jdbc:mysql://127.0.0.1:9030/?"
+jdbcUrl = "jdbc:mysql://127.0.0.1:9083/?"
 jdbcUser = "root"
 jdbcPassword = ""
 
-feHttpAddress = "127.0.0.1:8030"
+feHttpAddress = "127.0.0.1:8033"
 feHttpUser = "root"
 feHttpPassword = ""