diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp
index 2aa195bebc..b50b03460e 100644
--- a/be/src/exec/hash_table.cpp
+++ b/be/src/exec/hash_table.cpp
@@ -175,7 +175,7 @@ Status HashTable::resize_buckets(int64_t num_buckets) {
 
     int64_t old_num_buckets = _num_buckets;
     int64_t delta_bytes = (num_buckets - old_num_buckets) * sizeof(Bucket);
-    Status st = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit(
+    Status st = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit(
             delta_bytes);
     if (!st) {
         LOG_EVERY_N(WARNING, 100) << "resize bucket failed: " << st.to_string();
diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index 1dcd5c3754..b4bd204614 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -1535,7 +1535,7 @@ void OlapScanNode::transfer_thread(RuntimeState* state) {
             size_t thread_slot_num = 0;
             mem_consume = _scanner_mem_tracker->consumption();
             // check limit for total memory and _scan_row_batches memory
-            if (mem_consume < (state->instance_mem_tracker()->limit() * 6) / 10 &&
+            if (mem_consume < (state->query_mem_tracker()->limit() * 6) / 10 &&
                 _scan_row_batches_bytes < _max_scanner_queue_size_bytes / 2) {
                 thread_slot_num = max_thread - assigned_thread_num;
             } else {
diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc
index 624f7c71ce..49c02fa5a8 100644
--- a/be/src/exec/partitioned_aggregation_node.cc
+++ b/be/src/exec/partitioned_aggregation_node.cc
@@ -911,14 +911,14 @@ Tuple* PartitionedAggregationNode::ConstructIntermediateTuple(
             << "Backend: " << BackendOptions::get_localhost() << ", "
             << "fragment: " << print_id(state_->fragment_instance_id()) << " "
             << "Used: "
-            << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->consumption()
+            << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->consumption()
             << ", Limit: "
-            << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->limit() << ". "
+            << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->limit() << ". "
             << "You can change the limit by session variable exec_mem_limit.";
         string details = Substitute(str.str(), _id, tuple_data_size);
         *status = thread_context()
-                          ->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()
-                          ->mem_limit_exceeded(state_, details, tuple_data_size);
+                          ->_thread_mem_tracker_mgr->limiter_mem_tracker()
+                          ->fragment_mem_limit_exceeded(state_, details, tuple_data_size);
         return nullptr;
     }
     memset(tuple_data, 0, fixed_size);
diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc
index 83fe65d1b6..f9598b510b 100644
--- a/be/src/exec/partitioned_hash_table.cc
+++ b/be/src/exec/partitioned_hash_table.cc
@@ -307,7 +307,7 @@ Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state,
                                      MAX_EXPR_VALUES_ARRAY_SIZE / expr_values_bytes_per_row_));
 
     int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_);
-    if (UNLIKELY(!thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit(
+    if (UNLIKELY(!thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit(
                 mem_usage))) {
         capacity_ = 0;
         string details = Substitute(
diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp
index 762feacb15..a7e03c11e2 100644
--- a/be/src/exec/tablet_sink.cpp
+++ b/be/src/exec/tablet_sink.cpp
@@ -48,7 +48,7 @@ NodeChannel::NodeChannel(OlapTableSink* parent, IndexChannel* index_channel, int
         : _parent(parent), _index_channel(index_channel), _node_id(node_id) {
     _node_channel_tracker = std::make_unique<MemTracker>(fmt::format(
             "NodeChannel:indexID={}:threadId={}", std::to_string(_index_channel->_index_id),
-            thread_context()->thread_id_str()));
+            thread_context()->get_thread_id()));
 }
 
 NodeChannel::~NodeChannel() noexcept {
@@ -624,7 +624,7 @@ void NodeChannel::try_send_batch(RuntimeState* state) {
         _add_batch_closure->cntl.http_request().set_method(brpc::HTTP_METHOD_POST);
         _add_batch_closure->cntl.http_request().set_content_type("application/json");
         {
-            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
             _brpc_http_stub->tablet_writer_add_batch_by_http(&_add_batch_closure->cntl, NULL,
                                                              &_add_batch_closure->result,
                                                              _add_batch_closure);
@@ -632,7 +632,7 @@ void NodeChannel::try_send_batch(RuntimeState* state) {
     } else {
         _add_batch_closure->cntl.http_request().Clear();
         {
-            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
             _stub->tablet_writer_add_batch(&_add_batch_closure->cntl, &request,
                                            &_add_batch_closure->result, _add_batch_closure);
         }
diff --git a/be/src/exec/tablet_sink.h b/be/src/exec/tablet_sink.h
index 1ba4b46a60..45552329bf 100644
--- a/be/src/exec/tablet_sink.h
+++ b/be/src/exec/tablet_sink.h
@@ -96,7 +96,7 @@ public:
     ~ReusableClosure() override {
         // shouldn't delete when Run() is calling or going to be called, wait for current Run() done.
         join();
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         cntl.Reset();
     }
 
@@ -124,7 +124,7 @@ public:
 
     // plz follow this order: reset() -> set_in_flight() -> send brpc batch
     void reset() {
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         cntl.Reset();
         cid = cntl.call_id();
     }
diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp
index a74fc740d5..7edc3ec7f0 100644
--- a/be/src/http/default_path_handlers.cpp
+++ b/be/src/http/default_path_handlers.cpp
@@ -84,9 +84,7 @@ void config_handler(const WebPageHandler::ArgumentMap& args, std::stringstream*
 // Registered to handle "/memz", and prints out memory allocation statistics.
 void mem_usage_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) {
     (*output) << "<pre>"
-              << "Mem Limit: "
-              << PrettyPrinter::print(ExecEnv::GetInstance()->process_mem_tracker()->limit(),
-                                      TUnit::BYTES)
+              << "Mem Limit: " << PrettyPrinter::print(MemInfo::mem_limit(), TUnit::BYTES)
               << std::endl
               << "Physical Mem From Perf: "
               << PrettyPrinter::print(PerfCounters::get_vm_rss(), TUnit::BYTES) << std::endl
@@ -121,14 +119,48 @@ void display_tablets_callback(const WebPageHandler::ArgumentMap& args, EasyJson*
 // Registered to handle "/mem_tracker", and prints out memory tracker information.
 void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) {
     (*output) << "<h1>Memory usage by subsystem</h1>\n";
+    std::vector<MemTracker::Snapshot> snapshots;
+    auto iter = args.find("type");
+    if (iter != args.end()) {
+        if (iter->second == "global") {
+            MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::GLOBAL);
+        } else if (iter->second == "query") {
+            MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::QUERY);
+        } else if (iter->second == "load") {
+            MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::LOAD);
+        } else if (iter->second == "compaction") {
+            MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::COMPACTION);
+        } else if (iter->second == "schema_change") {
+            MemTrackerLimiter::make_type_snapshots(&snapshots,
+                                                   MemTrackerLimiter::Type::SCHEMA_CHANGE);
+        } else if (iter->second == "clone") {
+            MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::CLONE);
+        } else if (iter->second == "batch_load") {
+            MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::BATCHLOAD);
+        } else if (iter->second == "consistency") {
+            MemTrackerLimiter::make_type_snapshots(&snapshots,
+                                                   MemTrackerLimiter::Type::CONSISTENCY);
+        }
+    } else {
+        (*output) << "<h4>*Note: (see documentation for details)</h4>\n";
+        (*output) << "<h4>     1.`/mem_tracker?type=global` to view the memory statistics of each "
+                     "type</h4>\n";
+        (*output) << "<h4>     2.`/mem_tracker` counts virtual memory, which is equal to `Actual "
+                     "memory used` in `/memz`</h4>\n";
+        (*output) << "<h4>     3.`process` is equal to the sum of all types of memory, "
+                     "`/mem_tracker` can be logically divided into 4 layers: 1)`process` 2)`type` "
+                     "3)`query/load/compation task etc.` 4)`exec node etc.`</h4>\n";
+        MemTrackerLimiter::make_process_snapshots(&snapshots);
+    }
+
     (*output) << "<table data-toggle='table' "
                  "       data-pagination='true' "
                  "       data-search='true' "
                  "       class='table table-striped'>\n";
     (*output) << "<thead><tr>"
-                 "<th data-sortable='true'>Level</th>"
+                 "<th data-sortable='true'>Type</th>"
                  "<th data-sortable='true'>Label</th>"
-                 "<th>Parent</th>"
+                 "<th data-sortable='true'>Parent Label</th>"
                  "<th>Limit</th>"
                  "<th data-sortable='true' "
                  ">Current Consumption(Bytes)</th>"
@@ -136,35 +168,17 @@ void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstr
                  "<th data-sortable='true' "
                  ">Peak Consumption(Bytes)</th>"
                  "<th>Peak Consumption(Normalize)</th>"
-                 "<th data-sortable='true' "
-                 ">Child Count</th>"
                  "</tr></thead>";
     (*output) << "<tbody>\n";
-
-    size_t upper_level;
-    size_t cur_level = 1;
-    // the level equal or lower than upper_level will show in web page
-    auto iter = args.find("upper_level");
-    if (iter != args.end()) {
-        upper_level = std::stol(iter->second);
-    } else {
-        upper_level = 3;
-    }
-
-    std::vector<MemTracker::Snapshot> snapshots;
-    ExecEnv::GetInstance()->process_mem_tracker()->make_snapshot(&snapshots, cur_level,
-                                                                 upper_level);
-    MemTracker::make_global_mem_tracker_snapshot(&snapshots);
     for (const auto& item : snapshots) {
         string limit_str = item.limit == -1 ? "none" : AccurateItoaKMGT(item.limit);
         string current_consumption_normalize = AccurateItoaKMGT(item.cur_consumption);
         string peak_consumption_normalize = AccurateItoaKMGT(item.peak_consumption);
         (*output) << strings::Substitute(
                 "<tr><td>$0</td><td>$1</td><td>$2</td><td>$3</td><td>$4</td><td>$5</td><td>$6</"
-                "td><td>$7</td><td>$8</td></tr>\n",
-                item.level, item.label, item.parent, limit_str, item.cur_consumption,
-                current_consumption_normalize, item.peak_consumption, peak_consumption_normalize,
-                item.child_count);
+                "td><td>$7</td></tr>\n",
+                item.type, item.label, item.parent_label, limit_str, item.cur_consumption,
+                current_consumption_normalize, item.peak_consumption, peak_consumption_normalize);
     }
     (*output) << "</tbody></table>\n";
 }
diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp
index 63da436d23..0e1d737b15 100644
--- a/be/src/olap/base_compaction.cpp
+++ b/be/src/olap/base_compaction.cpp
@@ -68,7 +68,7 @@ Status BaseCompaction::execute_compact_impl() {
         return Status::OLAPInternalError(OLAP_ERR_BE_CLONE_OCCURRED);
     }
 
-    SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::COMPACTION);
+    SCOPED_ATTACH_TASK(_mem_tracker);
 
     // 2. do base compaction, merge rowsets
     int64_t permits = get_compaction_permits();
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 801cbf2990..e74dca760f 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -34,13 +34,7 @@ Compaction::Compaction(TabletSharedPtr tablet, const std::string& label)
           _input_rowsets_size(0),
           _input_row_num(0),
           _state(CompactionState::INITED) {
-#ifndef BE_TEST
-    _mem_tracker = std::make_shared<MemTrackerLimiter>(
-            -1, label, StorageEngine::instance()->compaction_mem_tracker());
-    _mem_tracker->enable_reset_zero();
-#else
-    _mem_tracker = std::make_shared<MemTrackerLimiter>(-1, label);
-#endif
+    _mem_tracker = std::make_shared<MemTrackerLimiter>(MemTrackerLimiter::Type::COMPACTION, label);
 }
 
 Compaction::~Compaction() {}
diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp
index 4461a240b5..4736454c09 100644
--- a/be/src/olap/cumulative_compaction.cpp
+++ b/be/src/olap/cumulative_compaction.cpp
@@ -70,7 +70,7 @@ Status CumulativeCompaction::execute_compact_impl() {
         return Status::OLAPInternalError(OLAP_ERR_CUMULATIVE_CLONE_OCCURRED);
     }
 
-    SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::COMPACTION);
+    SCOPED_ATTACH_TASK(_mem_tracker);
 
     // 3. do cumulative compaction, merge rowsets
     int64_t permits = get_compaction_permits();
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 5ec23ea593..c9a4890fbb 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -25,6 +25,7 @@
 #include "olap/schema.h"
 #include "olap/schema_change.h"
 #include "olap/storage_engine.h"
+#include "runtime/load_channel_mgr.h"
 #include "runtime/row_batch.h"
 #include "runtime/tuple_row.h"
 #include "service/backend_options.h"
@@ -283,12 +284,23 @@ void DeltaWriter::_reset_mem_table() {
     if (_tablet->enable_unique_key_merge_on_write() && _delete_bitmap == nullptr) {
         _delete_bitmap.reset(new DeleteBitmap(_tablet->tablet_id()));
     }
+#ifndef BE_TEST
+    auto mem_table_insert_tracker = std::make_shared<MemTracker>(
+            fmt::format("MemTableManualInsert:TabletId={}:MemTableNum={}#loadID={}",
+                        std::to_string(tablet_id()), _mem_table_num, _load_id.to_string()),
+            nullptr, ExecEnv::GetInstance()->load_channel_mgr()->mem_tracker_set());
+    auto mem_table_flush_tracker = std::make_shared<MemTracker>(
+            fmt::format("MemTableHookFlush:TabletId={}:MemTableNum={}#loadID={}",
+                        std::to_string(tablet_id()), _mem_table_num++, _load_id.to_string()),
+            nullptr, ExecEnv::GetInstance()->load_channel_mgr()->mem_tracker_set());
+#else
     auto mem_table_insert_tracker = std::make_shared<MemTracker>(
             fmt::format("MemTableManualInsert:TabletId={}:MemTableNum={}#loadID={}",
                         std::to_string(tablet_id()), _mem_table_num, _load_id.to_string()));
     auto mem_table_flush_tracker = std::make_shared<MemTracker>(
             fmt::format("MemTableHookFlush:TabletId={}:MemTableNum={}#loadID={}",
                         std::to_string(tablet_id()), _mem_table_num++, _load_id.to_string()));
+#endif
     {
         std::lock_guard<SpinLock> l(_mem_table_tracker_lock);
         _mem_table_tracker.push_back(mem_table_insert_tracker);
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index a1b2a174ab..a6505d9bbe 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -436,7 +436,7 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity,
           _num_shards(num_shards),
           _shards(nullptr),
           _last_id(1) {
-    _mem_tracker = std::make_unique<MemTrackerLimiter>(-1, name);
+    _mem_tracker = std::make_unique<MemTrackerLimiter>(MemTrackerLimiter::Type::GLOBAL, name);
     CHECK(num_shards > 0) << "num_shards cannot be 0";
     CHECK_EQ((num_shards & (num_shards - 1)), 0)
             << "num_shards should be power of two, but got " << num_shards;
diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index f61e945bda..9faf757bf9 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -157,7 +157,7 @@ MemTable::~MemTable() {
     _flush_mem_tracker->set_consumption(0);
     DCHECK_EQ(_insert_mem_tracker->consumption(), 0)
             << std::endl
-            << MemTracker::log_usage(_insert_mem_tracker->make_snapshot(0));
+            << MemTracker::log_usage(_insert_mem_tracker->make_snapshot());
     DCHECK_EQ(_flush_mem_tracker->consumption(), 0);
 }
 
diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp
index 958e60137e..ead81f7367 100644
--- a/be/src/olap/olap_server.cpp
+++ b/be/src/olap/olap_server.cpp
@@ -125,7 +125,7 @@ Status StorageEngine::start_bg_threads() {
             RETURN_IF_ERROR(Thread::create(
                     "StorageEngine", "path_scan_thread",
                     [this, data_dir]() {
-                        SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE);
+                        SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
                         this->_path_scan_thread_callback(data_dir);
                     },
                     &path_scan_thread));
@@ -135,7 +135,7 @@ Status StorageEngine::start_bg_threads() {
             RETURN_IF_ERROR(Thread::create(
                     "StorageEngine", "path_gc_thread",
                     [this, data_dir]() {
-                        SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE);
+                        SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
                         this->_path_gc_thread_callback(data_dir);
                     },
                     &path_gc_thread));
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp
index 795ca8e145..e89d945f53 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -272,8 +272,7 @@ Status BetaRowsetWriter::_check_correctness(std::unique_ptr<OlapReaderStatistics
 }
 
 Status BetaRowsetWriter::_do_compact_segments(SegCompactionCandidatesSharedPtr segments) {
-    SCOPED_ATTACH_TASK(StorageEngine::instance()->segcompaction_mem_tracker(),
-                       ThreadContext::TaskType::COMPACTION);
+    SCOPED_CONSUME_MEM_TRACKER(StorageEngine::instance()->segcompaction_mem_tracker());
     // throttle segcompaction task if memory depleted.
     if (MemTrackerLimiter::sys_mem_exceed_limit_check(GB_EXCHANGE_BYTE)) {
         LOG(WARNING) << "skip segcompaction due to memory shortage";
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 4ab9b80bdb..3081b37886 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -1596,10 +1596,10 @@ Status VSchemaChangeWithSorting::_inner_process(RowsetReaderSharedPtr rowset_rea
         }
 
         RETURN_IF_ERROR(_changer.change_block(ref_block.get(), new_block.get()));
-        if (!_mem_tracker->check_limit(_memory_limitation, new_block->allocated_bytes())) {
+        if (_mem_tracker->consumption() + new_block->allocated_bytes() > _memory_limitation) {
             RETURN_IF_ERROR(create_rowset());
 
-            if (!_mem_tracker->check_limit(_memory_limitation, new_block->allocated_bytes())) {
+            if (_mem_tracker->consumption() + new_block->allocated_bytes() > _memory_limitation) {
                 LOG(WARNING) << "Memory limitation is too small for Schema Change."
                              << " _memory_limitation=" << _memory_limitation
                              << ", new_block->allocated_bytes()=" << new_block->allocated_bytes()
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 9da8c8e147..083814fef0 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -81,12 +81,6 @@ using strings::Substitute;
 namespace doris {
 
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(unused_rowsets_count, MetricUnit::ROWSETS);
-DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(segcompaction_mem_consumption, MetricUnit::BYTES, "",
-                                   mem_consumption, Labels({{"type", "segcompaction"}}));
-DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(compaction_mem_consumption, MetricUnit::BYTES, "",
-                                   mem_consumption, Labels({{"type", "compaction"}}));
-DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(schema_change_mem_consumption, MetricUnit::BYTES, "",
-                                   mem_consumption, Labels({{"type", "schema_change"}}));
 
 StorageEngine* StorageEngine::_s_instance = nullptr;
 
@@ -112,19 +106,9 @@ StorageEngine::StorageEngine(const EngineOptions& options)
           _available_storage_medium_type_count(0),
           _effective_cluster_id(-1),
           _is_all_cluster_id_exist(true),
-          _segcompaction_mem_tracker(
-                  std::make_shared<MemTrackerLimiter>(-1, "StorageEngine::SegCompaction")),
-          _compaction_mem_tracker(
-                  std::make_shared<MemTrackerLimiter>(-1, "StorageEngine::AutoCompaction")),
-          _segment_meta_mem_tracker(std::make_unique<MemTracker>("StorageEngine::SegmentMeta")),
-          _schema_change_mem_tracker(
-                  std::make_shared<MemTrackerLimiter>(-1, "StorageEngine::SchemaChange")),
-          _clone_mem_tracker(std::make_shared<MemTrackerLimiter>(-1, "StorageEngine::Clone")),
-          _batch_load_mem_tracker(
-                  std::make_shared<MemTrackerLimiter>(-1, "StorageEngine::BatchLoad")),
-          _consistency_mem_tracker(
-                  std::make_shared<MemTrackerLimiter>(-1, "StorageEngine::Consistency")),
-          _mem_tracker(std::make_shared<MemTrackerLimiter>(-1, "StorageEngine::Self")),
+          _mem_tracker(std::make_unique<MemTracker>("StorageEngine")),
+          _segcompaction_mem_tracker(std::make_unique<MemTracker>("SegCompaction")),
+          _segment_meta_mem_tracker(std::make_unique<MemTracker>("SegmentMeta")),
           _stop_background_threads_latch(1),
           _tablet_manager(new TabletManager(config::tablet_map_shard_size)),
           _txn_manager(new TxnManager(config::txn_map_shard_size, config::txn_shard_size)),
@@ -138,19 +122,10 @@ StorageEngine::StorageEngine(const EngineOptions& options)
         // std::lock_guard<std::mutex> lock(_gc_mutex);
         return _unused_rowsets.size();
     });
-    REGISTER_HOOK_METRIC(segcompaction_mem_consumption,
-                         [this]() { return _segcompaction_mem_tracker->consumption(); });
-    REGISTER_HOOK_METRIC(compaction_mem_consumption,
-                         [this]() { return _compaction_mem_tracker->consumption(); });
-    REGISTER_HOOK_METRIC(schema_change_mem_consumption,
-                         [this]() { return _schema_change_mem_tracker->consumption(); });
 }
 
 StorageEngine::~StorageEngine() {
     DEREGISTER_HOOK_METRIC(unused_rowsets_count);
-    DEREGISTER_HOOK_METRIC(segcompaction_mem_consumption);
-    DEREGISTER_HOOK_METRIC(compaction_mem_consumption);
-    DEREGISTER_HOOK_METRIC(schema_change_mem_consumption);
     _clear();
 
     if (_base_compaction_thread_pool) {
@@ -177,7 +152,7 @@ void StorageEngine::load_data_dirs(const std::vector<DataDir*>& data_dirs) {
     std::vector<std::thread> threads;
     for (auto data_dir : data_dirs) {
         threads.emplace_back([this, data_dir] {
-            SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE);
+            SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
             auto res = data_dir->load();
             if (!res.ok()) {
                 LOG(WARNING) << "io error when init load tables. res=" << res
@@ -223,7 +198,7 @@ Status StorageEngine::_init_store_map() {
                                      _tablet_manager.get(), _txn_manager.get());
         tmp_stores.emplace_back(store);
         threads.emplace_back([this, store, &error_msg_lock, &error_msg]() {
-            SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE);
+            SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
             auto st = store->init();
             if (!st.ok()) {
                 {
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index 93f1d0c438..ab7fa4ac7d 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -179,19 +179,8 @@ public:
 
     Status get_compaction_status_json(std::string* result);
 
-    std::shared_ptr<MemTrackerLimiter> segcompaction_mem_tracker() {
-        return _segcompaction_mem_tracker;
-    }
-    std::shared_ptr<MemTrackerLimiter> compaction_mem_tracker() { return _compaction_mem_tracker; }
     MemTracker* segment_meta_mem_tracker() { return _segment_meta_mem_tracker.get(); }
-    std::shared_ptr<MemTrackerLimiter> schema_change_mem_tracker() {
-        return _schema_change_mem_tracker;
-    }
-    std::shared_ptr<MemTrackerLimiter> clone_mem_tracker() { return _clone_mem_tracker; }
-    std::shared_ptr<MemTrackerLimiter> batch_load_mem_tracker() { return _batch_load_mem_tracker; }
-    std::shared_ptr<MemTrackerLimiter> consistency_mem_tracker() {
-        return _consistency_mem_tracker;
-    }
+    MemTracker* segcompaction_mem_tracker() { return _segcompaction_mem_tracker.get(); }
 
     // check cumulative compaction config
     void check_cumulative_compaction_config();
@@ -334,24 +323,13 @@ private:
     // map<rowset_id(str), RowsetSharedPtr>, if we use RowsetId as the key, we need custom hash func
     std::unordered_map<std::string, RowsetSharedPtr> _unused_rowsets;
 
+    // StorageEngine itself
+    std::unique_ptr<MemTracker> _mem_tracker;
     // Count the memory consumption of segment compaction tasks.
-    std::shared_ptr<MemTrackerLimiter> _segcompaction_mem_tracker;
-    // Count the memory consumption of all Base and Cumulative tasks.
-    std::shared_ptr<MemTrackerLimiter> _compaction_mem_tracker;
+    std::unique_ptr<MemTracker> _segcompaction_mem_tracker;
     // This mem tracker is only for tracking memory use by segment meta data such as footer or index page.
     // The memory consumed by querying is tracked in segment iterator.
     std::unique_ptr<MemTracker> _segment_meta_mem_tracker;
-    // Count the memory consumption of all SchemaChange tasks.
-    std::shared_ptr<MemTrackerLimiter> _schema_change_mem_tracker;
-    // Count the memory consumption of all EngineCloneTask.
-    // Note: Memory that does not contain make/release snapshots.
-    std::shared_ptr<MemTrackerLimiter> _clone_mem_tracker;
-    // Count the memory consumption of all EngineBatchLoadTask.
-    std::shared_ptr<MemTrackerLimiter> _batch_load_mem_tracker;
-    // Count the memory consumption of all EngineChecksumTask.
-    std::shared_ptr<MemTrackerLimiter> _consistency_mem_tracker;
-    // StorageEngine oneself
-    std::shared_ptr<MemTrackerLimiter> _mem_tracker;
 
     CountDownLatch _stop_background_threads_latch;
     scoped_refptr<Thread> _unused_rowset_monitor_thread;
diff --git a/be/src/olap/task/engine_alter_tablet_task.cpp b/be/src/olap/task/engine_alter_tablet_task.cpp
index 8164049296..7ec1a4d4d7 100644
--- a/be/src/olap/task/engine_alter_tablet_task.cpp
+++ b/be/src/olap/task/engine_alter_tablet_task.cpp
@@ -26,15 +26,15 @@ namespace doris {
 EngineAlterTabletTask::EngineAlterTabletTask(const TAlterTabletReqV2& request)
         : _alter_tablet_req(request) {
     _mem_tracker = std::make_shared<MemTrackerLimiter>(
-            config::memory_limitation_per_thread_for_schema_change_bytes,
+            MemTrackerLimiter::Type::SCHEMA_CHANGE,
             fmt::format("EngineAlterTabletTask#baseTabletId={}:newTabletId={}",
                         std::to_string(_alter_tablet_req.base_tablet_id),
                         std::to_string(_alter_tablet_req.new_tablet_id)),
-            StorageEngine::instance()->schema_change_mem_tracker());
+            config::memory_limitation_per_thread_for_schema_change_bytes);
 }
 
 Status EngineAlterTabletTask::execute() {
-    SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE);
+    SCOPED_ATTACH_TASK(_mem_tracker);
     DorisMetrics::instance()->create_rollup_requests_total->increment(1);
 
     Status res = SchemaChangeHandler::process_alter_tablet_v2(_alter_tablet_req);
diff --git a/be/src/olap/task/engine_batch_load_task.cpp b/be/src/olap/task/engine_batch_load_task.cpp
index 9b56055339..59a9cf16bc 100644
--- a/be/src/olap/task/engine_batch_load_task.cpp
+++ b/be/src/olap/task/engine_batch_load_task.cpp
@@ -49,16 +49,15 @@ namespace doris {
 EngineBatchLoadTask::EngineBatchLoadTask(TPushReq& push_req, std::vector<TTabletInfo>* tablet_infos)
         : _push_req(push_req), _tablet_infos(tablet_infos) {
     _mem_tracker = std::make_shared<MemTrackerLimiter>(
-            -1,
+            MemTrackerLimiter::Type::BATCHLOAD,
             fmt::format("EngineBatchLoadTask#pushType={}:tabletId={}", _push_req.push_type,
-                        std::to_string(_push_req.tablet_id)),
-            StorageEngine::instance()->batch_load_mem_tracker());
+                        std::to_string(_push_req.tablet_id)));
 }
 
 EngineBatchLoadTask::~EngineBatchLoadTask() {}
 
 Status EngineBatchLoadTask::execute() {
-    SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE);
+    SCOPED_ATTACH_TASK(_mem_tracker);
     Status status;
     if (_push_req.push_type == TPushType::LOAD || _push_req.push_type == TPushType::LOAD_V2) {
         RETURN_IF_ERROR(_init());
diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp
index b6f25bb2a4..e040ffcece 100644
--- a/be/src/olap/task/engine_checksum_task.cpp
+++ b/be/src/olap/task/engine_checksum_task.cpp
@@ -27,12 +27,12 @@ EngineChecksumTask::EngineChecksumTask(TTabletId tablet_id, TSchemaHash schema_h
                                        TVersion version, uint32_t* checksum)
         : _tablet_id(tablet_id), _schema_hash(schema_hash), _version(version), _checksum(checksum) {
     _mem_tracker = std::make_shared<MemTrackerLimiter>(
-            -1, "EngineChecksumTask#tabletId=" + std::to_string(tablet_id),
-            StorageEngine::instance()->consistency_mem_tracker());
+            MemTrackerLimiter::Type::CONSISTENCY,
+            "EngineChecksumTask#tabletId=" + std::to_string(tablet_id));
 }
 
 Status EngineChecksumTask::execute() {
-    SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE);
+    SCOPED_ATTACH_TASK(_mem_tracker);
     return _compute_checksum();
 } // execute
 
diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp
index 9a42d8e0e9..b015adfe87 100644
--- a/be/src/olap/task/engine_clone_task.cpp
+++ b/be/src/olap/task/engine_clone_task.cpp
@@ -55,13 +55,13 @@ EngineCloneTask::EngineCloneTask(const TCloneReq& clone_req, const TMasterInfo&
           _signature(signature),
           _master_info(master_info) {
     _mem_tracker = std::make_shared<MemTrackerLimiter>(
-            -1, "EngineCloneTask#tabletId=" + std::to_string(_clone_req.tablet_id),
-            StorageEngine::instance()->clone_mem_tracker());
+            MemTrackerLimiter::Type::CLONE,
+            "EngineCloneTask#tabletId=" + std::to_string(_clone_req.tablet_id));
 }
 
 Status EngineCloneTask::execute() {
     // register the tablet to avoid it is deleted by gc thread during clone process
-    SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::STORAGE);
+    SCOPED_ATTACH_TASK(_mem_tracker);
     StorageEngine::instance()->tablet_manager()->register_clone_tablet(_clone_req.tablet_id);
     Status st = _do_clone();
     StorageEngine::instance()->tablet_manager()->unregister_clone_tablet(_clone_req.tablet_id);
diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt
index bd515e1c06..69af332894 100644
--- a/be/src/runtime/CMakeLists.txt
+++ b/be/src/runtime/CMakeLists.txt
@@ -99,7 +99,6 @@ set(RUNTIME_FILES
     memory/chunk_allocator.cpp
     memory/mem_tracker_limiter.cpp
     memory/mem_tracker.cpp
-    memory/mem_tracker_task_pool.cpp
     memory/thread_mem_tracker_mgr.cpp
     fold_constant_executor.cpp
     cache/result_node.cpp
diff --git a/be/src/runtime/buffer_control_block.cpp b/be/src/runtime/buffer_control_block.cpp
index 79496b449b..03bd6d3466 100644
--- a/be/src/runtime/buffer_control_block.cpp
+++ b/be/src/runtime/buffer_control_block.cpp
@@ -30,7 +30,7 @@ void GetResultBatchCtx::on_failure(const Status& status) {
     status.to_protobuf(result->mutable_status());
     {
         // call by result sink
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         done->Run();
     }
     delete this;
@@ -45,7 +45,7 @@ void GetResultBatchCtx::on_close(int64_t packet_seq, QueryStatistics* statistics
     result->set_packet_seq(packet_seq);
     result->set_eos(true);
     {
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         done->Run();
     }
     delete this;
@@ -73,7 +73,7 @@ void GetResultBatchCtx::on_data(const std::unique_ptr<TFetchDataResult>& t_resul
     }
     st.to_protobuf(result->mutable_status());
     {
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         done->Run();
     }
     delete this;
diff --git a/be/src/runtime/buffered_block_mgr2.cc b/be/src/runtime/buffered_block_mgr2.cc
index fa2d1b70b0..e610cf3803 100644
--- a/be/src/runtime/buffered_block_mgr2.cc
+++ b/be/src/runtime/buffered_block_mgr2.cc
@@ -251,7 +251,7 @@ int64_t BufferedBlockMgr2::remaining_unreserved_buffers() const {
     int64_t num_buffers =
             _free_io_buffers.size() + _unpinned_blocks.size() + _non_local_outstanding_writes;
     num_buffers +=
-            thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->spare_capacity() /
+            thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->spare_capacity() /
             max_block_size();
     num_buffers -= _unfullfilled_reserved_buffers;
     return num_buffers;
@@ -358,9 +358,9 @@ Status BufferedBlockMgr2::get_new_block(Client* client, Block* unpin_block, Bloc
 
         if (len > 0 && len < _max_block_size) {
             DCHECK(unpin_block == nullptr);
-            Status st = thread_context()
-                                ->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()
-                                ->check_limit(len);
+            Status st =
+                    thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit(
+                            len);
             WARN_IF_ERROR(st, "get_new_block failed");
             if (st) {
                 client->_tracker->consume(len);
@@ -986,7 +986,7 @@ Status BufferedBlockMgr2::find_buffer(unique_lock<mutex>& lock, BufferDescriptor
 
     // First, try to allocate a new buffer.
     if (_free_io_buffers.size() < _block_write_threshold &&
-        thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit(
+        thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit(
                 _max_block_size)) {
         _mem_tracker->consume(_max_block_size);
         uint8_t* new_buffer = new uint8_t[_max_block_size];
@@ -1155,9 +1155,9 @@ string BufferedBlockMgr2::debug_internal() const {
        << "  Unfullfilled reserved buffers: " << _unfullfilled_reserved_buffers << endl
        << "  BUffer Block Mgr Used memory: " << _mem_tracker->consumption()
        << "  Instance remaining memory: "
-       << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->spare_capacity()
+       << thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->spare_capacity()
        << " (#blocks="
-       << (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->spare_capacity() /
+       << (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->spare_capacity() /
            _max_block_size)
        << ")" << endl
        << "  Block write threshold: " << _block_write_threshold;
diff --git a/be/src/runtime/data_stream_recvr.cc b/be/src/runtime/data_stream_recvr.cc
index c9533959f8..59d46102b3 100644
--- a/be/src/runtime/data_stream_recvr.cc
+++ b/be/src/runtime/data_stream_recvr.cc
@@ -186,10 +186,7 @@ Status DataStreamRecvr::SenderQueue::get_batch(RowBatch** next_batch) {
 
     if (!_pending_closures.empty()) {
         auto closure_pair = _pending_closures.front();
-        {
-            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
-            closure_pair.first->Run();
-        }
+        closure_pair.first->Run();
         _pending_closures.pop_front();
 
         closure_pair.second.stop();
@@ -339,11 +336,8 @@ void DataStreamRecvr::SenderQueue::cancel() {
 
     {
         std::lock_guard<std::mutex> l(_lock);
-        {
-            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
-            for (auto closure_pair : _pending_closures) {
-                closure_pair.first->Run();
-            }
+        for (auto closure_pair : _pending_closures) {
+            closure_pair.first->Run();
         }
         _pending_closures.clear();
     }
@@ -357,11 +351,8 @@ void DataStreamRecvr::SenderQueue::close() {
         std::lock_guard<std::mutex> l(_lock);
         _is_cancelled = true;
 
-        {
-            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
-            for (auto closure_pair : _pending_closures) {
-                closure_pair.first->Run();
-            }
+        for (auto closure_pair : _pending_closures) {
+            closure_pair.first->Run();
         }
         _pending_closures.clear();
     }
diff --git a/be/src/runtime/data_stream_sender.cpp b/be/src/runtime/data_stream_sender.cpp
index cb02491701..df1495b750 100644
--- a/be/src/runtime/data_stream_sender.cpp
+++ b/be/src/runtime/data_stream_sender.cpp
@@ -138,7 +138,7 @@ Status DataStreamSender::Channel::send_batch(PRowBatch* batch, bool eos) {
         _closure->ref();
     } else {
         RETURN_IF_ERROR(_wait_last_brpc());
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         _closure->cntl.Reset();
     }
     VLOG_ROW << "Channel::send_batch() instance_id=" << _fragment_instance_id
@@ -160,7 +160,6 @@ Status DataStreamSender::Channel::send_batch(PRowBatch* batch, bool eos) {
     if (_parent->_transfer_large_data_by_brpc && _brpc_request.has_row_batch() &&
         _brpc_request.row_batch().has_tuple_data() &&
         _brpc_request.ByteSizeLong() > MIN_HTTP_BRPC_SIZE) {
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
         Status st = request_embed_attachment_contain_tuple<PTransmitDataParams,
                                                            RefCountClosure<PTransmitDataResult>>(
                 &_brpc_request, _closure);
@@ -174,11 +173,17 @@ Status DataStreamSender::Channel::send_batch(PRowBatch* batch, bool eos) {
                 brpc_url + "/PInternalServiceImpl/transmit_data_by_http";
         _closure->cntl.http_request().set_method(brpc::HTTP_METHOD_POST);
         _closure->cntl.http_request().set_content_type("application/json");
-        _brpc_http_stub->transmit_data_by_http(&_closure->cntl, NULL, &_closure->result, _closure);
+        {
+            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
+            _brpc_http_stub->transmit_data_by_http(&_closure->cntl, NULL, &_closure->result,
+                                                   _closure);
+        }
     } else {
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
         _closure->cntl.http_request().Clear();
-        _brpc_stub->transmit_data(&_closure->cntl, &_brpc_request, &_closure->result, _closure);
+        {
+            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
+            _brpc_stub->transmit_data(&_closure->cntl, &_brpc_request, &_closure->result, _closure);
+        }
     }
 
     if (batch != nullptr) {
diff --git a/be/src/runtime/disk_io_mgr.cc b/be/src/runtime/disk_io_mgr.cc
index 1f506d82b1..2f330f250c 100644
--- a/be/src/runtime/disk_io_mgr.cc
+++ b/be/src/runtime/disk_io_mgr.cc
@@ -347,7 +347,8 @@ DiskIoMgr::~DiskIoMgr() {
 }
 
 Status DiskIoMgr::init(const int64_t mem_limit) {
-    _mem_tracker = std::make_unique<MemTrackerLimiter>(mem_limit, "DiskIO");
+    _mem_tracker = std::make_unique<MemTrackerLimiter>(MemTrackerLimiter::Type::GLOBAL, "DiskIO",
+                                                       mem_limit);
 
     for (int i = 0; i < _disk_queues.size(); ++i) {
         _disk_queues[i] = new DiskQueue(i);
diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h
index 81af4a18f3..4bd9fd73af 100644
--- a/be/src/runtime/exec_env.h
+++ b/be/src/runtime/exec_env.h
@@ -49,7 +49,6 @@ class LoadStreamMgr;
 class MemTrackerLimiter;
 class MemTracker;
 class StorageEngine;
-class MemTrackerTaskPool;
 class PriorityThreadPool;
 class PriorityWorkStealingThreadPool;
 class ResultBufferMgr;
@@ -117,27 +116,12 @@ public:
         return nullptr;
     }
 
-    std::shared_ptr<MemTrackerLimiter> process_mem_tracker() { return _process_mem_tracker; }
-    void set_global_mem_tracker(const std::shared_ptr<MemTrackerLimiter>& process_tracker,
-                                const std::shared_ptr<MemTrackerLimiter>& orphan_tracker,
-                                const std::shared_ptr<MemTrackerLimiter>& nursery_mem_tracker,
-                                const std::shared_ptr<MemTrackerLimiter>& bthread_mem_tracker) {
-        _process_mem_tracker = process_tracker;
+    void set_orphan_mem_tracker(const std::shared_ptr<MemTrackerLimiter>& orphan_tracker) {
         _orphan_mem_tracker = orphan_tracker;
         _orphan_mem_tracker_raw = orphan_tracker.get();
-        _nursery_mem_tracker = nursery_mem_tracker;
-        _bthread_mem_tracker = bthread_mem_tracker;
-    }
-    std::shared_ptr<MemTracker> allocator_cache_mem_tracker() {
-        return _allocator_cache_mem_tracker;
     }
     std::shared_ptr<MemTrackerLimiter> orphan_mem_tracker() { return _orphan_mem_tracker; }
     MemTrackerLimiter* orphan_mem_tracker_raw() { return _orphan_mem_tracker_raw; }
-    std::shared_ptr<MemTrackerLimiter> nursery_mem_tracker() { return _nursery_mem_tracker; }
-    std::shared_ptr<MemTrackerLimiter> bthread_mem_tracker() { return _bthread_mem_tracker; }
-    std::shared_ptr<MemTrackerLimiter> query_pool_mem_tracker() { return _query_pool_mem_tracker; }
-    std::shared_ptr<MemTrackerLimiter> load_pool_mem_tracker() { return _load_pool_mem_tracker; }
-    MemTrackerTaskPool* task_pool_mem_tracker_registry() { return _task_pool_mem_tracker_registry; }
     ThreadResourceMgr* thread_mgr() { return _thread_mgr; }
     PriorityThreadPool* scan_thread_pool() { return _scan_thread_pool; }
     PriorityThreadPool* remote_scan_thread_pool() { return _remote_scan_thread_pool; }
@@ -194,7 +178,7 @@ private:
     Status _init(const std::vector<StorePath>& store_paths);
     void _destroy();
 
-    Status _init_mem_tracker();
+    Status _init_mem_env();
     /// Initialise 'buffer_pool_' with given capacity.
     void _init_buffer_pool(int64_t min_page_len, int64_t capacity, int64_t clean_pages_limit);
 
@@ -217,11 +201,6 @@ private:
     ClientCache<TPaloBrokerServiceClient>* _broker_client_cache = nullptr;
     ThreadResourceMgr* _thread_mgr = nullptr;
 
-    // The ancestor for all trackers. Every tracker is visible from the process down.
-    // Not limit total memory by process tracker, and it's just used to track virtual memory of process.
-    std::shared_ptr<MemTrackerLimiter> _process_mem_tracker;
-    // tcmalloc/jemalloc allocator cache tracker, Including thread cache, free heap, etc.
-    std::shared_ptr<MemTracker> _allocator_cache_mem_tracker;
     // The default tracker consumed by mem hook. If the thread does not attach other trackers,
     // by default all consumption will be passed to the process tracker through the orphan tracker.
     // In real time, `consumption of all limiter trackers` + `orphan tracker consumption` = `process tracker consumption`.
@@ -229,15 +208,6 @@ private:
     // and the consumption of the orphan mem tracker is close to 0, but greater than 0.
     std::shared_ptr<MemTrackerLimiter> _orphan_mem_tracker;
     MemTrackerLimiter* _orphan_mem_tracker_raw;
-    // Parent is orphan, Nursery of orphan memory after manually switching thread mem tracker
-    std::shared_ptr<MemTrackerLimiter> _nursery_mem_tracker;
-    // Parent is orphan, bthread default mem tracker
-    std::shared_ptr<MemTrackerLimiter> _bthread_mem_tracker;
-    // The ancestor for all querys tracker.
-    std::shared_ptr<MemTrackerLimiter> _query_pool_mem_tracker;
-    // The ancestor for all load tracker.
-    std::shared_ptr<MemTrackerLimiter> _load_pool_mem_tracker;
-    MemTrackerTaskPool* _task_pool_mem_tracker_registry;
 
     // The following two thread pools are used in different scenarios.
     // _scan_thread_pool is a priority thread pool.
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index d18e02b90b..bec666859c 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -39,7 +39,6 @@
 #include "runtime/load_channel_mgr.h"
 #include "runtime/load_path_mgr.h"
 #include "runtime/memory/mem_tracker.h"
-#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/result_buffer_mgr.h"
 #include "runtime/result_queue_mgr.h"
 #include "runtime/routine_load/routine_load_task_executor.h"
@@ -72,10 +71,6 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(send_batch_thread_pool_thread_num, MetricUnit
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(send_batch_thread_pool_queue_size, MetricUnit::NOUNIT);
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(download_cache_thread_pool_thread_num, MetricUnit::NOUNIT);
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(download_cache_thread_pool_queue_size, MetricUnit::NOUNIT);
-DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(query_mem_consumption, MetricUnit::BYTES, "", mem_consumption,
-                                   Labels({{"type", "query"}}));
-DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(load_mem_consumption, MetricUnit::BYTES, "", mem_consumption,
-                                   Labels({{"type", "load"}}));
 
 Status ExecEnv::init(ExecEnv* env, const std::vector<StorePath>& store_paths) {
     return env->_init(store_paths);
@@ -100,7 +95,6 @@ Status ExecEnv::_init(const std::vector<StorePath>& store_paths) {
     _backend_client_cache = new BackendServiceClientCache(config::max_client_cache_size_per_host);
     _frontend_client_cache = new FrontendServiceClientCache(config::max_client_cache_size_per_host);
     _broker_client_cache = new BrokerServiceClientCache(config::max_client_cache_size_per_host);
-    _task_pool_mem_tracker_registry = new MemTrackerTaskPool();
     _thread_mgr = new ThreadResourceMgr();
     if (config::doris_enable_scanner_thread_pool_per_disk &&
         config::doris_scanner_thread_pool_thread_num >= store_paths.size() &&
@@ -169,42 +163,22 @@ Status ExecEnv::_init(const std::vector<StorePath>& store_paths) {
     _small_file_mgr->init();
     _scanner_scheduler->init(this);
 
-    _init_mem_tracker();
+    RETURN_IF_ERROR(_init_mem_env()); // propagate init failures instead of dropping them
 
-    RETURN_IF_ERROR(
-            _load_channel_mgr->init(ExecEnv::GetInstance()->process_mem_tracker()->limit()));
+    RETURN_IF_ERROR(_load_channel_mgr->init(MemInfo::mem_limit()));
     _heartbeat_flags = new HeartbeatFlags();
     _register_metrics();
     _is_init = true;
     return Status::OK();
 }
 
-Status ExecEnv::_init_mem_tracker() {
-    // 1. init global memory limit.
-    int64_t global_memory_limit_bytes = 0;
+Status ExecEnv::_init_mem_env() {
     bool is_percent = false;
     std::stringstream ss;
-    global_memory_limit_bytes =
-            ParseUtil::parse_mem_spec(config::mem_limit, -1, MemInfo::physical_mem(), &is_percent);
-    if (global_memory_limit_bytes <= 0) {
-        ss << "Failed to parse mem limit from '" + config::mem_limit + "'.";
-        return Status::InternalError(ss.str());
-    }
-
-    if (global_memory_limit_bytes > MemInfo::physical_mem()) {
-        LOG(WARNING) << "Memory limit "
-                     << PrettyPrinter::print(global_memory_limit_bytes, TUnit::BYTES)
-                     << " exceeds physical memory of "
-                     << PrettyPrinter::print(MemInfo::physical_mem(), TUnit::BYTES)
-                     << ". Using physical memory instead";
-        global_memory_limit_bytes = MemInfo::physical_mem();
-    }
-    _process_mem_tracker =
-            std::make_shared<MemTrackerLimiter>(global_memory_limit_bytes, "Process");
-    _orphan_mem_tracker = std::make_shared<MemTrackerLimiter>(-1, "Orphan", _process_mem_tracker);
+    // 1. init mem tracker
+    _orphan_mem_tracker =
+            std::make_shared<MemTrackerLimiter>(MemTrackerLimiter::Type::GLOBAL, "Orphan");
     _orphan_mem_tracker_raw = _orphan_mem_tracker.get();
-    _nursery_mem_tracker = std::make_shared<MemTrackerLimiter>(-1, "Nursery", _orphan_mem_tracker);
-    _bthread_mem_tracker = std::make_shared<MemTrackerLimiter>(-1, "Bthread", _orphan_mem_tracker);
     thread_context()->_thread_mem_tracker_mgr->init();
     thread_context()->_thread_mem_tracker_mgr->set_check_attach(false);
 #if defined(USE_MEM_TRACKER) && !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && \
@@ -213,18 +187,6 @@ Status ExecEnv::_init_mem_tracker() {
         init_hook();
     }
 #endif
-    _allocator_cache_mem_tracker = std::make_shared<MemTracker>("Tc/JemallocAllocatorCache");
-    _query_pool_mem_tracker =
-            std::make_shared<MemTrackerLimiter>(-1, "QueryPool", _process_mem_tracker);
-    REGISTER_HOOK_METRIC(query_mem_consumption,
-                         [this]() { return _query_pool_mem_tracker->consumption(); });
-    _load_pool_mem_tracker =
-            std::make_shared<MemTrackerLimiter>(-1, "LoadPool", _process_mem_tracker);
-    REGISTER_HOOK_METRIC(load_mem_consumption,
-                         [this]() { return _load_pool_mem_tracker->consumption(); });
-    LOG(INFO) << "Using global memory limit: "
-              << PrettyPrinter::print(global_memory_limit_bytes, TUnit::BYTES)
-              << ", origin config value: " << config::mem_limit;
 
     // 2. init buffer pool
     if (!BitUtil::IsPowerOf2(config::min_buffer_size)) {
@@ -232,9 +194,8 @@ Status ExecEnv::_init_mem_tracker() {
         return Status::InternalError(ss.str());
     }
 
-    int64_t buffer_pool_limit =
-            ParseUtil::parse_mem_spec(config::buffer_pool_limit, global_memory_limit_bytes,
-                                      MemInfo::physical_mem(), &is_percent);
+    int64_t buffer_pool_limit = ParseUtil::parse_mem_spec(
+            config::buffer_pool_limit, MemInfo::mem_limit(), MemInfo::physical_mem(), &is_percent);
     if (buffer_pool_limit <= 0) {
         ss << "Invalid config buffer_pool_limit value, must be a percentage or "
               "positive bytes value or percentage: "
@@ -242,7 +203,7 @@ Status ExecEnv::_init_mem_tracker() {
         return Status::InternalError(ss.str());
     }
     buffer_pool_limit = BitUtil::RoundDown(buffer_pool_limit, config::min_buffer_size);
-    while (!is_percent && buffer_pool_limit > global_memory_limit_bytes / 2) {
+    while (!is_percent && buffer_pool_limit > MemInfo::mem_limit() / 2) {
         // If buffer_pool_limit is not a percentage, and the value exceeds 50% of the total memory limit,
         // it is forced to be reduced to less than 50% of the total memory limit.
         // This is to ensure compatibility. In principle, buffer_pool_limit should be set as a percentage.
@@ -271,9 +232,9 @@ Status ExecEnv::_init_mem_tracker() {
 
     // 3. init storage page cache
     int64_t storage_cache_limit =
-            ParseUtil::parse_mem_spec(config::storage_page_cache_limit, global_memory_limit_bytes,
+            ParseUtil::parse_mem_spec(config::storage_page_cache_limit, MemInfo::mem_limit(),
                                       MemInfo::physical_mem(), &is_percent);
-    while (!is_percent && storage_cache_limit > global_memory_limit_bytes / 2) {
+    while (!is_percent && storage_cache_limit > MemInfo::mem_limit() / 2) {
         // Reason same as buffer_pool_limit
         storage_cache_limit = storage_cache_limit / 2;
     }
@@ -301,7 +262,7 @@ Status ExecEnv::_init_mem_tracker() {
     SegmentLoader::create_global_instance(segment_cache_capacity);
 
     // 4. init other managers
-    RETURN_IF_ERROR(_disk_io_mgr->init(global_memory_limit_bytes));
+    RETURN_IF_ERROR(_disk_io_mgr->init(MemInfo::mem_limit()));
     RETURN_IF_ERROR(_tmp_file_mgr->init());
 
     // 5. init chunk allocator
@@ -312,7 +273,7 @@ Status ExecEnv::_init_mem_tracker() {
     }
 
     int64_t chunk_reserved_bytes_limit =
-            ParseUtil::parse_mem_spec(config::chunk_reserved_bytes_limit, global_memory_limit_bytes,
+            ParseUtil::parse_mem_spec(config::chunk_reserved_bytes_limit, MemInfo::mem_limit(),
                                       MemInfo::physical_mem(), &is_percent);
     if (chunk_reserved_bytes_limit <= 0) {
         ss << "Invalid config chunk_reserved_bytes_limit value, must be a percentage or "
@@ -408,12 +369,8 @@ void ExecEnv::_destroy() {
     SAFE_DELETE(_routine_load_task_executor);
     SAFE_DELETE(_external_scan_context_mgr);
     SAFE_DELETE(_heartbeat_flags);
-    SAFE_DELETE(_task_pool_mem_tracker_registry);
     SAFE_DELETE(_scanner_scheduler);
 
-    DEREGISTER_HOOK_METRIC(query_mem_consumption);
-    DEREGISTER_HOOK_METRIC(load_mem_consumption);
-
     _is_init = false;
 }
 
diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp
index 8b5a1c5c44..8085ddf13b 100644
--- a/be/src/runtime/fragment_mgr.cpp
+++ b/be/src/runtime/fragment_mgr.cpp
@@ -640,6 +640,38 @@ Status FragmentMgr::exec_plan_fragment(const TExecPlanFragmentParams& params, Fi
         fragments_ctx->timeout_second = params.query_options.query_timeout;
         _set_scan_concurrency(params, fragments_ctx.get());
 
+        // Create the per-query memory tracker. bytes_limit stays -1 unless a positive
+        // mem_limit was supplied, and is capped at the process-wide MemInfo::mem_limit().
+        bool has_query_mem_tracker =
+                params.query_options.__isset.mem_limit && (params.query_options.mem_limit > 0);
+        int64_t bytes_limit = has_query_mem_tracker ? params.query_options.mem_limit : -1;
+        if (bytes_limit > MemInfo::mem_limit()) {
+            VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES)
+                        << " exceeds process memory limit of "
+                        << PrettyPrinter::print(MemInfo::mem_limit(), TUnit::BYTES)
+                        << ". Using process memory limit instead";
+            bytes_limit = MemInfo::mem_limit();
+        }
+        if (params.query_options.query_type == TQueryType::SELECT) {
+            fragments_ctx->query_mem_tracker = std::make_shared<MemTrackerLimiter>(
+                    MemTrackerLimiter::Type::QUERY,
+                    fmt::format("Query#Id={}", print_id(fragments_ctx->query_id)), bytes_limit);
+        } else if (params.query_options.query_type == TQueryType::LOAD) {
+            fragments_ctx->query_mem_tracker = std::make_shared<MemTrackerLimiter>(
+                    MemTrackerLimiter::Type::LOAD,
+                    fmt::format("Load#Id={}", print_id(fragments_ctx->query_id)), bytes_limit);
+        } else {
+            // Any other query type (presumably external queries - TODO confirm) still needs a
+            // tracker: it is dereferenced right below, so leaving it unset would be a null deref.
+            fragments_ctx->query_mem_tracker = std::make_shared<MemTrackerLimiter>(
+                    MemTrackerLimiter::Type::LOAD,
+                    fmt::format("External#Id={}", print_id(fragments_ctx->query_id)), bytes_limit);
+        }
+        if (params.query_options.__isset.is_report_success &&
+            params.query_options.is_report_success) {
+            fragments_ctx->query_mem_tracker->enable_print_log_usage();
+        }
+
         {
             // Find _fragments_ctx_map again, in case some other request has already
             // create the query fragments context.
@@ -647,6 +679,9 @@ Status FragmentMgr::exec_plan_fragment(const TExecPlanFragmentParams& params, Fi
             auto search = _fragments_ctx_map.find(params.params.query_id);
             if (search == _fragments_ctx_map.end()) {
                 _fragments_ctx_map.insert(std::make_pair(fragments_ctx->query_id, fragments_ctx));
+                LOG(INFO) << "Register query/load memory tracker, query/load id: "
+                          << print_id(fragments_ctx->query_id)
+                          << " limit: " << PrettyPrinter::print(bytes_limit, TUnit::BYTES);
             } else {
                 // Already has a query fragmentscontext, use it
                 fragments_ctx = search->second;
diff --git a/be/src/runtime/load_channel_mgr.cpp b/be/src/runtime/load_channel_mgr.cpp
index 8292b25656..6eef349fe1 100644
--- a/be/src/runtime/load_channel_mgr.cpp
+++ b/be/src/runtime/load_channel_mgr.cpp
@@ -71,6 +71,8 @@ Status LoadChannelMgr::init(int64_t process_mem_limit) {
     _load_hard_mem_limit = calc_process_max_load_memory(process_mem_limit);
     _load_soft_mem_limit = _load_hard_mem_limit * config::load_process_soft_mem_limit_percent / 100;
     _mem_tracker = std::make_unique<MemTracker>("LoadChannelMgr");
+    _mem_tracker_set = std::make_unique<MemTrackerLimiter>(MemTrackerLimiter::Type::LOAD,
+                                                           "LoadChannelMgrTrackerSet");
     REGISTER_HOOK_METRIC(load_channel_mem_consumption,
                          [this]() { return _mem_tracker->consumption(); });
     _last_success_channel = new_lru_cache("LastestSuccessChannelCache", 1024);
@@ -94,8 +96,15 @@ Status LoadChannelMgr::open(const PTabletWriterOpenRequest& params) {
             bool is_high_priority = (params.has_is_high_priority() && params.is_high_priority());
 
             // Use the same mem limit as LoadChannelMgr for a single load channel
+#ifndef BE_TEST
+            auto channel_mem_tracker = std::make_unique<MemTracker>(
+                    fmt::format("LoadChannel#senderIp={}#loadID={}", params.sender_ip(),
+                                load_id.to_string()),
+                    nullptr, ExecEnv::GetInstance()->load_channel_mgr()->mem_tracker_set());
+#else
             auto channel_mem_tracker = std::make_unique<MemTracker>(fmt::format(
                     "LoadChannel#senderIp={}#loadID={}", params.sender_ip(), load_id.to_string()));
+#endif
             channel.reset(new LoadChannel(load_id, std::move(channel_mem_tracker),
                                           channel_timeout_s, is_high_priority, params.sender_ip(),
                                           params.is_vectorized()));
diff --git a/be/src/runtime/load_channel_mgr.h b/be/src/runtime/load_channel_mgr.h
index 6b30503f0c..3f27eafd0e 100644
--- a/be/src/runtime/load_channel_mgr.h
+++ b/be/src/runtime/load_channel_mgr.h
@@ -67,6 +67,7 @@ public:
         }
         _mem_tracker->set_consumption(mem_usage);
     }
+    MemTrackerLimiter* mem_tracker_set() { return _mem_tracker_set.get(); }
 
 private:
     template <typename Request>
@@ -91,6 +92,8 @@ protected:
 
     // check the total load channel mem consumption of this Backend
     std::unique_ptr<MemTracker> _mem_tracker;
+    // Associate load channel tracker and memtable tracker, avoid default association to Orphan tracker.
+    std::unique_ptr<MemTrackerLimiter> _mem_tracker_set;
     int64_t _load_hard_mem_limit = -1;
     int64_t _load_soft_mem_limit = -1;
 
diff --git a/be/src/runtime/mem_pool.cpp b/be/src/runtime/mem_pool.cpp
index d49389353d..6d2760b389 100644
--- a/be/src/runtime/mem_pool.cpp
+++ b/be/src/runtime/mem_pool.cpp
@@ -134,7 +134,7 @@ Status MemPool::find_chunk(size_t min_size, bool check_limits) {
 
     chunk_size = BitUtil::RoundUpToPowerOfTwo(chunk_size);
     if (check_limits &&
-        !thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit(
+        !thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->check_limit(
                 chunk_size)) {
         return Status::MemoryAllocFailed("MemPool find new chunk {} bytes faild, exceed limit",
                                          chunk_size);
diff --git a/be/src/runtime/memory/chunk_allocator.cpp b/be/src/runtime/memory/chunk_allocator.cpp
index 43acc79538..b812734b4c 100644
--- a/be/src/runtime/memory/chunk_allocator.cpp
+++ b/be/src/runtime/memory/chunk_allocator.cpp
@@ -135,7 +135,8 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit)
           _steal_arena_limit(reserve_limit * 0.1),
           _reserved_bytes(0),
           _arenas(CpuInfo::get_max_num_cores()) {
-    _mem_tracker = std::make_unique<MemTrackerLimiter>(-1, "ChunkAllocator");
+    _mem_tracker =
+            std::make_unique<MemTrackerLimiter>(MemTrackerLimiter::Type::GLOBAL, "ChunkAllocator");
     for (int i = 0; i < _arenas.size(); ++i) {
         _arenas[i].reset(new ChunkArena());
     }
diff --git a/be/src/runtime/memory/mem_tracker.cpp b/be/src/runtime/memory/mem_tracker.cpp
index 0604d538dc..bf7e308ff3 100644
--- a/be/src/runtime/memory/mem_tracker.cpp
+++ b/be/src/runtime/memory/mem_tracker.cpp
@@ -41,7 +41,8 @@ struct TrackerGroup {
 // Multiple groups are used to reduce the impact of locks.
 static std::vector<TrackerGroup> mem_tracker_pool(1000);
 
-MemTracker::MemTracker(const std::string& label, RuntimeProfile* profile) {
+MemTracker::MemTracker(const std::string& label, RuntimeProfile* profile, MemTrackerLimiter* parent)
+        : _label(label) {
     if (profile == nullptr) {
         _consumption = std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES);
     } else {
@@ -57,75 +58,57 @@ MemTracker::MemTracker(const std::string& label, RuntimeProfile* profile) {
         _consumption = profile->AddSharedHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES);
     }
 
-    DCHECK(thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw() != nullptr);
-    MemTrackerLimiter* parent =
-            thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw();
-    _label = fmt::format("[Observer] {} | {}", label, parent->label());
-    _bind_group_num = parent->group_num();
+    if (parent) {
+        _parent_label = parent->label();
+        _parent_group_num = parent->group_num();
+    } else {
+        DCHECK(thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker() != nullptr);
+        _parent_label = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->label();
+        _parent_group_num =
+                thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->group_num();
+    }
     {
-        std::lock_guard<std::mutex> l(mem_tracker_pool[_bind_group_num].group_lock);
-        _tracker_group_it = mem_tracker_pool[_bind_group_num].trackers.insert(
-                mem_tracker_pool[_bind_group_num].trackers.end(), this);
+        std::lock_guard<std::mutex> l(mem_tracker_pool[_parent_group_num].group_lock);
+        _tracker_group_it = mem_tracker_pool[_parent_group_num].trackers.insert(
+                mem_tracker_pool[_parent_group_num].trackers.end(), this);
     }
 }
 
 MemTracker::~MemTracker() {
-    if (_bind_group_num != -1) {
-        std::lock_guard<std::mutex> l(mem_tracker_pool[_bind_group_num].group_lock);
-        if (_tracker_group_it != mem_tracker_pool[_bind_group_num].trackers.end()) {
-            mem_tracker_pool[_bind_group_num].trackers.erase(_tracker_group_it);
-            _tracker_group_it = mem_tracker_pool[_bind_group_num].trackers.end();
+    if (_parent_group_num != -1) {
+        std::lock_guard<std::mutex> l(mem_tracker_pool[_parent_group_num].group_lock);
+        if (_tracker_group_it != mem_tracker_pool[_parent_group_num].trackers.end()) {
+            mem_tracker_pool[_parent_group_num].trackers.erase(_tracker_group_it);
+            _tracker_group_it = mem_tracker_pool[_parent_group_num].trackers.end();
         }
     }
 }
 
-MemTracker::Snapshot MemTracker::make_snapshot(size_t level) const {
+MemTracker::Snapshot MemTracker::make_snapshot() const {
     Snapshot snapshot;
-    snapshot.label = split(_label, " | ")[0];
-    snapshot.parent = split(_label, " | ")[1];
-    snapshot.level = level;
+    snapshot.label = _label;
+    snapshot.parent_label = _parent_label;
     snapshot.limit = -1;
     snapshot.cur_consumption = _consumption->current_value();
     snapshot.peak_consumption = _consumption->value();
-    snapshot.child_count = 0;
     return snapshot;
 }
 
-void MemTracker::make_group_snapshot(std::vector<MemTracker::Snapshot>* snapshots, size_t level,
-                                     int64_t group_num, std::string related_label) {
+void MemTracker::make_group_snapshot(std::vector<MemTracker::Snapshot>* snapshots,
+                                     int64_t group_num, std::string parent_label) {
     std::lock_guard<std::mutex> l(mem_tracker_pool[group_num].group_lock);
     for (auto tracker : mem_tracker_pool[group_num].trackers) {
-        if (split(tracker->label(), " | ")[1] == related_label) {
-            snapshots->push_back(tracker->make_snapshot(level));
+        if (tracker->parent_label() == parent_label) {
+            snapshots->push_back(tracker->make_snapshot());
         }
     }
 }
 
 std::string MemTracker::log_usage(MemTracker::Snapshot snapshot) {
     return fmt::format("MemTracker Label={}, Parent Label={}, Used={}({} B), Peak={}({} B)",
-                       snapshot.label, snapshot.parent, print_bytes(snapshot.cur_consumption),
+                       snapshot.label, snapshot.parent_label, print_bytes(snapshot.cur_consumption),
                        snapshot.cur_consumption, print_bytes(snapshot.peak_consumption),
                        snapshot.peak_consumption);
 }
 
-static std::unordered_map<std::string, std::shared_ptr<MemTracker>> global_mem_trackers;
-static std::mutex global_trackers_lock;
-
-std::shared_ptr<MemTracker> MemTracker::get_global_mem_tracker(const std::string& label) {
-    std::lock_guard<std::mutex> l(global_trackers_lock);
-    if (global_mem_trackers.find(label) != global_mem_trackers.end()) {
-        return global_mem_trackers[label];
-    } else {
-        global_mem_trackers.emplace(
-                label, std::make_shared<MemTracker>(fmt::format("[Global] {}", label)));
-        return global_mem_trackers[label];
-    }
-}
-
-void MemTracker::make_global_mem_tracker_snapshot(std::vector<MemTracker::Snapshot>* snapshots) {
-    std::lock_guard<std::mutex> l(global_trackers_lock);
-    for (auto& v : global_mem_trackers) {
-        snapshots->push_back(v.second->make_snapshot(1));
-    }
-}
 } // namespace doris
\ No newline at end of file
diff --git a/be/src/runtime/memory/mem_tracker.h b/be/src/runtime/memory/mem_tracker.h
index 01a0d58cdb..5ffcaa30a1 100644
--- a/be/src/runtime/memory/mem_tracker.h
+++ b/be/src/runtime/memory/mem_tracker.h
@@ -24,6 +24,8 @@
 
 namespace doris {
 
+class MemTrackerLimiter;
+
 // Used to track memory usage.
 //
 // MemTracker can be consumed manually by consume()/release(), or put into SCOPED_CONSUME_MEM_TRACKER,
@@ -33,30 +35,22 @@ namespace doris {
 class MemTracker {
 public:
     struct Snapshot {
+        std::string type = "";
         std::string label;
-        // For MemTracker, it is only weakly related to parent through label, ensuring MemTracker Independence.
-        // For MemTrackerLimiter, it is strongly related to parent and saves pointer objects to each other.
-        std::string parent = "";
-        size_t level = 0;
+        std::string parent_label = "";
         int64_t limit = 0;
         int64_t cur_consumption = 0;
         int64_t peak_consumption = 0;
-        size_t child_count = 0;
     };
 
     // Creates and adds the tracker to the mem_tracker_pool.
-    MemTracker(const std::string& label, RuntimeProfile* profile = nullptr);
+    MemTracker(const std::string& label, RuntimeProfile* profile = nullptr,
+               MemTrackerLimiter* parent = nullptr);
     // For MemTrackerLimiter
-    MemTracker() { _bind_group_num = -1; }
+    MemTracker() { _parent_group_num = -1; }
 
     ~MemTracker();
 
-    // Get a global tracker with a specified label, and the tracker will be created when the label is first get.
-    // use SCOPED_CONSUME_MEM_TRACKER count the memory in the scope to a global tracker with the specified label name.
-    // which is usually used for debugging, to finding memory hotspots.
-    static std::shared_ptr<MemTracker> get_global_mem_tracker(const std::string& label);
-    static void make_global_mem_tracker_snapshot(std::vector<MemTracker::Snapshot>* snapshots);
-
     static std::string print_bytes(int64_t bytes) {
         return bytes >= 0 ? PrettyPrinter::print(bytes, TUnit::BYTES)
                           : "-" + PrettyPrinter::print(std::abs(bytes), TUnit::BYTES);
@@ -64,28 +58,23 @@ public:
 
 public:
     const std::string& label() const { return _label; }
+    const std::string& parent_label() const { return _parent_label; }
     // Returns the memory consumed in bytes.
     int64_t consumption() const { return _consumption->current_value(); }
     int64_t peak_consumption() const { return _consumption->value(); }
 
-    void consume(int64_t bytes);
+    void consume(int64_t bytes) {
+        if (bytes == 0) return;
+        _consumption->add(bytes);
+    }
     void release(int64_t bytes) { consume(-bytes); }
-    // Transfer 'bytes' of consumption from this tracker to 'dst'.
-    void transfer_to(MemTracker* dst, int64_t bytes);
     void set_consumption(int64_t bytes) { _consumption->set(bytes); }
 
 public:
-    bool limit_exceeded(int64_t limit) const { return limit >= 0 && limit < consumption(); }
-    // Return true, no exceeded limit
-    bool check_limit(int64_t limit, int64_t bytes) const {
-        return limit >= 0 && limit > consumption() + bytes;
-    }
-
-    Snapshot make_snapshot(size_t level) const;
-    // Specify group_num from mem_tracker_pool to generate snapshot, requiring tracker.label to be related
-    // with parameter related_label
-    static void make_group_snapshot(std::vector<Snapshot>* snapshots, size_t level,
-                                    int64_t group_num, std::string related_label);
+    Snapshot make_snapshot() const;
+    // Specify group_num from mem_tracker_pool to generate snapshot.
+    static void make_group_snapshot(std::vector<Snapshot>* snapshots, int64_t group_num,
+                                    std::string parent_label);
     static std::string log_usage(MemTracker::Snapshot snapshot);
 
     std::string debug_string() {
@@ -105,23 +94,11 @@ protected:
     std::shared_ptr<RuntimeProfile::HighWaterMarkCounter> _consumption; // in bytes
 
     // Tracker is located in group num in mem_tracker_pool
-    int64_t _bind_group_num;
+    int64_t _parent_group_num;
+    std::string _parent_label;
 
     // Iterator into mem_tracker_pool for this object. Stored to have O(1) remove.
     std::list<MemTracker*>::iterator _tracker_group_it;
 };
 
-inline void MemTracker::consume(int64_t bytes) {
-    if (bytes == 0) {
-        return;
-    } else {
-        _consumption->add(bytes);
-    }
-}
-
-inline void MemTracker::transfer_to(MemTracker* dst, int64_t bytes) {
-    release(bytes);
-    dst->consume(bytes);
-}
-
 } // namespace doris
\ No newline at end of file
diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp
index 20639326f8..1fca6009ee 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.cpp
+++ b/be/src/runtime/memory/mem_tracker_limiter.cpp
@@ -22,7 +22,6 @@
 #include <boost/stacktrace.hpp>
 
 #include "gutil/once.h"
-#include "gutil/walltime.h"
 #include "runtime/runtime_state.h"
 #include "runtime/thread_context.h"
 #include "util/pretty_printer.h"
@@ -30,8 +29,19 @@
 
 namespace doris {
 
-MemTrackerLimiter::MemTrackerLimiter(int64_t byte_limit, const std::string& label,
-                                     const std::shared_ptr<MemTrackerLimiter>& parent,
+struct TrackerLimiterGroup {
+    std::list<MemTrackerLimiter*> trackers;
+    std::mutex group_lock;
+};
+
+// Save all MemTrackerLimiters in use.
+// Each group corresponds to several MemTrackerLimiters and has a lock.
+// Multiple groups are used to reduce the impact of locks.
+static std::vector<TrackerLimiterGroup> mem_tracker_limiter_pool(1000);
+
+std::atomic<bool> MemTrackerLimiter::_enable_print_log_process_usage {true};
+
+MemTrackerLimiter::MemTrackerLimiter(Type type, const std::string& label, int64_t byte_limit,
                                      RuntimeProfile* profile) {
     DCHECK_GE(byte_limit, -1);
     if (profile == nullptr) {
@@ -39,268 +49,215 @@ MemTrackerLimiter::MemTrackerLimiter(int64_t byte_limit, const std::string& labe
     } else {
         _consumption = profile->AddSharedHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES);
     }
+    _type = type;
     _label = label;
     _limit = byte_limit;
-    _group_num = GetCurrentTimeMicros() % 1000;
-    if (parent || label == "Process") {
-        _parent = parent;
-    } else if (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->label() ==
-               "Orphan") {
-        _parent = ExecEnv::GetInstance()->process_mem_tracker();
+    if (_type == Type::GLOBAL) {
+        _group_num = 0;
     } else {
-        _parent = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker();
+        _group_num = random() % 999 + 1;
     }
-    DCHECK(_parent || label == "Process");
-
-    // Walks the MemTrackerLimiter hierarchy and populates _all_ancestors and _limited_ancestors
-    MemTrackerLimiter* tracker = this;
-    while (tracker != nullptr) {
-        _all_ancestors.push_back(tracker);
-        // Process tracker does not participate in the process memory limit, process tracker consumption is virtual memory,
-        // and there is a diff between the real physical memory value of the process. It is replaced by check_sys_mem_info.
-        if (tracker->has_limit() && tracker->label() != "Process")
-            _limited_ancestors.push_back(tracker);
-        tracker = tracker->_parent.get();
-    }
-    DCHECK_GT(_all_ancestors.size(), 0);
-    DCHECK_EQ(_all_ancestors[0], this);
-    if (_parent) {
-        std::lock_guard<std::mutex> l(_parent->_child_tracker_limiter_lock);
-        _child_tracker_it = _parent->_child_tracker_limiters.insert(
-                _parent->_child_tracker_limiters.end(), this);
-        _had_child_count++;
+    {
+        std::lock_guard<std::mutex> l(mem_tracker_limiter_pool[_group_num].group_lock);
+        _tracker_limiter_group_it = mem_tracker_limiter_pool[_group_num].trackers.insert(
+                mem_tracker_limiter_pool[_group_num].trackers.end(), this);
     }
 }
 
 MemTrackerLimiter::~MemTrackerLimiter() {
-    // TCMalloc hook will be triggered during destructor memtracker, may cause crash.
-    if (_label == "Process") doris::thread_context_ptr.init = false;
-    DCHECK(remain_child_count() == 0 || _label == "Process");
+    // mem hook record tracker cannot guarantee that the final consumption is 0,
+    // nor can it guarantee that the memory alloc and free are recorded in a one-to-one correspondence.
     // In order to ensure `consumption of all limiter trackers` + `orphan tracker consumption` = `process tracker consumption`
     // in real time. Merge its consumption into orphan when parent is process, to avoid repetition.
-    if (_parent && _parent->label() == "Process") {
-        ExecEnv::GetInstance()->orphan_mem_tracker_raw()->cache_consume_local(
-                _consumption->current_value());
-    }
-    if (_reset_zero) {
-        reset_zero();
-        _all_ancestors.clear();
-        _all_ancestors.push_back(ExecEnv::GetInstance()->orphan_mem_tracker_raw());
-    }
-    consume_local(_untracked_mem);
-    if (_parent) {
-        std::lock_guard<std::mutex> l(_parent->_child_tracker_limiter_lock);
-        if (_child_tracker_it != _parent->_child_tracker_limiters.end()) {
-            _parent->_child_tracker_limiters.erase(_child_tracker_it);
-            _child_tracker_it = _parent->_child_tracker_limiters.end();
+    ExecEnv::GetInstance()->orphan_mem_tracker()->consume(_consumption->current_value());
+    _consumption->set(0);
+    {
+        std::lock_guard<std::mutex> l(mem_tracker_limiter_pool[_group_num].group_lock);
+        if (_tracker_limiter_group_it != mem_tracker_limiter_pool[_group_num].trackers.end()) {
+            mem_tracker_limiter_pool[_group_num].trackers.erase(_tracker_limiter_group_it);
+            _tracker_limiter_group_it = mem_tracker_limiter_pool[_group_num].trackers.end();
         }
     }
 }
 
-MemTracker::Snapshot MemTrackerLimiter::make_snapshot(size_t level) const {
+MemTracker::Snapshot MemTrackerLimiter::make_snapshot() const {
     Snapshot snapshot;
+    snapshot.type = TypeString[_type];
     snapshot.label = _label;
-    snapshot.parent = _parent != nullptr ? _parent->label() : "Root";
-    snapshot.level = level;
     snapshot.limit = _limit;
     snapshot.cur_consumption = _consumption->current_value();
     snapshot.peak_consumption = _consumption->value();
-    snapshot.child_count = remain_child_count();
     return snapshot;
 }
 
-void MemTrackerLimiter::make_snapshot(std::vector<MemTracker::Snapshot>* snapshots,
-                                      size_t cur_level, size_t upper_level) const {
-    Snapshot snapshot = MemTrackerLimiter::make_snapshot(cur_level);
+void MemTrackerLimiter::refresh_global_counter() {
+    std::unordered_map<Type, int64_t> type_mem_sum = {
+            {Type::GLOBAL, 0},     {Type::QUERY, 0},         {Type::LOAD, 0},
+            {Type::COMPACTION, 0}, {Type::SCHEMA_CHANGE, 0}, {Type::CLONE, 0},
+            {Type::BATCHLOAD, 0},  {Type::CONSISTENCY, 0}};
+    for (unsigned i = 0; i < mem_tracker_limiter_pool.size(); ++i) {
+        std::lock_guard<std::mutex> l(mem_tracker_limiter_pool[i].group_lock);
+        for (auto tracker : mem_tracker_limiter_pool[i].trackers) {
+            type_mem_sum[tracker->type()] += tracker->consumption();
+        }
+    }
+    for (auto it : type_mem_sum) {
+        MemTrackerLimiter::TypeMemSum[it.first]->set(it.second);
+    }
+}
+
+void MemTrackerLimiter::make_process_snapshots(std::vector<MemTracker::Snapshot>* snapshots) {
+    MemTrackerLimiter::refresh_global_counter();
+    int64_t process_mem_sum = 0;
+    Snapshot snapshot;
+    for (auto it : MemTrackerLimiter::TypeMemSum) {
+        snapshot.type = TypeString[it.first];
+        snapshot.label = "";
+        snapshot.limit = -1;
+        snapshot.cur_consumption = it.second->current_value();
+        snapshot.peak_consumption = it.second->value();
+        (*snapshots).emplace_back(snapshot);
+        process_mem_sum += it.second->current_value();
+    }
+
+    snapshot.type = "tc/jemalloc_cache";
+    snapshot.label = "";
+    snapshot.limit = -1;
+    snapshot.cur_consumption = MemInfo::allocator_cache_mem();
+    snapshot.peak_consumption = -1;
     (*snapshots).emplace_back(snapshot);
-    if (cur_level < upper_level) {
-        {
-            std::lock_guard<std::mutex> l(_child_tracker_limiter_lock);
-            for (const auto& child : _child_tracker_limiters) {
-                child->make_snapshot(snapshots, cur_level + 1, upper_level);
+    process_mem_sum += MemInfo::allocator_cache_mem();
+
+    snapshot.type = "process";
+    snapshot.label = "";
+    snapshot.limit = -1;
+    snapshot.cur_consumption = process_mem_sum;
+    snapshot.peak_consumption = -1;
+    (*snapshots).emplace_back(snapshot);
+}
+
+void MemTrackerLimiter::make_type_snapshots(std::vector<MemTracker::Snapshot>* snapshots,
+                                            MemTrackerLimiter::Type type) {
+    if (type == Type::GLOBAL) {
+        std::lock_guard<std::mutex> l(mem_tracker_limiter_pool[0].group_lock);
+        for (auto tracker : mem_tracker_limiter_pool[0].trackers) {
+            (*snapshots).emplace_back(tracker->make_snapshot());
+            MemTracker::make_group_snapshot(snapshots, tracker->group_num(), tracker->label());
+        }
+    } else {
+        for (unsigned i = 1; i < mem_tracker_limiter_pool.size(); ++i) {
+            std::lock_guard<std::mutex> l(mem_tracker_limiter_pool[i].group_lock);
+            for (auto tracker : mem_tracker_limiter_pool[i].trackers) {
+                if (tracker->type() == type) {
+                    (*snapshots).emplace_back(tracker->make_snapshot());
+                    MemTracker::make_group_snapshot(snapshots, tracker->group_num(),
+                                                    tracker->label());
+                }
             }
         }
-        MemTracker::make_group_snapshot(snapshots, cur_level + 1, _group_num, _label);
     }
 }
 
-int64_t MemTrackerLimiter::spare_capacity() const {
-    int64_t result = std::numeric_limits<int64_t>::max();
-    for (const auto& tracker : _limited_ancestors) {
-        int64_t mem_left = tracker->limit() - tracker->consumption();
-        result = std::min(result, mem_left);
-    }
-    return result;
-}
-
-int64_t MemTrackerLimiter::get_lowest_limit() const {
-    if (_limited_ancestors.empty()) return -1;
-    int64_t min_limit = std::numeric_limits<int64_t>::max();
-    for (const auto& tracker : _limited_ancestors) {
-        DCHECK(tracker->has_limit());
-        min_limit = std::min(min_limit, tracker->limit());
-    }
-    return min_limit;
-}
-
-// Calling this on the query tracker results in output like:
-//
-//  Query(4a4c81fedaed337d:4acadfda00000000) Limit=10.00 GB Total=508.28 MB Peak=508.45 MB
-//    Fragment 4a4c81fedaed337d:4acadfda00000000: Total=8.00 KB Peak=8.00 KB
-//      EXCHANGE_NODE (id=4): Total=0 Peak=0
-//      DataStreamRecvr: Total=0 Peak=0
-//    Block Manager: Limit=6.68 GB Total=394.00 MB Peak=394.00 MB
-//    Fragment 4a4c81fedaed337d:4acadfda00000006: Total=233.72 MB Peak=242.24 MB
-//      AGGREGATION_NODE (id=1): Total=139.21 MB Peak=139.84 MB
-//      HDFS_SCAN_NODE (id=0): Total=93.94 MB Peak=102.24 MB
-//      DataStreamSender (dst_id=2): Total=45.99 KB Peak=85.99 KB
-//    Fragment 4a4c81fedaed337d:4acadfda00000003: Total=274.55 MB Peak=274.62 MB
-//      AGGREGATION_NODE (id=3): Total=274.50 MB Peak=274.50 MB
-//      EXCHANGE_NODE (id=2): Total=0 Peak=0
-//      DataStreamRecvr: Total=45.91 KB Peak=684.07 KB
-//      DataStreamSender (dst_id=4): Total=680.00 B Peak=680.00 B
-//
-// If 'reservation_metrics_' are set, we ge a more granular breakdown:
-//   TrackerName: Limit=5.00 MB Reservation=5.00 MB OtherMemory=1.04 MB
-//                Total=6.04 MB Peak=6.45 MB
-//
-std::string MemTrackerLimiter::log_usage(int max_recursive_depth, int64_t* logged_consumption) {
-    int64_t curr_consumption = consumption();
-    int64_t peak_consumption = _consumption->value();
-    if (logged_consumption != nullptr) *logged_consumption = curr_consumption;
-
-    std::string detail =
-            "MemTrackerLimiter Label={}, Limit={}({} B), Used={}({} B), Peak={}({} B), Exceeded={}";
-    detail = fmt::format(detail, _label, print_bytes(_limit), _limit, print_bytes(curr_consumption),
-                         curr_consumption, print_bytes(peak_consumption), peak_consumption,
-                         limit_exceeded() ? "true" : "false");
-
-    // This call does not need the children, so return early.
-    if (max_recursive_depth == 0) return detail;
-
-    // Recurse and get information about the children
-    int64_t child_consumption;
-    std::string child_trackers_usage;
-    {
-        std::lock_guard<std::mutex> l(_child_tracker_limiter_lock);
-        child_trackers_usage =
-                log_usage(max_recursive_depth - 1, _child_tracker_limiters, &child_consumption);
-    }
-    std::vector<MemTracker::Snapshot> snapshots;
-    MemTracker::make_group_snapshot(&snapshots, 0, _group_num, _label);
-    for (const auto& snapshot : snapshots) {
-        child_trackers_usage += "\n    " + MemTracker::log_usage(snapshot);
-    }
-    if (!child_trackers_usage.empty()) detail += child_trackers_usage;
-    return detail;
-}
-
-std::string MemTrackerLimiter::log_usage(int max_recursive_depth,
-                                         const std::list<MemTrackerLimiter*>& trackers,
-                                         int64_t* logged_consumption) {
-    *logged_consumption = 0;
-    std::vector<std::string> usage_strings;
-    for (const auto& tracker : trackers) {
-        int64_t tracker_consumption;
-        std::string usage_string = tracker->log_usage(max_recursive_depth, &tracker_consumption);
-        if (!usage_string.empty()) usage_strings.push_back(usage_string);
-        *logged_consumption += tracker_consumption;
-    }
-    return usage_strings.size() == 0 ? "" : "\n    " + join(usage_strings, "\n    ");
+std::string MemTrackerLimiter::log_usage(MemTracker::Snapshot snapshot) {
+    return fmt::format(
+            "MemTrackerLimiter Label={}, Type={}, Limit={}({} B), Used={}({} B), Peak={}({} B)",
+            snapshot.label, snapshot.type, print_bytes(snapshot.limit), snapshot.limit,
+            print_bytes(snapshot.cur_consumption), snapshot.cur_consumption,
+            print_bytes(snapshot.peak_consumption), snapshot.peak_consumption);
 }
 
 void MemTrackerLimiter::print_log_usage(const std::string& msg) {
-    // only print the tracker log_usage in be log.
     std::string detail = msg;
-    detail += "\n    " + fmt::format(
-                                 "process memory used {}, limit {}, hard limit {}, tc/jemalloc "
-                                 "allocator cache {}",
-                                 PerfCounters::get_vm_rss_str(), MemInfo::mem_limit_str(),
-                                 print_bytes(MemInfo::hard_mem_limit()),
-                                 MemInfo::allocator_cache_mem_str());
-    if (_print_log_usage) {
-        if (_label == "Process") {
-            // Dumping the process MemTracker is expensive. Limiting the recursive depth to two
-            // levels limits the level of detail to a one-line summary for each query MemTracker.
-            detail += "\n    " + log_usage(2);
-        } else {
-            detail += "\n    " + log_usage();
+    detail += "\n    " + MemTrackerLimiter::process_mem_log_str();
+    if (_enable_print_log_usage) {
+        detail += "\n    " + log_usage();
+        std::string child_trackers_usage;
+        std::vector<MemTracker::Snapshot> snapshots;
+        MemTracker::make_group_snapshot(&snapshots, _group_num, _label);
+        for (const auto& snapshot : snapshots) {
+            child_trackers_usage += "\n    " + MemTracker::log_usage(snapshot);
         }
+        if (!child_trackers_usage.empty()) detail += child_trackers_usage;
+
         // TODO: memory leak by calling `boost::stacktrace` in tcmalloc hook,
         // test whether overwriting malloc/free is the same problem in jemalloc/tcmalloc.
         // detail += "\n" + boost::stacktrace::to_string(boost::stacktrace::stacktrace());
         LOG(WARNING) << detail;
-        _print_log_usage = false;
+        _enable_print_log_usage = false;
     }
 }
 
-std::string MemTrackerLimiter::mem_limit_exceeded(const std::string& msg,
-                                                  int64_t failed_allocation_size) {
-    STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
-    std::string detail = fmt::format("Memory limit exceeded:<consuming tracker:<{}>, ", _label);
-    MemTrackerLimiter* exceeded_tracker = nullptr;
-    MemTrackerLimiter* max_consumption_tracker = nullptr;
-    int64_t free_size = INT64_MAX;
-    // Find the tracker that exceed limit and has the least free.
-    for (const auto& tracker : _limited_ancestors) {
-        int64_t max_consumption = tracker->peak_consumption() > tracker->consumption()
-                                          ? tracker->peak_consumption()
-                                          : tracker->consumption();
-        if (tracker->limit() < max_consumption + failed_allocation_size) {
-            exceeded_tracker = tracker;
-            break;
-        }
-        if (tracker->limit() - max_consumption < free_size) {
-            free_size = tracker->limit() - max_consumption;
-            max_consumption_tracker = tracker;
-        }
+void MemTrackerLimiter::print_log_process_usage(const std::string& msg) {
+    MemTrackerLimiter::_enable_print_log_process_usage = false;
+    std::string detail = msg;
+    detail += "\n    " + MemTrackerLimiter::process_mem_log_str();
+    std::vector<MemTracker::Snapshot> snapshots;
+    MemTrackerLimiter::make_process_snapshots(&snapshots);
+    MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::GLOBAL);
+    for (const auto& snapshot : snapshots) {
+        detail += "\n    " + MemTrackerLimiter::log_usage(snapshot);
     }
-
-    MemTrackerLimiter* print_log_usage_tracker = nullptr;
-    if (exceeded_tracker != nullptr) {
-        detail += limit_exceeded_errmsg_prefix_str(failed_allocation_size, exceeded_tracker);
-        print_log_usage_tracker = exceeded_tracker;
-    } else if (sys_mem_exceed_limit_check(failed_allocation_size)) {
-        detail += fmt::format("{}>, executing msg:<{}>",
-                              limit_exceeded_errmsg_sys_str(failed_allocation_size), msg);
-    } else if (max_consumption_tracker != nullptr) {
-        // must after check_sys_mem_info false
-        detail += fmt::format(
-                "failed alloc size {}, max consumption tracker:<{}>, limit {}, peak used {}, "
-                "current used {}>, executing msg:<{}>",
-                print_bytes(failed_allocation_size), max_consumption_tracker->label(),
-                print_bytes(max_consumption_tracker->limit()),
-                print_bytes(max_consumption_tracker->peak_consumption()),
-                print_bytes(max_consumption_tracker->consumption()), msg);
-        print_log_usage_tracker = max_consumption_tracker;
-    } else {
-        // The limit of the current tracker and parents is less than 0, the consume will not fail,
-        // and the current process memory has no excess limit.
-        detail += fmt::format("unknown exceed reason, executing msg:<{}>", msg);
-        print_log_usage_tracker = ExecEnv::GetInstance()->process_mem_tracker().get();
-    }
-    auto failed_msg = MemTrackerLimiter::limit_exceeded_errmsg_suffix_str(detail);
-    if (print_log_usage_tracker != nullptr) print_log_usage_tracker->print_log_usage(failed_msg);
-    return failed_msg;
+    LOG(WARNING) << detail;
 }
 
 std::string MemTrackerLimiter::mem_limit_exceeded(const std::string& msg,
-                                                  MemTrackerLimiter* failed_tracker,
                                                   const std::string& limit_exceeded_errmsg_prefix) {
+    DCHECK(_limit != -1);
     STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
-    std::string detail =
-            fmt::format("Memory limit exceeded:<consuming tracker:<{}>, {}>, executing msg:<{}>",
-                        _label, limit_exceeded_errmsg_prefix, msg);
-    auto failed_msg = MemTrackerLimiter::limit_exceeded_errmsg_suffix_str(detail);
-    failed_tracker->print_log_usage(failed_msg);
-    return failed_msg;
+    std::string detail = fmt::format(
+            "Memory limit exceeded:<consuming tracker:<{}>, {}>, executing msg:<{}>. backend {} "
+            "process memory used {}, limit {}. If query tracker exceed, `set "
+            "exec_mem_limit=8G` to change limit, details mem usage see be.INFO.",
+            _label, limit_exceeded_errmsg_prefix, msg, BackendOptions::get_localhost(),
+            PerfCounters::get_vm_rss_str(), MemInfo::mem_limit_str());
+    print_log_usage(detail);
+    return detail;
 }
 
-Status MemTrackerLimiter::mem_limit_exceeded(RuntimeState* state, const std::string& msg,
-                                             int64_t failed_alloc_size) {
-    auto failed_msg = mem_limit_exceeded(msg, failed_alloc_size);
+Status MemTrackerLimiter::fragment_mem_limit_exceeded(RuntimeState* state, const std::string& msg,
+                                                      int64_t failed_alloc_size) {
+    auto failed_msg =
+            mem_limit_exceeded(msg, tracker_limit_exceeded_errmsg_str(failed_alloc_size, this));
     state->log_error(failed_msg);
     return Status::MemoryLimitExceeded(failed_msg);
 }
 
+// TODO(zxy) More observable methods
+// /// Logs the usage of 'limit' number of queries based on maximum total memory
+// /// consumption.
+// std::string MemTracker::LogTopNQueries(int limit) {
+//     if (limit == 0) return "";
+//     priority_queue<pair<int64_t, string>, std::vector<pair<int64_t, string>>,
+//                    std::greater<pair<int64_t, string>>>
+//             min_pq;
+//     GetTopNQueries(min_pq, limit);
+//     std::vector<string> usage_strings(min_pq.size());
+//     while (!min_pq.empty()) {
+//         usage_strings.push_back(min_pq.top().second);
+//         min_pq.pop();
+//     }
+//     std::reverse(usage_strings.begin(), usage_strings.end());
+//     return join(usage_strings, "\n");
+// }
+
+// /// Helper function for LogTopNQueries that iterates through the MemTracker hierarchy
+// /// and populates 'min_pq' with 'limit' number of elements (that contain state related
+// /// to query MemTrackers) based on maximum total memory consumption.
+// void MemTracker::GetTopNQueries(
+//         priority_queue<pair<int64_t, string>, std::vector<pair<int64_t, string>>,
+//                        greater<pair<int64_t, string>>>& min_pq,
+//         int limit) {
+//     list<weak_ptr<MemTracker>> children;
+//     {
+//         lock_guard<SpinLock> l(child_trackers_lock_);
+//         children = child_trackers_;
+//     }
+//     for (const auto& child_weak : children) {
+//         shared_ptr<MemTracker> child = child_weak.lock();
+//         if (child) {
+//             child->GetTopNQueries(min_pq, limit);
+//         }
+//     }
+// }
+
 } // namespace doris
diff --git a/be/src/runtime/memory/mem_tracker_limiter.h b/be/src/runtime/memory/mem_tracker_limiter.h
index 6bc9449c20..2182e84f00 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.h
+++ b/be/src/runtime/memory/mem_tracker_limiter.h
@@ -31,45 +31,59 @@ namespace doris {
 class RuntimeState;
 
 // Track and limit the memory usage of process and query.
-// Contains an limit, arranged into a tree structure, the consumption also tracked by its ancestors.
+// Contains a limit, arranged into a tree structure.
 //
 // Automatically track every once malloc/free of the system memory allocator (Currently, based on TCMlloc hook).
 // Put Query MemTrackerLimiter into SCOPED_ATTACH_TASK when the thread starts,all memory used by this thread
-// will be recorded on this Query, otherwise it will be recorded in Process Tracker by default.
-//
-// We use a five-level hierarchy of mem trackers: process, query pool, query, instance, node.
-// The first four layers are MemTrackerLimiter with limit, and the fifth layer is MemTracker without limit.
-// Specific parts of the fragment (exec nodes, sinks, etc) will add a fifth level when they are initialized.
+// will be recorded on this Query, otherwise it will be recorded in Orphan Tracker by default.
 class MemTrackerLimiter final : public MemTracker {
 public:
-    // Creates and adds the tracker limiter to the tree
-    MemTrackerLimiter(
-            int64_t byte_limit = -1, const std::string& label = std::string(),
-            const std::shared_ptr<MemTrackerLimiter>& parent = std::shared_ptr<MemTrackerLimiter>(),
-            RuntimeProfile* profile = nullptr);
+    enum Type {
+        GLOBAL = 0,        // Life cycle is the same as the process, e.g. Cache and default Orphan
+        QUERY = 1,         // Count the memory consumption of all Query tasks.
+        LOAD = 2,          // Count the memory consumption of all Load tasks.
+        COMPACTION = 3,    // Count the memory consumption of all Base and Cumulative tasks.
+        SCHEMA_CHANGE = 4, // Count the memory consumption of all SchemaChange tasks.
+        CLONE = 5, // Count the memory consumption of all EngineCloneTask. Note: Memory that does not contain make/release snapshots.
+        BATCHLOAD = 6,  // Count the memory consumption of all EngineBatchLoadTask.
+        CONSISTENCY = 7 // Count the memory consumption of all EngineChecksumTask.
+    };
 
-    // If the final consumption is not as expected, this usually means that the same memory is calling
-    // consume and release on different trackers. If the two trackers have a parent-child relationship,
-    // the parent tracker consumption is correct, and the child tracker is wrong; if the two trackers have
-    // no parent-child relationship, the two tracker consumptions are wrong.
-    ~MemTrackerLimiter();
+    inline static std::unordered_map<Type, std::shared_ptr<RuntimeProfile::HighWaterMarkCounter>>
+            TypeMemSum = {{Type::GLOBAL,
+                           std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES)},
+                          {Type::QUERY,
+                           std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES)},
+                          {Type::LOAD,
+                           std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES)},
+                          {Type::COMPACTION,
+                           std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES)},
+                          {Type::SCHEMA_CHANGE,
+                           std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES)},
+                          {Type::CLONE,
+                           std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES)},
+                          {Type::BATCHLOAD,
+                           std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES)},
+                          {Type::CONSISTENCY,
+                           std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES)}};
 
-    std::shared_ptr<MemTrackerLimiter> parent() const { return _parent; }
-
-    size_t remain_child_count() const { return _child_tracker_limiters.size(); }
-    size_t had_child_count() const { return _had_child_count; }
-
-    Snapshot make_snapshot(size_t level) const;
-    // Returns a list of all the valid tracker snapshots.
-    void make_snapshot(std::vector<MemTracker::Snapshot>* snapshots, size_t cur_level,
-                       size_t upper_level) const;
+    inline static const std::string TypeString[] = {"global",     "query",         "load",
+                                                    "compaction", "schema_change", "clone",
+                                                    "batch_load", "consistency"};
 
 public:
+    // A byte_limit of -1 means no consumption limit; the tracker then only participates in process memory statistics.
+    MemTrackerLimiter(Type type, const std::string& label = std::string(), int64_t byte_limit = -1,
+                      RuntimeProfile* profile = nullptr);
+
+    ~MemTrackerLimiter();
+
     static bool sys_mem_exceed_limit_check(int64_t bytes) {
         // Limit process memory usage using the actual physical memory of the process in `/proc/self/status`.
         // This is independent of the consumption value of the mem tracker, which counts the virtual memory
         // of the process malloc.
         // for fast, expect MemInfo::initialized() to be true.
+        //
         // tcmalloc/jemalloc allocator cache does not participate in the mem check as part of the process physical memory.
         // because `new/malloc` will trigger mem hook when using tcmalloc/jemalloc allocator cache,
         // but it may not actually alloc physical memory, which is not expected in mem hook fail.
@@ -77,127 +91,78 @@ public:
         // TODO: In order to ensure no OOM, currently reserve 200M, and then use the free mem in /proc/meminfo to ensure no OOM.
         if (MemInfo::proc_mem_no_allocator_cache() + bytes >= MemInfo::mem_limit() ||
             PerfCounters::get_vm_rss() + bytes >= MemInfo::hard_mem_limit()) {
+            print_log_process_usage("sys_mem_exceed_limit_check");
             return true;
         }
         return false;
     }
 
     void set_consumption() { LOG(FATAL) << "MemTrackerLimiter set_consumption not supported"; }
+    Type type() const { return _type; }
     int64_t group_num() const { return _group_num; }
     bool has_limit() const { return _limit >= 0; }
     int64_t limit() const { return _limit; }
-    void update_limit(int64_t limit) {
-        DCHECK(has_limit());
-        _limit = limit;
-    }
     bool limit_exceeded() const { return _limit >= 0 && _limit < consumption(); }
 
-    // Returns true if a valid limit of this tracker limiter or one of its ancestors is exceeded.
-    bool any_limit_exceeded() const {
-        for (const auto& tracker : _limited_ancestors) {
-            if (tracker->limit_exceeded()) {
-                return true;
-            }
-        }
-        return false;
-    }
-
     Status check_limit(int64_t bytes);
 
     // Returns the maximum consumption that can be made without exceeding the limit on
-    // this tracker limiter or any of its parents. Returns int64_t::max() if there are no
-    // limits and a negative value if any limit is already exceeded.
-    int64_t spare_capacity() const;
-
-    // Returns the lowest limit for this tracker limiter and its ancestors. Returns -1 if there is no limit.
-    int64_t get_lowest_limit() const;
+    // this tracker limiter.
+    int64_t spare_capacity() const { return _limit - consumption(); }
 
 public:
-    // up to (but not including) end_tracker.
-    // This happens when we want to update tracking on a particular mem tracker but the consumption
-    // against the limit recorded in one of its ancestors already happened.
-    // It is used for revise mem tracker consumption.
-    // If the location of memory alloc and free is different, the consumption value of mem tracker will be inaccurate.
-    // But the consumption value of the process mem tracker is not affecte
-    void cache_consume_local(int64_t bytes);
+    // Use this when the tracker is consumed frequently: small deltas are accumulated via add_untracked_mem() before consume().
+    void cache_consume(int64_t bytes);
 
-    // Will not change the value of process_mem_tracker, even though mem_tracker == process_mem_tracker.
+    // Transfer 'size' bytes of consumption from this tracker to 'dst'.
     void transfer_to(int64_t size, MemTrackerLimiter* dst) {
-        cache_consume_local(-size);
-        dst->cache_consume_local(size);
+        cache_consume(-size);
+        dst->cache_consume(size);
     }
 
-    void enable_print_log_usage() { _print_log_usage = true; }
-    void enable_reset_zero() { _reset_zero = true; }
+    static void refresh_global_counter();
+    Snapshot make_snapshot() const;
+    // Returns a list of all the valid tracker snapshots.
+    static void make_process_snapshots(std::vector<MemTracker::Snapshot>* snapshots);
+    static void make_type_snapshots(std::vector<MemTracker::Snapshot>* snapshots, Type type);
 
-    void reset_zero() {
-        ExecEnv::GetInstance()->orphan_mem_tracker_raw()->cache_consume_local(
-                _consumption->current_value());
-        cache_consume_local(-_consumption->current_value());
-    }
-
-    // Logs the usage of this tracker limiter and optionally its children (recursively).
-    // If 'logged_consumption' is non-nullptr, sets the consumption value logged.
-    // 'max_recursive_depth' specifies the maximum number of levels of children
-    // to include in the dump. If it is zero, then no children are dumped.
-    // Limiting the recursive depth reduces the cost of dumping, particularly
-    // for the process tracker limiter.
-    std::string log_usage(int max_recursive_depth = INT_MAX, int64_t* logged_consumption = nullptr);
+    static std::string log_usage(MemTracker::Snapshot snapshot);
+    std::string log_usage() { return log_usage(make_snapshot()); }
     void print_log_usage(const std::string& msg);
+    void enable_print_log_usage() { _enable_print_log_usage = true; }
+    static void enable_print_log_process_usage() { _enable_print_log_process_usage = true; }
+    static void print_log_process_usage(const std::string& msg);
 
-    // Log the memory usage when memory limit is exceeded and return a status object with
-    // msg of the allocation which caused the limit to be exceeded.
-    // If 'failed_allocation_size' is greater than zero, logs the allocation size. If
-    // 'failed_allocation_size' is zero, nothing about the allocation size is logged.
-    // If 'state' is non-nullptr, logs the error to 'state'.
-    std::string mem_limit_exceeded(const std::string& msg, int64_t failed_allocation_size = 0);
-    std::string mem_limit_exceeded(const std::string& msg, MemTrackerLimiter* failed_tracker,
+    // Log the memory usage when memory limit is exceeded.
+    std::string mem_limit_exceeded(const std::string& msg,
                                    const std::string& limit_exceeded_errmsg_prefix);
-    Status mem_limit_exceeded(RuntimeState* state, const std::string& msg,
-                              int64_t failed_allocation_size = 0);
+    Status fragment_mem_limit_exceeded(RuntimeState* state, const std::string& msg,
+                                       int64_t failed_allocation_size = 0);
 
     std::string debug_string() {
         std::stringstream msg;
         msg << "limit: " << _limit << "; "
             << "consumption: " << _consumption->current_value() << "; "
             << "label: " << _label << "; "
-            << "all ancestor size: " << _all_ancestors.size() - 1 << "; "
-            << "limited ancestor size: " << _limited_ancestors.size() - 1 << "; ";
+            << "type: " << TypeString[_type] << "; ";
         return msg.str();
     }
 
 private:
-    // The following func, for automatic memory tracking and limiting based on system memory allocation.
     friend class ThreadMemTrackerMgr;
 
-    // Increases consumption of this tracker and its ancestors by 'bytes'.
-    void consume(int64_t bytes);
-
-    // Decreases consumption of this tracker and its ancestors by 'bytes'.
-    void release(int64_t bytes) { consume(-bytes); }
-
-    // Increases consumption of this tracker and its ancestors by 'bytes' only if
-    // they can all consume 'bytes' without exceeding limit. If limit would be exceed,
-    // no MemTrackerLimiters are updated. Returns true if the consumption was successfully updated.
+    // Increases consumption of this tracker by 'bytes' only if doing so will not exceed the limit.
+    // Returns true if the consumption was successfully updated.
     WARN_UNUSED_RESULT
     bool try_consume(int64_t bytes, std::string& failed_msg);
 
-    void consume_local(int64_t bytes);
-
     // When the accumulated untracked memory value exceeds the upper limit,
     // the current value is returned and set to 0.
     // Thread safety.
     int64_t add_untracked_mem(int64_t bytes);
 
-    // Log consumption of all the trackers provided. Returns the sum of consumption in
-    // 'logged_consumption'. 'max_recursive_depth' specifies the maximum number of levels
-    // of children to include in the dump. If it is zero, then no children are dumped.
-    static std::string log_usage(int max_recursive_depth,
-                                 const std::list<MemTrackerLimiter*>& trackers,
-                                 int64_t* logged_consumption);
-
-    static std::string limit_exceeded_errmsg_prefix_str(int64_t bytes,
-                                                        MemTrackerLimiter* exceed_tracker) {
+    static std::string tracker_limit_exceeded_errmsg_str(int64_t bytes,
+                                                         MemTrackerLimiter* exceed_tracker) {
         return fmt::format(
                 "failed alloc size {}, exceeded tracker:<{}>, limit {}, peak "
                 "used {}, current used {}",
@@ -206,68 +171,43 @@ private:
                 print_bytes(exceed_tracker->_consumption->current_value()));
     }
 
-    static std::string limit_exceeded_errmsg_suffix_str(const std::string& msg) {
+    static std::string process_limit_exceeded_errmsg_str(int64_t bytes) {
         return fmt::format(
-                "{}. backend {} process memory used {}, limit {}. If query tracker exceed, `set "
-                "exec_mem_limit=8G` to change limit, details mem usage see be.INFO.",
-                msg, BackendOptions::get_localhost(), PerfCounters::get_vm_rss_str(),
-                MemInfo::mem_limit_str());
-    }
-
-    static std::string limit_exceeded_errmsg_sys_str(int64_t bytes) {
-        auto err_msg = fmt::format(
                 "process memory used {}, tc/jemalloc allocator cache {}, exceed limit {}, failed "
                 "alloc size {}",
                 PerfCounters::get_vm_rss_str(), MemInfo::allocator_cache_mem_str(),
                 MemInfo::mem_limit_str(), print_bytes(bytes));
-        ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err_msg);
-        return err_msg;
+    }
+
+    static std::string process_mem_log_str() {
+        return fmt::format(
+                "process memory used {}, limit {}, hard limit {}, tc/jemalloc "
+                "allocator cache {}",
+                PerfCounters::get_vm_rss_str(), MemInfo::mem_limit_str(),
+                print_bytes(MemInfo::hard_mem_limit()), MemInfo::allocator_cache_mem_str());
     }
 
 private:
-    // Limit on memory consumption, in bytes. If limit_ == -1, there is no consumption limit. Used in log_usage。
+    Type _type;
+
+    // Limit on memory consumption, in bytes.
     int64_t _limit;
 
-    // Group number in MemTracker::mem_tracker_pool, generated by the timestamp.
+    // Group number in MemTracker::mem_tracker_limiter_pool and MemTracker::mem_tracker_pool, generated by the timestamp.
     int64_t _group_num;
 
-    std::shared_ptr<MemTrackerLimiter> _parent; // The parent of this tracker.
-
-    // this tracker limiter plus all of its ancestors
-    std::vector<MemTrackerLimiter*> _all_ancestors;
-    // _all_ancestors with valid limits, except process tracker
-    std::vector<MemTrackerLimiter*> _limited_ancestors;
-
     // Consume size smaller than mem_tracker_consume_min_size_bytes will continue to accumulate
     // to avoid frequent calls to consume/release of MemTracker.
     std::atomic<int64_t> _untracked_mem = 0;
 
-    // Child trackers of this tracker limiter. Used for error reporting and
-    // listing only (i.e. updating the consumption of a parent tracker limiter does not
-    // update that of its children).
-    mutable std::mutex _child_tracker_limiter_lock;
-    std::list<MemTrackerLimiter*> _child_tracker_limiters;
-    // Iterator into parent_->_child_tracker_limiters for this object. Stored to have O(1) remove.
-    std::list<MemTrackerLimiter*>::iterator _child_tracker_it;
+    // Avoid frequent printing.
+    bool _enable_print_log_usage = false;
+    static std::atomic<bool> _enable_print_log_process_usage;
 
-    // The number of child trackers that have been added.
-    std::atomic_size_t _had_child_count = 0;
-
-    bool _print_log_usage = false;
-    // mem hook record tracker cannot guarantee that the final consumption is 0,
-    // nor can it guarantee that the memory alloc and free are recorded in a one-to-one correspondence.
-    // In some cases, in order to avoid the cumulative error of the upper global tracker,
-    // the consumption of the current tracker is reset to zero.
-    bool _reset_zero = false;
+    // Iterator into mem_tracker_limiter_pool for this object. Stored to have O(1) remove.
+    std::list<MemTrackerLimiter*>::iterator _tracker_limiter_group_it;
 };
 
-inline void MemTrackerLimiter::consume(int64_t bytes) {
-    if (bytes == 0) return;
-    for (auto& tracker : _all_ancestors) {
-        tracker->_consumption->add(bytes);
-    }
-}
-
 inline int64_t MemTrackerLimiter::add_untracked_mem(int64_t bytes) {
     _untracked_mem += bytes;
     if (std::abs(_untracked_mem) >= config::mem_tracker_consume_min_size_bytes) {
@@ -276,18 +216,10 @@ inline int64_t MemTrackerLimiter::add_untracked_mem(int64_t bytes) {
     return 0;
 }
 
-inline void MemTrackerLimiter::consume_local(int64_t bytes) {
-    if (bytes == 0) return;
-    for (auto& tracker : _all_ancestors) {
-        if (tracker->label() == "Process") return;
-        tracker->_consumption->add(bytes);
-    }
-}
-
-inline void MemTrackerLimiter::cache_consume_local(int64_t bytes) {
+inline void MemTrackerLimiter::cache_consume(int64_t bytes) {
     if (bytes == 0) return;
     int64_t consume_bytes = add_untracked_mem(bytes);
-    consume_local(consume_bytes);
+    consume(consume_bytes);
 }
 
 inline bool MemTrackerLimiter::try_consume(int64_t bytes, std::string& failed_msg) {
@@ -297,30 +229,20 @@ inline bool MemTrackerLimiter::try_consume(int64_t bytes, std::string& failed_ms
         return true;
     }
     if (sys_mem_exceed_limit_check(bytes)) {
-        failed_msg = limit_exceeded_errmsg_sys_str(bytes);
+        failed_msg = process_limit_exceeded_errmsg_str(bytes);
         return false;
     }
-    int i;
-    // Walk the tracker tree top-down.
-    for (i = _all_ancestors.size() - 1; i >= 0; --i) {
-        MemTrackerLimiter* tracker = _all_ancestors[i];
-        // Process tracker does not participate in the process memory limit, process tracker consumption is virtual memory,
-        // and there is a diff between the real physical memory value of the process. It is replaced by check_sys_mem_info.
-        if (tracker->limit() < 0 || tracker->label() == "Process") {
-            tracker->_consumption->add(bytes); // No limit at this tracker.
-        } else {
-            if (!tracker->_consumption->try_add(bytes, tracker->limit())) {
-                // Failed for this mem tracker. Roll back the ones that succeeded.
-                for (int j = _all_ancestors.size() - 1; j > i; --j) {
-                    _all_ancestors[j]->_consumption->add(-bytes);
-                }
-                failed_msg = limit_exceeded_errmsg_prefix_str(bytes, tracker);
-                return false;
-            }
+
+    if (_limit < 0) {
+        _consumption->add(bytes); // No limit at this tracker.
+    } else {
+        if (!_consumption->try_add(bytes, _limit)) {
+            // A failed try_add() has not consumed 'bytes' (the old ancestor loop never rolled back
+            // the failing tracker either), so nothing is undone here; just report the failure.
+            failed_msg = tracker_limit_exceeded_errmsg_str(bytes, this);
+            return false;
         }
     }
-    // Everyone succeeded, return.
-    DCHECK_EQ(i, -1);
     failed_msg = std::string();
     return true;
 }
@@ -328,15 +250,10 @@ inline bool MemTrackerLimiter::try_consume(int64_t bytes, std::string& failed_ms
 inline Status MemTrackerLimiter::check_limit(int64_t bytes) {
     if (bytes <= 0) return Status::OK();
     if (sys_mem_exceed_limit_check(bytes)) {
-        return Status::MemoryLimitExceeded(limit_exceeded_errmsg_sys_str(bytes));
+        return Status::MemoryLimitExceeded(process_limit_exceeded_errmsg_str(bytes));
     }
-    int i;
-    // Walk the tracker tree top-down.
-    for (i = _limited_ancestors.size() - 1; i >= 0; --i) {
-        MemTrackerLimiter* tracker = _limited_ancestors[i];
-        if (tracker->_consumption->current_value() + bytes > tracker->limit()) {
-            return Status::MemoryLimitExceeded(limit_exceeded_errmsg_prefix_str(bytes, tracker));
-        }
+    if (_limit > 0 && _consumption->current_value() + bytes > _limit) {
+        return Status::MemoryLimitExceeded(tracker_limit_exceeded_errmsg_str(bytes, this));
     }
     return Status::OK();
 }
diff --git a/be/src/runtime/memory/mem_tracker_task_pool.cpp b/be/src/runtime/memory/mem_tracker_task_pool.cpp
deleted file mode 100644
index 28539703b4..0000000000
--- a/be/src/runtime/memory/mem_tracker_task_pool.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "runtime/memory/mem_tracker_task_pool.h"
-
-#include "common/config.h"
-#include "runtime/exec_env.h"
-#include "util/pretty_printer.h"
-
-namespace doris {
-
-std::shared_ptr<MemTrackerLimiter> MemTrackerTaskPool::register_task_mem_tracker_impl(
-        const std::string& task_id, int64_t mem_limit, const std::string& label,
-        const std::shared_ptr<MemTrackerLimiter>& parent) {
-    DCHECK(!task_id.empty());
-    std::lock_guard<std::mutex> l(_task_tracker_lock);
-    // First time this task_id registered, make a new object, otherwise do nothing.
-    // Combine new tracker and emplace into one operation to avoid the use of locks
-    // Name for task MemTrackers. '$0' is replaced with the task id.
-    std::shared_ptr<MemTrackerLimiter> tracker;
-    bool new_emplace = _task_mem_trackers.lazy_emplace_l(
-            task_id, [&](const std::shared_ptr<MemTrackerLimiter>& v) { tracker = v; },
-            [&](const auto& ctor) {
-                tracker = std::make_shared<MemTrackerLimiter>(mem_limit, label, parent);
-                ctor(task_id, tracker);
-            });
-    if (new_emplace) {
-        LOG(INFO) << "Register query/load memory tracker, query/load id: " << task_id
-                  << " limit: " << PrettyPrinter::print(mem_limit, TUnit::BYTES);
-    }
-    return tracker;
-}
-
-std::shared_ptr<MemTrackerLimiter> MemTrackerTaskPool::register_query_mem_tracker(
-        const std::string& query_id, int64_t mem_limit) {
-    return register_task_mem_tracker_impl(query_id, mem_limit, fmt::format("Query#Id={}", query_id),
-                                          ExecEnv::GetInstance()->query_pool_mem_tracker());
-}
-
-std::shared_ptr<MemTrackerLimiter> MemTrackerTaskPool::register_query_scanner_mem_tracker(
-        const std::string& query_id) {
-    return register_task_mem_tracker_impl("Scanner#" + query_id, -1,
-                                          fmt::format("Scanner#Query#Id={}", query_id),
-                                          get_task_mem_tracker(query_id));
-}
-
-std::shared_ptr<MemTrackerLimiter> MemTrackerTaskPool::register_load_mem_tracker(
-        const std::string& load_id, int64_t mem_limit) {
-    // In load, the query id of the fragment is executed, which is the same as the load id of the load channel.
-    return register_task_mem_tracker_impl(load_id, mem_limit, fmt::format("Load#Id={}", load_id),
-                                          ExecEnv::GetInstance()->load_pool_mem_tracker());
-}
-
-std::shared_ptr<MemTrackerLimiter> MemTrackerTaskPool::register_load_scanner_mem_tracker(
-        const std::string& load_id) {
-    return register_task_mem_tracker_impl("Scanner#" + load_id, -1,
-                                          fmt::format("Scanner#Load#Id={}", load_id),
-                                          get_task_mem_tracker(load_id));
-}
-
-std::shared_ptr<MemTrackerLimiter> MemTrackerTaskPool::get_task_mem_tracker(
-        const std::string& task_id) {
-    DCHECK(!task_id.empty());
-    std::shared_ptr<MemTrackerLimiter> tracker = nullptr;
-    // Avoid using locks to resolve erase conflicts
-    _task_mem_trackers.if_contains(
-            task_id, [&tracker](const std::shared_ptr<MemTrackerLimiter>& v) { tracker = v; });
-    return tracker;
-}
-
-void MemTrackerTaskPool::logout_task_mem_tracker() {
-    std::lock_guard<std::mutex> l(_task_tracker_lock);
-    std::vector<std::string> expired_task_ids;
-    for (auto it = _task_mem_trackers.begin(); it != _task_mem_trackers.end(); it++) {
-        if (!it->second) {
-            // Unknown exception case with high concurrency, after _task_mem_trackers.erase,
-            // the key still exists in _task_mem_trackers. https://github.com/apache/incubator-doris/issues/10006
-            expired_task_ids.emplace_back(it->first);
-        } else if (it->second.use_count() == 1 && it->second->had_child_count() != 0) {
-            // No RuntimeState uses this task MemTrackerLimiter, it is only referenced by this map,
-            // and tracker was not created soon, delete it.
-            //
-            // If consumption is not equal to 0 before query mem tracker is destructed,
-            // there are two possibilities in theory.
-            // 1. A memory leak occurs.
-            // 2. memory consumed on query mem tracker, released on other trackers, and no manual transfer
-            //  between the two trackers.
-            // At present, it is impossible to effectively locate which memory consume and release on different trackers,
-            // so query memory leaks cannot be found.
-            LOG(INFO) << fmt::format(
-                    "Deregister query/load memory tracker, queryId={}, Limit={}, CurrUsed={}, "
-                    "PeakUsed={}",
-                    it->first, MemTracker::print_bytes(it->second->limit()),
-                    MemTracker::print_bytes(it->second->consumption()),
-                    MemTracker::print_bytes(it->second->peak_consumption()));
-            expired_task_ids.emplace_back(it->first);
-        } else if (config::memory_debug) {
-            it->second->print_log_usage("query routine");
-            it->second->enable_print_log_usage();
-        }
-    }
-    for (auto tid : expired_task_ids) {
-        // Verify the condition again to make sure the tracker is not being used again.
-        _task_mem_trackers.erase_if(tid, [&](const std::shared_ptr<MemTrackerLimiter>& v) {
-            return !v || v.use_count() == 1;
-        });
-    }
-}
-
-// TODO(zxy) More observable methods
-// /// Logs the usage of 'limit' number of queries based on maximum total memory
-// /// consumption.
-// std::string MemTracker::LogTopNQueries(int limit) {
-//     if (limit == 0) return "";
-//     priority_queue<pair<int64_t, string>, std::vector<pair<int64_t, string>>,
-//                    std::greater<pair<int64_t, string>>>
-//             min_pq;
-//     GetTopNQueries(min_pq, limit);
-//     std::vector<string> usage_strings(min_pq.size());
-//     while (!min_pq.empty()) {
-//         usage_strings.push_back(min_pq.top().second);
-//         min_pq.pop();
-//     }
-//     std::reverse(usage_strings.begin(), usage_strings.end());
-//     return join(usage_strings, "\n");
-// }
-
-// /// Helper function for LogTopNQueries that iterates through the MemTracker hierarchy
-// /// and populates 'min_pq' with 'limit' number of elements (that contain state related
-// /// to query MemTrackers) based on maximum total memory consumption.
-// void MemTracker::GetTopNQueries(
-//         priority_queue<pair<int64_t, string>, std::vector<pair<int64_t, string>>,
-//                        greater<pair<int64_t, string>>>& min_pq,
-//         int limit) {
-//     list<weak_ptr<MemTracker>> children;
-//     {
-//         lock_guard<SpinLock> l(child_trackers_lock_);
-//         children = child_trackers_;
-//     }
-//     for (const auto& child_weak : children) {
-//         shared_ptr<MemTracker> child = child_weak.lock();
-//         if (child) {
-//             child->GetTopNQueries(min_pq, limit);
-//         }
-//     }
-// }
-
-} // namespace doris
diff --git a/be/src/runtime/memory/mem_tracker_task_pool.h b/be/src/runtime/memory/mem_tracker_task_pool.h
deleted file mode 100644
index 9e5813ba03..0000000000
--- a/be/src/runtime/memory/mem_tracker_task_pool.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <parallel_hashmap/phmap.h>
-
-#include "runtime/memory/mem_tracker_limiter.h"
-
-namespace doris {
-
-// TODO: phmap `parallel_flat_hash_map` is not thread-safe. If it is not fixed in the future,
-//       can consider using other maps instead.
-using TaskTrackersMap = phmap::parallel_flat_hash_map<
-        std::string, std::shared_ptr<MemTrackerLimiter>,
-        phmap::priv::hash_default_hash<std::string>, phmap::priv::hash_default_eq<std::string>,
-        std::allocator<std::pair<const std::string, std::shared_ptr<MemTrackerLimiter>>>, 12,
-        std::mutex>;
-
-// Global task pool for query MemTrackers. Owned by ExecEnv.
-class MemTrackerTaskPool {
-public:
-    // Construct a MemTrackerLimiter object for 'task_id' with 'mem_limit' as the memory limit.
-    // The MemTrackerLimiter is a child of the pool MemTrackerLimiter, Calling this with the same
-    // 'task_id' will return the same MemTrackerLimiter object. This is used to track the local
-    // memory usage of all tasks executing. The first time this is called for a task,
-    // a new MemTrackerLimiter object is created with the pool tracker as its parent.
-    // Newly created trackers will always have a limit of -1.
-    std::shared_ptr<MemTrackerLimiter> register_task_mem_tracker_impl(
-            const std::string& task_id, int64_t mem_limit, const std::string& label,
-            const std::shared_ptr<MemTrackerLimiter>& parent);
-    std::shared_ptr<MemTrackerLimiter> register_query_mem_tracker(const std::string& query_id,
-                                                                  int64_t mem_limit);
-    std::shared_ptr<MemTrackerLimiter> register_query_scanner_mem_tracker(
-            const std::string& query_id);
-    std::shared_ptr<MemTrackerLimiter> register_load_mem_tracker(const std::string& load_id,
-                                                                 int64_t mem_limit);
-    std::shared_ptr<MemTrackerLimiter> register_load_scanner_mem_tracker(
-            const std::string& load_id);
-
-    std::shared_ptr<MemTrackerLimiter> get_task_mem_tracker(const std::string& task_id);
-
-    // Remove the mem tracker that has ended the query.
-    void logout_task_mem_tracker();
-
-private:
-    // All per-task MemTrackerLimiter objects.
-    // The life cycle of task MemTrackerLimiter in the process is the same as task runtime state,
-    // MemTrackers will be removed from this map after query finish or cancel.
-    TaskTrackersMap _task_mem_trackers;
-    std::mutex _task_tracker_lock;
-};
-
-} // namespace doris
\ No newline at end of file
diff --git a/be/src/runtime/memory/system_allocator.cpp b/be/src/runtime/memory/system_allocator.cpp
index 78a9e7911e..f8dd402ad1 100644
--- a/be/src/runtime/memory/system_allocator.cpp
+++ b/be/src/runtime/memory/system_allocator.cpp
@@ -45,7 +45,7 @@ uint8_t* SystemAllocator::allocate_via_malloc(size_t length) {
         char buf[64];
         auto err = fmt::format("fail to allocate mem via posix_memalign, res={}, errmsg={}.", res,
                                strerror_r(res, buf, 64));
-        ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
+        MemTrackerLimiter::print_log_process_usage(err);
         LOG(ERROR) << err;
         return nullptr;
     }
diff --git a/be/src/runtime/memory/thread_mem_tracker_mgr.cpp b/be/src/runtime/memory/thread_mem_tracker_mgr.cpp
index b19f9181b0..4273860468 100644
--- a/be/src/runtime/memory/thread_mem_tracker_mgr.cpp
+++ b/be/src/runtime/memory/thread_mem_tracker_mgr.cpp
@@ -19,35 +19,32 @@
 
 #include "runtime/exec_env.h"
 #include "runtime/fragment_mgr.h"
-#include "runtime/memory/mem_tracker_task_pool.h"
 #include "service/backend_options.h"
 
 namespace doris {
 
 void ThreadMemTrackerMgr::attach_limiter_tracker(
-        const std::string& task_id, const TUniqueId& fragment_instance_id,
-        const std::shared_ptr<MemTrackerLimiter>& mem_tracker) {
+        const std::shared_ptr<MemTrackerLimiter>& mem_tracker,
+        const TUniqueId& fragment_instance_id) {
     DCHECK(mem_tracker);
     flush_untracked_mem<false>();
-    _task_id_stack.push_back(task_id);
-    _fragment_instance_id_stack.push_back(fragment_instance_id);
-    _limiter_tracker_stack.push_back(mem_tracker);
+    _fragment_instance_id = fragment_instance_id;
+    _limiter_tracker = mem_tracker;
     _limiter_tracker_raw = mem_tracker.get();
 }
 
-void ThreadMemTrackerMgr::detach_limiter_tracker() {
-    DCHECK(!_limiter_tracker_stack.empty());
+void ThreadMemTrackerMgr::detach_limiter_tracker(
+        const std::shared_ptr<MemTrackerLimiter>& old_mem_tracker) {
     flush_untracked_mem<false>();
-    _task_id_stack.pop_back();
-    _fragment_instance_id_stack.pop_back();
-    _limiter_tracker_stack.pop_back();
-    _limiter_tracker_raw = _limiter_tracker_stack.back().get();
+    _fragment_instance_id = TUniqueId();
+    _limiter_tracker = old_mem_tracker;
+    _limiter_tracker_raw = old_mem_tracker.get();
 }
 
 void ThreadMemTrackerMgr::exceeded_cancel_task(const std::string& cancel_details) {
-    if (_fragment_instance_id_stack.back() != TUniqueId()) {
+    if (_fragment_instance_id != TUniqueId()) {
         ExecEnv::GetInstance()->fragment_mgr()->cancel(
-                _fragment_instance_id_stack.back(), PPlanFragmentCancelReason::MEMORY_LIMIT_EXCEED,
+                _fragment_instance_id, PPlanFragmentCancelReason::MEMORY_LIMIT_EXCEED,
                 cancel_details);
     }
 }
@@ -57,8 +54,7 @@ void ThreadMemTrackerMgr::exceeded(const std::string& failed_msg) {
         _cb_func();
     }
     auto cancel_msg = _limiter_tracker_raw->mem_limit_exceeded(
-            fmt::format("exec node:<{}>", last_consumer_tracker()),
-            _limiter_tracker_raw->parent().get(), failed_msg);
+            fmt::format("execute:<{}>", last_consumer_tracker()), failed_msg);
     if (is_attach_query()) {
         exceeded_cancel_task(cancel_msg);
     }
diff --git a/be/src/runtime/memory/thread_mem_tracker_mgr.h b/be/src/runtime/memory/thread_mem_tracker_mgr.h
index 6c8cae39f9..1ab3e7bce4 100644
--- a/be/src/runtime/memory/thread_mem_tracker_mgr.h
+++ b/be/src/runtime/memory/thread_mem_tracker_mgr.h
@@ -27,56 +27,34 @@
 
 namespace doris {
 
-extern bthread_key_t btls_key;
-static const bthread_key_t EMPTY_BTLS_KEY = {0, 0};
-
 using ExceedCallBack = void (*)();
 
-// TCMalloc new/delete Hook is counted in the memory_tracker of the current thread.
-//
-// In the original design, the MemTracker consume method is called before the memory is allocated.
-// If the consume succeeds, the memory is actually allocated, otherwise an exception is thrown.
-// But the statistics of memory through TCMalloc new/delete Hook are after the memory is actually allocated,
-// which is different from the previous behavior. Therefore, when alloc for some large memory.
+// Memory Hook is counted in the memory tracker of the current thread.
 class ThreadMemTrackerMgr {
 public:
     ThreadMemTrackerMgr() {}
 
     ~ThreadMemTrackerMgr() {
         // if _init == false, exec env is not initialized when init(). and never consumed mem tracker once.
-        if (_init) {
-            flush_untracked_mem<false>();
-            if (bthread_self() == 0) {
-                DCHECK(_consumer_tracker_stack.empty());
-                DCHECK(_limiter_tracker_stack.size() == 1)
-                        << ", limiter_tracker_stack.size(): " << _limiter_tracker_stack.size();
-            }
-        }
+        if (_init) flush_untracked_mem<false>();
     }
 
-    // only for tcmalloc hook
+    // only for memory hook
     static void consume_no_attach(int64_t size) {
         if (ExecEnv::GetInstance()->initialized()) {
-            ExecEnv::GetInstance()->orphan_mem_tracker_raw()->consume(size);
+            ExecEnv::GetInstance()->orphan_mem_tracker()->consume(size);
         }
     }
 
-    // After thread initialization, calling `init` again must call `clear_untracked_mems` first
-    // to avoid memory tracking loss.
     void init();
-    void init_impl();
-    void clear();
 
-    // After attach, the current thread TCMalloc Hook starts to consume/release task mem_tracker
-    void attach_limiter_tracker(const std::string& task_id, const TUniqueId& fragment_instance_id,
-                                const std::shared_ptr<MemTrackerLimiter>& mem_tracker);
-    void detach_limiter_tracker();
-    // Usually there are only two layers, the first is the default trackerOrphan;
-    // the second is the query tracker or bthread tracker.
-    int64_t get_attach_layers() { return _limiter_tracker_stack.size(); }
+    // After attach, the current thread Memory Hook starts to consume/release task mem_tracker
+    void attach_limiter_tracker(const std::shared_ptr<MemTrackerLimiter>& mem_tracker,
+                                const TUniqueId& fragment_instance_id);
+    void detach_limiter_tracker(const std::shared_ptr<MemTrackerLimiter>& old_mem_tracker =
+                                        ExecEnv::GetInstance()->orphan_mem_tracker());
 
     // Must be fast enough! Thread update_tracker may be called very frequently.
-    // So for performance, add tracker as early as possible, and then call update_tracker<Existed>.
     bool push_consumer_tracker(MemTracker* mem_tracker);
     void pop_consumer_tracker();
     std::string last_consumer_tracker() {
@@ -96,7 +74,7 @@ public:
 
     void set_exceed_call_back(ExceedCallBack cb_func) { _cb_func = cb_func; }
 
-    // Note that, If call the memory allocation operation in TCMalloc new/delete Hook,
+    // Note that if a memory allocation operation is called in the Memory Hook,
     // such as calling LOG/iostream/sstream/stringstream/etc. related methods,
     // must increase the control to avoid entering infinite recursion, otherwise it may cause crash or stuck,
     void consume(int64_t size);
@@ -104,11 +82,11 @@ public:
     template <bool CheckLimit>
     void flush_untracked_mem();
 
-    bool is_attach_query() { return _fragment_instance_id_stack.back() != TUniqueId(); }
+    bool is_attach_query() { return _fragment_instance_id != TUniqueId(); }
 
     std::shared_ptr<MemTrackerLimiter> limiter_mem_tracker() {
-        if (!_init) init();
-        return _limiter_tracker_stack.back();
+        if (!_init) init(); // ExecEnv not initialized when thread is created.
+        return _limiter_tracker;
     }
     MemTrackerLimiter* limiter_mem_tracker_raw() {
         if (!_init) init();
@@ -122,14 +100,13 @@ public:
     std::string print_debug_string() {
         fmt::memory_buffer consumer_tracker_buf;
         for (const auto& v : _consumer_tracker_stack) {
-            fmt::format_to(consumer_tracker_buf, "{}, ",
-                           MemTracker::log_usage(v->make_snapshot(0)));
+            fmt::format_to(consumer_tracker_buf, "{}, ", MemTracker::log_usage(v->make_snapshot()));
         }
         return fmt::format(
-                "ThreadMemTrackerMgr debug, _untracked_mem:{}, _task_id:{}, "
+                "ThreadMemTrackerMgr debug, _untracked_mem:{}, "
                 "_limiter_tracker:<{}>, _consumer_tracker_stack:<{}>",
-                std::to_string(_untracked_mem), _task_id_stack.back(),
-                _limiter_tracker_raw->log_usage(1), fmt::to_string(consumer_tracker_buf));
+                std::to_string(_untracked_mem), _limiter_tracker_raw->log_usage(),
+                fmt::to_string(consumer_tracker_buf));
     }
 
 private:
@@ -141,8 +118,7 @@ private:
 private:
     // is false: ExecEnv::GetInstance()->initialized() = false when thread local is initialized
     bool _init = false;
-    // Cache untracked mem, only update to _untracked_mems when switching mem tracker.
-    // Frequent calls to unordered_map _untracked_mems[] in consume will degrade performance.
+    // Cache untracked mem.
     int64_t _untracked_mem = 0;
     int64_t old_untracked_mem = 0;
 
@@ -151,8 +127,7 @@ private:
 
     std::string failed_msg = std::string();
 
-    // _limiter_tracker_stack[0] = orphan_mem_tracker
-    std::vector<std::shared_ptr<MemTrackerLimiter>> _limiter_tracker_stack;
+    std::shared_ptr<MemTrackerLimiter> _limiter_tracker;
     MemTrackerLimiter* _limiter_tracker_raw = nullptr;
     std::vector<MemTracker*> _consumer_tracker_stack;
 
@@ -161,35 +136,18 @@ private:
     // If there is a memory new/delete operation in the consume method, it may enter infinite recursion.
     bool _stop_consume = false;
     bool _check_attach = true;
-    std::vector<std::string> _task_id_stack;
-    std::vector<TUniqueId> _fragment_instance_id_stack;
+    TUniqueId _fragment_instance_id = TUniqueId();
     ExceedCallBack _cb_func = nullptr;
 };
 
 inline void ThreadMemTrackerMgr::init() {
-    DCHECK(_limiter_tracker_stack.size() == 0);
-    DCHECK(_limiter_tracker_raw == nullptr);
-    init_impl();
-}
-
-inline void ThreadMemTrackerMgr::init_impl() {
-    _limiter_tracker_stack.push_back(ExecEnv::GetInstance()->orphan_mem_tracker());
+    DCHECK(_limiter_tracker == nullptr);
+    _limiter_tracker = ExecEnv::GetInstance()->orphan_mem_tracker();
     _limiter_tracker_raw = ExecEnv::GetInstance()->orphan_mem_tracker_raw();
-    _task_id_stack.push_back("");
-    _fragment_instance_id_stack.push_back(TUniqueId());
     _check_limit = true;
     _init = true;
 }
 
-inline void ThreadMemTrackerMgr::clear() {
-    flush_untracked_mem<false>();
-    std::vector<std::shared_ptr<MemTrackerLimiter>>().swap(_limiter_tracker_stack);
-    std::vector<MemTracker*>().swap(_consumer_tracker_stack);
-    std::vector<std::string>().swap(_task_id_stack);
-    std::vector<TUniqueId>().swap(_fragment_instance_id_stack);
-    init_impl();
-}
-
 inline bool ThreadMemTrackerMgr::push_consumer_tracker(MemTracker* tracker) {
     DCHECK(tracker) << print_debug_string();
     if (std::count(_consumer_tracker_stack.begin(), _consumer_tracker_stack.end(), tracker)) {
@@ -226,9 +184,9 @@ inline void ThreadMemTrackerMgr::consume(int64_t size) {
 template <bool CheckLimit>
 inline void ThreadMemTrackerMgr::flush_untracked_mem() {
     // Temporary memory may be allocated during the consumption of the mem tracker, which will lead to entering
-    // the TCMalloc Hook again, so suspend consumption to avoid falling into an infinite loop.
+    // the Memory Hook again, so suspend consumption to avoid falling into an infinite loop.
     _stop_consume = true;
-    if (!_init) init();
+    if (!_init) init(); // ExecEnv not initialized when thread is created.
     DCHECK(_limiter_tracker_raw);
     old_untracked_mem = _untracked_mem;
     if (_count_scope_mem) _scope_mem += _untracked_mem;
diff --git a/be/src/runtime/plan_fragment_executor.cpp b/be/src/runtime/plan_fragment_executor.cpp
index 6e60c27cb9..5f83fe454a 100644
--- a/be/src/runtime/plan_fragment_executor.cpp
+++ b/be/src/runtime/plan_fragment_executor.cpp
@@ -95,10 +95,11 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request,
             fragments_ctx == nullptr ? request.query_globals : fragments_ctx->query_globals;
     _runtime_state.reset(new RuntimeState(params, request.query_options, query_globals, _exec_env));
     _runtime_state->set_query_fragments_ctx(fragments_ctx);
+    _runtime_state->set_query_mem_tracker(fragments_ctx->query_mem_tracker);
     _runtime_state->set_tracer(std::move(tracer));
 
-    RETURN_IF_ERROR(_runtime_state->init_mem_trackers(_query_id));
     SCOPED_ATTACH_TASK(_runtime_state.get());
+    _runtime_state->init_scanner_mem_trackers();
     _runtime_state->runtime_filter_mgr()->init();
     _runtime_state->set_be_number(request.backend_num);
     if (request.__isset.backend_id) {
@@ -230,7 +231,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request,
 }
 
 Status PlanFragmentExecutor::open() {
-    int64_t mem_limit = _runtime_state->instance_mem_tracker()->limit();
+    int64_t mem_limit = _runtime_state->query_mem_tracker()->limit();
     LOG_INFO("PlanFragmentExecutor::open")
             .tag("query_id", _query_id)
             .tag("instance_id", _runtime_state->fragment_instance_id())
@@ -459,7 +460,7 @@ void PlanFragmentExecutor::_collect_node_statistics() {
     DCHECK(_runtime_state->backend_id() != -1);
     NodeStatistics* node_statistics =
             _query_statistics->add_nodes_statistics(_runtime_state->backend_id());
-    node_statistics->add_peak_memory(_runtime_state->instance_mem_tracker()->peak_consumption());
+    node_statistics->add_peak_memory(_runtime_state->query_mem_tracker()->peak_consumption());
 }
 
 void PlanFragmentExecutor::report_profile() {
diff --git a/be/src/runtime/query_fragments_ctx.h b/be/src/runtime/query_fragments_ctx.h
index 8f9ceb38d6..1fc58f2f28 100644
--- a/be/src/runtime/query_fragments_ctx.h
+++ b/be/src/runtime/query_fragments_ctx.h
@@ -26,6 +26,8 @@
 #include "gen_cpp/Types_types.h"               // for TUniqueId
 #include "runtime/datetime_value.h"
 #include "runtime/exec_env.h"
+#include "runtime/memory/mem_tracker_limiter.h"
+#include "util/pretty_printer.h"
 #include "util/threadpool.h"
 
 namespace doris {
@@ -41,6 +43,21 @@ public:
         _start_time = DateTimeValue::local_time();
     }
 
+    ~QueryFragmentsCtx() {
+        // A consumption equal to 0 means that, after this QueryFragmentsCtx was created, the query
+        // was found to already exist in _fragments_ctx_map, so the query mem tracker was not used.
+        // After use, the consumption is not equal to 0, because some memory is consumed on the
+        // query mem tracker but released on other trackers.
+        if (query_mem_tracker->consumption() != 0) {
+            LOG(INFO) << fmt::format(
+                    "Deregister query/load memory tracker, queryId={}, Limit={}, CurrUsed={}, "
+                    "PeakUsed={}",
+                    print_id(query_id), MemTracker::print_bytes(query_mem_tracker->limit()),
+                    MemTracker::print_bytes(query_mem_tracker->consumption()),
+                    MemTracker::print_bytes(query_mem_tracker->peak_consumption()));
+        }
+    }
+
     bool countdown() { return fragment_num.fetch_sub(1) == 1; }
 
     bool is_timeout(const DateTimeValue& now) const {
@@ -99,6 +116,8 @@ public:
     std::atomic<int> fragment_num;
     int timeout_second;
     ObjectPool obj_pool;
+    // MemTracker that is shared by all fragment instances running on this host.
+    std::shared_ptr<MemTrackerLimiter> query_mem_tracker;
 
 private:
     ExecEnv* _exec_env;
diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp
index 8087d6efc5..7bc894e1da 100644
--- a/be/src/runtime/runtime_filter_mgr.cpp
+++ b/be/src/runtime/runtime_filter_mgr.cpp
@@ -44,7 +44,7 @@ RuntimeFilterMgr::RuntimeFilterMgr(const UniqueId& query_id, RuntimeState* state
 RuntimeFilterMgr::~RuntimeFilterMgr() {}
 
 Status RuntimeFilterMgr::init() {
-    DCHECK(_state->instance_mem_tracker() != nullptr);
+    DCHECK(_state->query_mem_tracker() != nullptr);
     _tracker = std::make_unique<MemTracker>("RuntimeFilterMgr");
     return Status::OK();
 }
diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp
index 3766431f50..07b04f5f09 100644
--- a/be/src/runtime/runtime_state.cpp
+++ b/be/src/runtime/runtime_state.cpp
@@ -34,7 +34,6 @@
 #include "runtime/exec_env.h"
 #include "runtime/load_path_mgr.h"
 #include "runtime/memory/mem_tracker.h"
-#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/runtime_filter_mgr.h"
 #include "util/file_utils.h"
 #include "util/load_error_hub.h"
@@ -160,7 +159,6 @@ RuntimeState::~RuntimeState() {
         _error_hub->close();
     }
 
-    // Manually release the child mem tracker before _instance_mem_tracker is destructed.
     _obj_pool->clear();
     _runtime_filter_mgr.reset();
 }
@@ -216,53 +214,10 @@ Status RuntimeState::init(const TUniqueId& fragment_instance_id, const TQueryOpt
 }
 
 Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) {
-    bool has_query_mem_tracker = _query_options.__isset.mem_limit && (_query_options.mem_limit > 0);
-    int64_t bytes_limit = has_query_mem_tracker ? _query_options.mem_limit : -1;
-    if (bytes_limit > ExecEnv::GetInstance()->process_mem_tracker()->limit()) {
-        VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES)
-                    << " exceeds process memory limit of "
-                    << PrettyPrinter::print(ExecEnv::GetInstance()->process_mem_tracker()->limit(),
-                                            TUnit::BYTES)
-                    << ". Using process memory limit instead";
-        bytes_limit = ExecEnv::GetInstance()->process_mem_tracker()->limit();
-    }
-    auto mem_tracker_counter = ADD_COUNTER(&_profile, "MemoryLimit", TUnit::BYTES);
-    mem_tracker_counter->set(bytes_limit);
-
-    if (query_type() == TQueryType::SELECT) {
-        _query_mem_tracker =
-                _exec_env->task_pool_mem_tracker_registry()->register_query_mem_tracker(
-                        print_id(query_id), bytes_limit);
-        _scanner_mem_tracker =
-                _exec_env->task_pool_mem_tracker_registry()->register_query_scanner_mem_tracker(
-                        print_id(query_id));
-    } else if (query_type() == TQueryType::LOAD) {
-        _query_mem_tracker = _exec_env->task_pool_mem_tracker_registry()->register_load_mem_tracker(
-                print_id(query_id), bytes_limit);
-        _scanner_mem_tracker =
-                _exec_env->task_pool_mem_tracker_registry()->register_load_scanner_mem_tracker(
-                        print_id(query_id));
-    } else {
-        DCHECK(false);
-        _query_mem_tracker = ExecEnv::GetInstance()->query_pool_mem_tracker();
-    }
-    _query_mem_tracker->enable_reset_zero();
-
-    _instance_mem_tracker = std::make_shared<MemTrackerLimiter>(
-            -1, "RuntimeState:instance:" + print_id(_fragment_instance_id), _query_mem_tracker,
-            &_profile);
-
-    if (_query_options.is_report_success) {
-        _query_mem_tracker->enable_print_log_usage();
-        _instance_mem_tracker->enable_print_log_usage();
-    }
-
-    return Status::OK();
-}
-
-Status RuntimeState::init_instance_mem_tracker() {
-    _query_mem_tracker = nullptr;
-    _instance_mem_tracker = std::make_shared<MemTrackerLimiter>(-1, "RuntimeState:instance");
+    _query_mem_tracker = std::make_shared<MemTrackerLimiter>(
+            MemTrackerLimiter::Type::QUERY, fmt::format("TestQuery#Id={}", print_id(query_id)));
+    _scanner_mem_tracker =
+            std::make_shared<MemTracker>(fmt::format("TestScanner#QueryId={}", print_id(query_id)));
     return Status::OK();
 }
 
@@ -326,9 +281,7 @@ Status RuntimeState::set_mem_limit_exceeded(const std::string& msg) {
 Status RuntimeState::check_query_state(const std::string& msg) {
     // TODO: it would be nice if this also checked for cancellation, but doing so breaks
     // cases where we use Status::Cancelled("Cancelled") to indicate that the limit was reached.
-    if (thread_context()
-                ->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()
-                ->any_limit_exceeded()) {
+    if (thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker()->limit_exceeded()) {
         RETURN_LIMIT_EXCEEDED(this, msg);
     }
     return query_status();
diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index 83f651725c..69b40f6c6f 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -76,14 +76,13 @@ public:
     Status init(const TUniqueId& fragment_instance_id, const TQueryOptions& query_options,
                 const TQueryGlobals& query_globals, ExecEnv* exec_env);
 
-    // Set up four-level hierarchy of mem trackers: process, query, fragment instance.
-    // The instance tracker is tied to our profile.
-    // Specific parts of the fragment (i.e. exec nodes, sinks, data stream senders, etc)
-    // will add a fourth level when they are initialized.
-    Status init_mem_trackers(const TUniqueId& query_id);
-
-    // for ut only
-    Status init_instance_mem_tracker();
+    // Call this after SCOPED_ATTACH_TASK.
+    void init_scanner_mem_trackers() {
+        _scanner_mem_tracker = std::make_shared<MemTracker>(
+                fmt::format("Scanner#QueryId={}", print_id(_query_id)));
+    }
+    // For unit tests and non-query use.
+    Status init_mem_trackers(const TUniqueId& query_id = TUniqueId());
 
     // Gets/Creates the query wide block mgr.
     Status create_block_mgr();
@@ -116,8 +115,7 @@ public:
     const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; }
     ExecEnv* exec_env() { return _exec_env; }
     std::shared_ptr<MemTrackerLimiter> query_mem_tracker() { return _query_mem_tracker; }
-    std::shared_ptr<MemTrackerLimiter> instance_mem_tracker() { return _instance_mem_tracker; }
-    std::shared_ptr<MemTrackerLimiter> scanner_mem_tracker() { return _scanner_mem_tracker; }
+    std::shared_ptr<MemTracker> scanner_mem_tracker() { return _scanner_mem_tracker; }
     ThreadResourceMgr::ResourcePool* resource_pool() { return _resource_pool; }
 
     void set_fragment_root_id(PlanNodeId id) {
@@ -382,6 +380,10 @@ public:
 
     QueryFragmentsCtx* get_query_fragments_ctx() { return _query_ctx; }
 
+    void set_query_mem_tracker(const std::shared_ptr<MemTrackerLimiter>& tracker) {
+        _query_mem_tracker = tracker;
+    }
+
     OpentelemetryTracer get_tracer() { return _tracer; }
 
     void set_tracer(OpentelemetryTracer&& tracer) { _tracer = std::move(tracer); }
@@ -398,15 +400,9 @@ private:
 
     static const int DEFAULT_BATCH_SIZE = 2048;
 
-    // MemTracker that is shared by all fragment instances running on this host.
-    // The query mem tracker must be released after the _instance_mem_tracker.
     std::shared_ptr<MemTrackerLimiter> _query_mem_tracker;
-    // Memory usage of this fragment instance
-    std::shared_ptr<MemTrackerLimiter> _instance_mem_tracker;
-    // Count the memory consumption of Scanner, independent and unique for each query,
-    // this means that scnner memory does not count into query mem tracker,
-    // label is `Scanner#{queryId}`.
-    std::shared_ptr<MemTrackerLimiter> _scanner_mem_tracker;
+    // Count the memory consumption of Scanner
+    std::shared_ptr<MemTracker> _scanner_mem_tracker;
 
     // put runtime state before _obj_pool, so that it will be deconstructed after
     // _obj_pool. Because some of object in _obj_pool will use profile when deconstructing.
diff --git a/be/src/runtime/sorted_run_merger.cc b/be/src/runtime/sorted_run_merger.cc
index 28d347462f..32bdad9a6d 100644
--- a/be/src/runtime/sorted_run_merger.cc
+++ b/be/src/runtime/sorted_run_merger.cc
@@ -183,7 +183,7 @@ private:
     std::condition_variable _batch_prepared_cv;
 
     void process_sorted_run_task(const std::shared_ptr<MemTrackerLimiter>& mem_tracker) {
-        SCOPED_ATTACH_TASK(mem_tracker, ThreadContext::TaskType::QUERY);
+        SCOPED_ATTACH_TASK(mem_tracker);
         std::unique_lock<std::mutex> lock(_mutex);
         while (true) {
             _batch_prepared_cv.wait(lock, [this]() { return !_backup_ready.load(); });
diff --git a/be/src/runtime/stream_load/stream_load_pipe.h b/be/src/runtime/stream_load/stream_load_pipe.h
index 106426653e..fac16b81ff 100644
--- a/be/src/runtime/stream_load/stream_load_pipe.h
+++ b/be/src/runtime/stream_load/stream_load_pipe.h
@@ -47,7 +47,7 @@ public:
               _use_proto(use_proto) {}
 
     virtual ~StreamLoadPipe() {
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->nursery_mem_tracker());
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         while (!_buf_queue.empty()) _buf_queue.pop_front();
     }
 
@@ -119,7 +119,7 @@ public:
     }
 
     Status read(uint8_t* data, int64_t data_size, int64_t* bytes_read, bool* eof) override {
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->nursery_mem_tracker());
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         *bytes_read = 0;
         while (*bytes_read < data_size) {
             std::unique_lock<std::mutex> l(_lock);
diff --git a/be/src/runtime/thread_context.cpp b/be/src/runtime/thread_context.cpp
index 442fe755a6..cd593f05ad 100644
--- a/be/src/runtime/thread_context.cpp
+++ b/be/src/runtime/thread_context.cpp
@@ -39,22 +39,14 @@ ScopeMemCount::~ScopeMemCount() {
 }
 
 AttachTask::AttachTask(const std::shared_ptr<MemTrackerLimiter>& mem_tracker,
-                       const ThreadContext::TaskType& type, const std::string& task_id,
-                       const TUniqueId& fragment_instance_id) {
-    DCHECK(mem_tracker);
-    thread_context()->attach_task(type, task_id, fragment_instance_id, mem_tracker);
+                       const std::string& task_id, const TUniqueId& fragment_instance_id) {
+    thread_context()->attach_task(task_id, fragment_instance_id, mem_tracker);
 }
 
 AttachTask::AttachTask(RuntimeState* runtime_state) {
-#ifndef BE_TEST
-    DCHECK(print_id(runtime_state->query_id()) != "");
-    DCHECK(runtime_state->fragment_instance_id() != TUniqueId());
-#endif // BE_TEST
-    DCHECK(runtime_state->instance_mem_tracker());
-    thread_context()->attach_task(ThreadContext::query_to_task_type(runtime_state->query_type()),
-                                  print_id(runtime_state->query_id()),
+    thread_context()->attach_task(print_id(runtime_state->query_id()),
                                   runtime_state->fragment_instance_id(),
-                                  runtime_state->instance_mem_tracker());
+                                  runtime_state->query_mem_tracker());
 }
 
 AttachTask::~AttachTask() {
@@ -65,14 +57,13 @@ AttachTask::~AttachTask() {
 }
 
 SwitchThreadMemTrackerLimiter::SwitchThreadMemTrackerLimiter(
-        const std::shared_ptr<MemTrackerLimiter>& mem_tracker_limiter) {
-    DCHECK(mem_tracker_limiter);
-    thread_context()->_thread_mem_tracker_mgr->attach_limiter_tracker("", TUniqueId(),
-                                                                      mem_tracker_limiter);
+        const std::shared_ptr<MemTrackerLimiter>& mem_tracker) {
+    _old_mem_tracker = thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker();
+    thread_context()->_thread_mem_tracker_mgr->attach_limiter_tracker(mem_tracker, TUniqueId());
 }
 
 SwitchThreadMemTrackerLimiter::~SwitchThreadMemTrackerLimiter() {
-    thread_context()->_thread_mem_tracker_mgr->detach_limiter_tracker();
+    thread_context()->_thread_mem_tracker_mgr->detach_limiter_tracker(_old_mem_tracker);
 }
 
 AddThreadMemTrackerConsumer::AddThreadMemTrackerConsumer(MemTracker* mem_tracker) {
diff --git a/be/src/runtime/thread_context.h b/be/src/runtime/thread_context.h
index ca09df0ecf..d07cbe7056 100644
--- a/be/src/runtime/thread_context.h
+++ b/be/src/runtime/thread_context.h
@@ -68,8 +68,8 @@
 // Usually used after SCOPED_ATTACH_TASK, during query execution.
 #define RETURN_LIMIT_EXCEEDED(state, msg, ...)                                              \
     return doris::thread_context()                                                          \
-            ->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()                            \
-            ->mem_limit_exceeded(                                                           \
+            ->_thread_mem_tracker_mgr->limiter_mem_tracker()                                \
+            ->fragment_mem_limit_exceeded(                                                  \
                     state,                                                                  \
                     fmt::format("exec node:<{}>, {}",                                       \
                                 doris::thread_context()                                     \
@@ -140,82 +140,36 @@ inline thread_local bthread_t bthread_id;
 //
 // There may be other optional info to be added later.
 class ThreadContext {
-public:
-    enum TaskType {
-        UNKNOWN = 0,
-        QUERY = 1,
-        LOAD = 2,
-        COMPACTION = 3,
-        STORAGE = 4,
-        BRPC = 5
-        // to be added ...
-    };
-    inline static const std::string TaskTypeStr[] = {"UNKNOWN",    "QUERY",   "LOAD",
-                                                     "COMPACTION", "STORAGE", "BRPC"};
-
 public:
     ThreadContext() {
         _thread_mem_tracker_mgr.reset(new ThreadMemTrackerMgr());
-        init();
-    }
-
-    ~ThreadContext() {
-        // Restore to the memory state before init=true to ensure accurate overall memory statistics.
-        // Thereby ensuring that the memory alloc size is not tracked during the initialization of the
-        // ThreadContext before `init = true in ThreadContextPtr()`,
-        // Equal to the size of the memory release that is not tracked during the destruction of the
-        // ThreadContext after `init = false in ~ThreadContextPtr()`,
-        if (ExecEnv::GetInstance()->initialized()) _thread_mem_tracker_mgr->clear();
-        thread_context_ptr.init = false;
-    }
-
-    void init() {
-        _type = TaskType::UNKNOWN;
         if (ExecEnv::GetInstance()->initialized()) _thread_mem_tracker_mgr->init();
-        _thread_id = get_thread_id();
     }
 
-    void attach_task(const TaskType& type, const std::string& task_id,
-                     const TUniqueId& fragment_instance_id,
+    ~ThreadContext() { thread_context_ptr.init = false; }
+
+    void attach_task(const std::string& task_id, const TUniqueId& fragment_instance_id,
                      const std::shared_ptr<MemTrackerLimiter>& mem_tracker) {
 #ifndef BE_TEST
         // will only attach_task at the beginning of the thread function, there should be no duplicate attach_task.
-        DCHECK((_type == TaskType::UNKNOWN || _type == TaskType::BRPC) &&
-               type != TaskType::UNKNOWN && _task_id == "" && mem_tracker != nullptr)
-                << ",new tracker label: " << mem_tracker->label() << ",old tracker label: "
-                << _thread_mem_tracker_mgr->limiter_mem_tracker_raw()->label();
+        DCHECK(mem_tracker);
+        // Orphan is the thread's default tracker.
+        DCHECK(_thread_mem_tracker_mgr->limiter_mem_tracker()->label() == "Orphan")
+                << ", attach mem tracker label: " << mem_tracker->label();
 #endif
-        _type = type;
         _task_id = task_id;
         _fragment_instance_id = fragment_instance_id;
-        _thread_mem_tracker_mgr->attach_limiter_tracker(task_id, fragment_instance_id, mem_tracker);
+        _thread_mem_tracker_mgr->attach_limiter_tracker(mem_tracker, fragment_instance_id);
     }
 
     void detach_task() {
-        _type = TaskType::UNKNOWN;
         _task_id = "";
         _fragment_instance_id = TUniqueId();
         _thread_mem_tracker_mgr->detach_limiter_tracker();
     }
 
-    const TaskType& type() const { return _type; }
-    const void set_type(const TaskType& type) { _type = type; }
-    const std::string& task_id() const { return _task_id; }
-    const std::string& thread_id_str() const { return _thread_id; }
     const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; }
 
-    static TaskType query_to_task_type(const TQueryType::type& query_type) {
-        switch (query_type) {
-        case TQueryType::SELECT:
-            return TaskType::QUERY;
-        case TQueryType::LOAD:
-            return TaskType::LOAD;
-        default:
-            DCHECK(false);
-            return TaskType::UNKNOWN;
-        }
-    }
-
     std::string get_thread_id() {
         std::stringstream ss;
         ss << std::this_thread::get_id();
@@ -231,23 +185,23 @@ public:
     std::unique_ptr<ThreadMemTrackerMgr> _thread_mem_tracker_mgr;
 
 private:
-    std::string _thread_id;
-    TaskType _type;
-    std::string _task_id;
+    std::string _task_id = "";
     TUniqueId _fragment_instance_id;
 };
 
-static void attach_bthread() {
+// Cache the pointer of the bthread local in a pthread local,
+// to avoid calling bthread_getspecific frequently to get the bthread local, which has performance problems.
+static void pthread_attach_bthread() {
     bthread_id = bthread_self();
     bthread_context = static_cast<ThreadContext*>(bthread_getspecific(btls_key));
     if (bthread_context == nullptr) {
         // A new bthread starts, two scenarios:
         // 1. First call to bthread_getspecific (and before any bthread_setspecific) returns NULL
         // 2. There are not enough reusable btls in btls pool.
-#ifndef BE_TEST
-        DCHECK(ExecEnv::GetInstance()->initialized());
-#endif
-        // Create thread-local data on demand.
+        // Otherwise, two scenarios:
+        // 1. A new bthread starts, but gets a reused btls.
+        // 2. A pthread switch occurs. The pthread switch cannot be accurately identified at the moment,
+        // so the tracker is reset to 0, as when a btls is reused.
         bthread_context = new ThreadContext;
         // set the data so that next time bthread_getspecific in the thread returns the data.
         CHECK_EQ(0, bthread_setspecific(btls_key, bthread_context));
@@ -257,9 +211,9 @@ static void attach_bthread() {
 static ThreadContext* thread_context() {
     if (bthread_self() != 0) {
         if (bthread_self() != bthread_id) {
-            // A new bthread starts or pthread switch occurs.
+            // A new bthread starts or a pthread switch occurs; during this period, stop the use of thread_context.
             thread_context_ptr.init = false;
-            attach_bthread();
+            pthread_attach_bthread();
             thread_context_ptr.init = true;
         }
         return bthread_context;
@@ -281,7 +235,6 @@ private:
 class AttachTask {
 public:
     explicit AttachTask(const std::shared_ptr<MemTrackerLimiter>& mem_tracker,
-                        const ThreadContext::TaskType& type = ThreadContext::TaskType::UNKNOWN,
                         const std::string& task_id = "",
                         const TUniqueId& fragment_instance_id = TUniqueId());
 
@@ -292,10 +245,12 @@ public:
 
 class SwitchThreadMemTrackerLimiter {
 public:
-    explicit SwitchThreadMemTrackerLimiter(
-            const std::shared_ptr<MemTrackerLimiter>& mem_tracker_limiter);
+    explicit SwitchThreadMemTrackerLimiter(const std::shared_ptr<MemTrackerLimiter>& mem_tracker);
 
     ~SwitchThreadMemTrackerLimiter();
+
+private:
+    std::shared_ptr<MemTrackerLimiter> _old_mem_tracker;
 };
 
 class AddThreadMemTrackerConsumer {
@@ -345,6 +300,10 @@ private:
             size, doris::thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw())
 
 // Mem Hook to consume thread mem tracker
+// TODO: In the original design, the MemTracker consume method is called before the memory is allocated.
+// If the consume succeeds, the memory is actually allocated; otherwise an exception is thrown.
+// But memory statistics gathered through the TCMalloc new/delete hook are recorded after the memory
+// is actually allocated, which is different from the previous behavior.
 #define MEM_MALLOC_HOOK(size)                                                \
     do {                                                                     \
         if (doris::thread_context_ptr.init) {                                \
diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp
index 3b53e6dc31..714e4757bc 100644
--- a/be/src/service/doris_main.cpp
+++ b/be/src/service/doris_main.cpp
@@ -53,7 +53,6 @@
 #include "runtime/exec_env.h"
 #include "runtime/heartbeat_flags.h"
 #include "runtime/load_channel_mgr.h"
-#include "runtime/memory/mem_tracker_task_pool.h"
 #include "service/backend_options.h"
 #include "service/backend_service.h"
 #include "service/brpc_service.h"
@@ -500,29 +499,16 @@ int main(int argc, char** argv) {
         __lsan_do_leak_check();
 #endif
         doris::PerfCounters::refresh_proc_status();
+        doris::MemTrackerLimiter::refresh_global_counter();
+        doris::ExecEnv::GetInstance()->load_channel_mgr()->refresh_mem_tracker();
 #if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) && \
         !defined(USE_JEMALLOC)
         doris::MemInfo::refresh_allocator_mem();
 #endif
-        int64_t allocator_cache_mem_diff =
-                doris::MemInfo::allocator_cache_mem() -
-                doris::ExecEnv::GetInstance()->allocator_cache_mem_tracker()->consumption();
-        doris::ExecEnv::GetInstance()->allocator_cache_mem_tracker()->consume(
-                allocator_cache_mem_diff);
-        CONSUME_THREAD_MEM_TRACKER(allocator_cache_mem_diff);
-        doris::ExecEnv::GetInstance()->load_channel_mgr()->refresh_mem_tracker();
-
-        // 1s clear the expired task mem tracker, a query mem tracker is about 57 bytes.
-        // this will cause coredump for ASAN build when running regression test,
-        // disable temporarily.
-        doris::ExecEnv::GetInstance()->task_pool_mem_tracker_registry()->logout_task_mem_tracker();
-        // The process tracker print log usage interval is 1s to avoid a large number of tasks being
-        // canceled when the process exceeds the mem limit, resulting in too many duplicate logs.
-        doris::ExecEnv::GetInstance()->process_mem_tracker()->enable_print_log_usage();
         if (doris::config::memory_debug) {
-            doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage("main routine");
-            doris::ExecEnv::GetInstance()->process_mem_tracker()->enable_print_log_usage();
+            doris::MemTrackerLimiter::print_log_process_usage("memory_debug");
         }
+        doris::MemTrackerLimiter::enable_print_log_process_usage();
         sleep(1);
     }
 
diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp
index 3e672ab910..26301b8f4e 100644
--- a/be/src/service/internal_service.cpp
+++ b/be/src/service/internal_service.cpp
@@ -34,7 +34,6 @@
 #include "runtime/fold_constant_executor.h"
 #include "runtime/fragment_mgr.h"
 #include "runtime/load_channel_mgr.h"
-#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/result_buffer_mgr.h"
 #include "runtime/routine_load/routine_load_task_executor.h"
 #include "runtime/runtime_state.h"
@@ -69,10 +68,12 @@ static void thread_context_deleter(void* d) {
 template <typename T>
 class NewHttpClosure : public ::google::protobuf::Closure {
 public:
+    NewHttpClosure(google::protobuf::Closure* done) : _done(done) {}
     NewHttpClosure(T* request, google::protobuf::Closure* done) : _request(request), _done(done) {}
     ~NewHttpClosure() {}
 
     void Run() {
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         if (_request != nullptr) {
             delete _request;
             _request = nullptr;
@@ -108,22 +109,23 @@ void PInternalServiceImpl::transmit_data(google::protobuf::RpcController* cntl_b
                                          PTransmitDataResult* response,
                                          google::protobuf::Closure* done) {
     // TODO(zxy) delete in 1.2 version
+    google::protobuf::Closure* new_done = new NewHttpClosure<PTransmitDataParams>(done);
     brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base);
     attachment_transfer_request_row_batch<PTransmitDataParams>(request, cntl);
 
-    _transmit_data(cntl_base, request, response, done, Status::OK());
+    _transmit_data(cntl_base, request, response, new_done, Status::OK());
 }
 
 void PInternalServiceImpl::transmit_data_by_http(google::protobuf::RpcController* cntl_base,
                                                  const PEmptyRequest* request,
                                                  PTransmitDataResult* response,
                                                  google::protobuf::Closure* done) {
-    PTransmitDataParams* request_raw = new PTransmitDataParams();
-    google::protobuf::Closure* done_raw =
-            new NewHttpClosure<PTransmitDataParams>(request_raw, done);
+    PTransmitDataParams* new_request = new PTransmitDataParams();
+    google::protobuf::Closure* new_done =
+            new NewHttpClosure<PTransmitDataParams>(new_request, done);
     brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base);
-    Status st = attachment_extract_request_contain_tuple<PTransmitDataParams>(request_raw, cntl);
-    _transmit_data(cntl_base, request_raw, response, done_raw, st);
+    Status st = attachment_extract_request_contain_tuple<PTransmitDataParams>(new_request, cntl);
+    _transmit_data(cntl_base, new_request, response, new_done, st);
 }
 
 void PInternalServiceImpl::_transmit_data(google::protobuf::RpcController* cntl_base,
@@ -133,17 +135,10 @@ void PInternalServiceImpl::_transmit_data(google::protobuf::RpcController* cntl_
                                           const Status& extract_st) {
     std::string query_id;
     TUniqueId finst_id;
-    std::shared_ptr<MemTrackerLimiter> transmit_tracker = nullptr;
     if (request->has_query_id()) {
         query_id = print_id(request->query_id());
         finst_id.__set_hi(request->finst_id().hi());
         finst_id.__set_lo(request->finst_id().lo());
-        transmit_tracker =
-                _exec_env->task_pool_mem_tracker_registry()->get_task_mem_tracker(query_id);
-    }
-    if (!transmit_tracker) {
-        query_id = "unkown_transmit_data";
-        transmit_tracker = std::make_shared<MemTrackerLimiter>(-1, "unkown_transmit_data");
     }
     VLOG_ROW << "transmit data: fragment_instance_id=" << print_id(request->finst_id())
              << " query_id=" << query_id << " node=" << request->node_id();
@@ -152,7 +147,6 @@ void PInternalServiceImpl::_transmit_data(google::protobuf::RpcController* cntl_
     Status st;
     st.to_protobuf(response->mutable_status());
     if (extract_st.ok()) {
-        SCOPED_ATTACH_TASK(transmit_tracker, ThreadContext::TaskType::QUERY, query_id, finst_id);
         st = _exec_env->stream_mgr()->transmit_data(request, &done);
         if (!st.ok()) {
             LOG(WARNING) << "transmit_data failed, message=" << st.get_error_msg()
@@ -225,23 +219,24 @@ void PInternalServiceImpl::tablet_writer_add_block(google::protobuf::RpcControll
                                                    PTabletWriterAddBlockResult* response,
                                                    google::protobuf::Closure* done) {
     // TODO(zxy) delete in 1.2 version
+    google::protobuf::Closure* new_done = new NewHttpClosure<PTransmitDataParams>(done);
     brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base);
     attachment_transfer_request_block<PTabletWriterAddBlockRequest>(request, cntl);
 
-    _tablet_writer_add_block(cntl_base, request, response, done);
+    _tablet_writer_add_block(cntl_base, request, response, new_done);
 }
 
 void PInternalServiceImpl::tablet_writer_add_block_by_http(
         google::protobuf::RpcController* cntl_base, const ::doris::PEmptyRequest* request,
         PTabletWriterAddBlockResult* response, google::protobuf::Closure* done) {
-    PTabletWriterAddBlockRequest* request_raw = new PTabletWriterAddBlockRequest();
-    google::protobuf::Closure* done_raw =
-            new NewHttpClosure<PTabletWriterAddBlockRequest>(request_raw, done);
+    PTabletWriterAddBlockRequest* new_request = new PTabletWriterAddBlockRequest();
+    google::protobuf::Closure* new_done =
+            new NewHttpClosure<PTabletWriterAddBlockRequest>(new_request, done);
     brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base);
-    Status st = attachment_extract_request_contain_block<PTabletWriterAddBlockRequest>(request_raw,
+    Status st = attachment_extract_request_contain_block<PTabletWriterAddBlockRequest>(new_request,
                                                                                        cntl);
     if (st.ok()) {
-        _tablet_writer_add_block(cntl_base, request_raw, response, done_raw);
+        _tablet_writer_add_block(cntl_base, new_request, response, new_done);
     } else {
         st.to_protobuf(response->mutable_status());
     }
@@ -280,20 +275,21 @@ void PInternalServiceImpl::tablet_writer_add_batch(google::protobuf::RpcControll
                                                    const PTabletWriterAddBatchRequest* request,
                                                    PTabletWriterAddBatchResult* response,
                                                    google::protobuf::Closure* done) {
-    _tablet_writer_add_batch(cntl_base, request, response, done);
+    google::protobuf::Closure* new_done = new NewHttpClosure<PTransmitDataParams>(done);
+    _tablet_writer_add_batch(cntl_base, request, response, new_done);
 }
 
 void PInternalServiceImpl::tablet_writer_add_batch_by_http(
         google::protobuf::RpcController* cntl_base, const ::doris::PEmptyRequest* request,
         PTabletWriterAddBatchResult* response, google::protobuf::Closure* done) {
-    PTabletWriterAddBatchRequest* request_raw = new PTabletWriterAddBatchRequest();
-    google::protobuf::Closure* done_raw =
-            new NewHttpClosure<PTabletWriterAddBatchRequest>(request_raw, done);
+    PTabletWriterAddBatchRequest* new_request = new PTabletWriterAddBatchRequest();
+    google::protobuf::Closure* new_done =
+            new NewHttpClosure<PTabletWriterAddBatchRequest>(new_request, done);
     brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base);
-    Status st = attachment_extract_request_contain_tuple<PTabletWriterAddBatchRequest>(request_raw,
+    Status st = attachment_extract_request_contain_tuple<PTabletWriterAddBatchRequest>(new_request,
                                                                                        cntl);
     if (st.ok()) {
-        _tablet_writer_add_batch(cntl_base, request_raw, response, done_raw);
+        _tablet_writer_add_batch(cntl_base, new_request, response, new_done);
     } else {
         st.to_protobuf(response->mutable_status());
     }
@@ -689,22 +685,23 @@ void PInternalServiceImpl::transmit_block(google::protobuf::RpcController* cntl_
                                           PTransmitDataResult* response,
                                           google::protobuf::Closure* done) {
     // TODO(zxy) delete in 1.2 version
+    google::protobuf::Closure* new_done = new NewHttpClosure<PTransmitDataParams>(done);
     brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base);
     attachment_transfer_request_block<PTransmitDataParams>(request, cntl);
 
-    _transmit_block(cntl_base, request, response, done, Status::OK());
+    _transmit_block(cntl_base, request, response, new_done, Status::OK());
 }
 
 void PInternalServiceImpl::transmit_block_by_http(google::protobuf::RpcController* cntl_base,
                                                   const PEmptyRequest* request,
                                                   PTransmitDataResult* response,
                                                   google::protobuf::Closure* done) {
-    PTransmitDataParams* request_raw = new PTransmitDataParams();
-    google::protobuf::Closure* done_raw =
-            new NewHttpClosure<PTransmitDataParams>(request_raw, done);
+    PTransmitDataParams* new_request = new PTransmitDataParams();
+    google::protobuf::Closure* new_done =
+            new NewHttpClosure<PTransmitDataParams>(new_request, done);
     brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_base);
-    Status st = attachment_extract_request_contain_block<PTransmitDataParams>(request_raw, cntl);
-    _transmit_block(cntl_base, request_raw, response, done_raw, st);
+    Status st = attachment_extract_request_contain_block<PTransmitDataParams>(new_request, cntl);
+    _transmit_block(cntl_base, new_request, response, new_done, st);
 }
 
 void PInternalServiceImpl::_transmit_block(google::protobuf::RpcController* cntl_base,
@@ -714,18 +711,10 @@ void PInternalServiceImpl::_transmit_block(google::protobuf::RpcController* cntl
                                            const Status& extract_st) {
     std::string query_id;
     TUniqueId finst_id;
-    std::shared_ptr<MemTrackerLimiter> transmit_tracker = nullptr;
     if (request->has_query_id()) {
         query_id = print_id(request->query_id());
         finst_id.__set_hi(request->finst_id().hi());
         finst_id.__set_lo(request->finst_id().lo());
-        // phmap `parallel_flat_hash_map` is not thread safe, so get query mem tracker may be null pointer.
-        transmit_tracker =
-                _exec_env->task_pool_mem_tracker_registry()->get_task_mem_tracker(query_id);
-    }
-    if (!transmit_tracker) {
-        query_id = "unkown_transmit_block";
-        transmit_tracker = std::make_shared<MemTrackerLimiter>(-1, "unkown_transmit_block");
     }
     VLOG_ROW << "transmit block: fragment_instance_id=" << print_id(request->finst_id())
              << " query_id=" << query_id << " node=" << request->node_id();
@@ -734,7 +723,6 @@ void PInternalServiceImpl::_transmit_block(google::protobuf::RpcController* cntl
     Status st;
     st.to_protobuf(response->mutable_status());
     if (extract_st.ok()) {
-        SCOPED_ATTACH_TASK(transmit_tracker, ThreadContext::TaskType::QUERY, query_id, finst_id);
         st = _exec_env->vstream_mgr()->transmit_block(request, &done);
         if (!st.ok()) {
             LOG(WARNING) << "transmit_block failed, message=" << st.get_error_msg()
diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp
index ab17542100..9070b09527 100644
--- a/be/src/util/mem_info.cpp
+++ b/be/src/util/mem_info.cpp
@@ -104,11 +104,23 @@ void MemInfo::init() {
 
     bool is_percent = true;
     _s_mem_limit = ParseUtil::parse_mem_spec(config::mem_limit, -1, _s_physical_mem, &is_percent);
+    if (_s_mem_limit <= 0) {
+        LOG(WARNING) << "Failed to parse mem limit from '" + config::mem_limit + "'.";
+    }
+    if (_s_mem_limit > _s_physical_mem) {
+        LOG(WARNING) << "Memory limit " << PrettyPrinter::print(_s_mem_limit, TUnit::BYTES)
+                     << " exceeds physical memory of "
+                     << PrettyPrinter::print(_s_physical_mem, TUnit::BYTES)
+                     << ". Using physical memory instead";
+        _s_mem_limit = _s_physical_mem;
+    }
     _s_mem_limit_str = PrettyPrinter::print(_s_mem_limit, TUnit::BYTES);
     _s_hard_mem_limit =
             _s_physical_mem - std::max<int64_t>(209715200L, _s_physical_mem / 10); // 200M
 
-    LOG(INFO) << "Physical Memory: " << PrettyPrinter::print(_s_physical_mem, TUnit::BYTES);
+    LOG(INFO) << "Physical Memory: " << PrettyPrinter::print(_s_physical_mem, TUnit::BYTES)
+              << ", Mem Limit: " << _s_mem_limit_str
+              << ", origin config value: " << config::mem_limit;
     _s_initialized = true;
 }
 #else
diff --git a/be/src/util/ref_count_closure.h b/be/src/util/ref_count_closure.h
index b91d6225ed..c278dae9a3 100644
--- a/be/src/util/ref_count_closure.h
+++ b/be/src/util/ref_count_closure.h
@@ -37,6 +37,7 @@ public:
     bool unref() { return _refs.fetch_sub(1) == 1; }
 
     void Run() override {
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         if (unref()) {
             delete this;
         }
diff --git a/be/src/vec/common/allocator.h b/be/src/vec/common/allocator.h
index 8f9eba9fae..36a11fc6f9 100644
--- a/be/src/vec/common/allocator.h
+++ b/be/src/vec/common/allocator.h
@@ -132,7 +132,7 @@ public:
             if (MAP_FAILED == buf) {
                 RELEASE_THREAD_MEM_TRACKER(size);
                 auto err = fmt::format("Allocator: Cannot mmap {}.", size);
-                doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
+                doris::MemTrackerLimiter::print_log_process_usage(err);
                 doris::vectorized::throwFromErrno(err,
                                                   doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY);
             }
@@ -142,7 +142,7 @@ public:
             doris::Chunk chunk;
             if (!doris::ChunkAllocator::instance()->allocate_align(size, &chunk)) {
                 auto err = fmt::format("Allocator: Cannot allocate chunk {}.", size);
-                doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
+                doris::MemTrackerLimiter::print_log_process_usage(err);
                 doris::vectorized::throwFromErrno(err,
                                                   doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY);
             }
@@ -157,7 +157,7 @@ public:
 
                 if (nullptr == buf) {
                     auto err = fmt::format("Allocator: Cannot malloc {}.", size);
-                    doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
+                    doris::MemTrackerLimiter::print_log_process_usage(err);
                     doris::vectorized::throwFromErrno(
                             err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY);
                 }
@@ -167,7 +167,7 @@ public:
 
                 if (0 != res) {
                     auto err = fmt::format("Cannot allocate memory (posix_memalign) {}.", size);
-                    doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
+                    doris::MemTrackerLimiter::print_log_process_usage(err);
                     doris::vectorized::throwFromErrno(
                             err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY, res);
                 }
@@ -183,7 +183,7 @@ public:
         if (size >= MMAP_THRESHOLD) {
             if (0 != munmap(buf, size)) {
                 auto err = fmt::format("Allocator: Cannot munmap {}.", size);
-                doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
+                doris::MemTrackerLimiter::print_log_process_usage(err);
                 doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_MUNMAP);
             } else {
                 RELEASE_THREAD_MEM_TRACKER(size);
@@ -212,7 +212,7 @@ public:
             if (nullptr == new_buf) {
                 auto err =
                         fmt::format("Allocator: Cannot realloc from {} to {}.", old_size, new_size);
-                doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
+                doris::MemTrackerLimiter::print_log_process_usage(err);
                 doris::vectorized::throwFromErrno(err,
                                                   doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY);
             }
@@ -232,7 +232,7 @@ public:
                 RELEASE_THREAD_MEM_TRACKER(new_size - old_size);
                 auto err = fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
                                        old_size, new_size);
-                doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
+                doris::MemTrackerLimiter::print_log_process_usage(err);
                 doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_MREMAP);
             }
 
diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp b/be/src/vec/exec/scan/scanner_scheduler.cpp
index dbb9cadb24..09578a2ba1 100644
--- a/be/src/vec/exec/scan/scanner_scheduler.cpp
+++ b/be/src/vec/exec/scan/scanner_scheduler.cpp
@@ -185,10 +185,8 @@ void ScannerScheduler::_scanner_scan(ScannerScheduler* scheduler, ScannerContext
                                      VScanner* scanner) {
     INIT_AND_SCOPE_REENTRANT_SPAN_IF(ctx->state()->enable_profile(), ctx->state()->get_tracer(),
                                      ctx->scan_span(), "VScanner::scan");
-    SCOPED_ATTACH_TASK(scanner->runtime_state()->scanner_mem_tracker(),
-                       ThreadContext::query_to_task_type(scanner->runtime_state()->query_type()),
-                       print_id(scanner->runtime_state()->query_id()),
-                       scanner->runtime_state()->fragment_instance_id());
+    SCOPED_ATTACH_TASK(scanner->runtime_state());
+    SCOPED_CONSUME_MEM_TRACKER(scanner->runtime_state()->scanner_mem_tracker().get());
     Thread::set_self_name("_scanner_scan");
     scanner->update_wait_worker_timer();
     // Do not use ScopedTimer. There is no guarantee that, the counter
diff --git a/be/src/vec/exec/volap_scan_node.cpp b/be/src/vec/exec/volap_scan_node.cpp
index 19ab63ae37..7e0abc1e77 100644
--- a/be/src/vec/exec/volap_scan_node.cpp
+++ b/be/src/vec/exec/volap_scan_node.cpp
@@ -394,10 +394,8 @@ void VOlapScanNode::transfer_thread(RuntimeState* state) {
 }
 
 void VOlapScanNode::scanner_thread(VOlapScanner* scanner) {
-    SCOPED_ATTACH_TASK(_runtime_state->scanner_mem_tracker(),
-                       ThreadContext::query_to_task_type(_runtime_state->query_type()),
-                       print_id(_runtime_state->query_id()),
-                       _runtime_state->fragment_instance_id());
+    SCOPED_ATTACH_TASK(_runtime_state);
+    SCOPED_CONSUME_MEM_TRACKER(_runtime_state->scanner_mem_tracker().get());
     Thread::set_self_name("volap_scanner");
     int64_t wait_time = scanner->update_wait_worker_timer();
     // Do not use ScopedTimer. There is no guarantee that, the counter
diff --git a/be/src/vec/runtime/vdata_stream_mgr.cpp b/be/src/vec/runtime/vdata_stream_mgr.cpp
index 511fbbe19d..4743d0933f 100644
--- a/be/src/vec/runtime/vdata_stream_mgr.cpp
+++ b/be/src/vec/runtime/vdata_stream_mgr.cpp
@@ -53,7 +53,7 @@ std::shared_ptr<VDataStreamRecvr> VDataStreamMgr::create_recvr(
     VLOG_FILE << "creating receiver for fragment=" << fragment_instance_id
               << ", node=" << dest_node_id;
     std::shared_ptr<VDataStreamRecvr> recvr(new VDataStreamRecvr(
-            this, row_desc, fragment_instance_id, dest_node_id, num_senders, is_merging,
+            this, state, row_desc, fragment_instance_id, dest_node_id, num_senders, is_merging,
             buffer_size, profile, sub_plan_query_statistics_recvr));
     uint32_t hash_value = get_hash_value(fragment_instance_id, dest_node_id);
     std::lock_guard<std::mutex> l(_lock);
diff --git a/be/src/vec/runtime/vdata_stream_recvr.cpp b/be/src/vec/runtime/vdata_stream_recvr.cpp
index a649816a1b..7a9a55ebc1 100644
--- a/be/src/vec/runtime/vdata_stream_recvr.cpp
+++ b/be/src/vec/runtime/vdata_stream_recvr.cpp
@@ -77,10 +77,7 @@ Status VDataStreamRecvr::SenderQueue::get_batch(Block** next_block) {
 
     if (!_pending_closures.empty()) {
         auto closure_pair = _pending_closures.front();
-        {
-            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
-            closure_pair.first->Run();
-        }
+        closure_pair.first->Run();
         _pending_closures.pop_front();
 
         closure_pair.second.stop();
@@ -224,11 +221,8 @@ void VDataStreamRecvr::SenderQueue::cancel() {
 
     {
         std::lock_guard<std::mutex> l(_lock);
-        {
-            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
-            for (auto closure_pair : _pending_closures) {
-                closure_pair.first->Run();
-            }
+        for (auto closure_pair : _pending_closures) {
+            closure_pair.first->Run();
         }
         _pending_closures.clear();
     }
@@ -242,11 +236,8 @@ void VDataStreamRecvr::SenderQueue::close() {
         std::lock_guard<std::mutex> l(_lock);
         _is_cancelled = true;
 
-        {
-            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
-            for (auto closure_pair : _pending_closures) {
-                closure_pair.first->Run();
-            }
+        for (auto closure_pair : _pending_closures) {
+            closure_pair.first->Run();
         }
         _pending_closures.clear();
     }
@@ -260,11 +251,12 @@ void VDataStreamRecvr::SenderQueue::close() {
 }
 
 VDataStreamRecvr::VDataStreamRecvr(
-        VDataStreamMgr* stream_mgr, const RowDescriptor& row_desc,
+        VDataStreamMgr* stream_mgr, RuntimeState* state, const RowDescriptor& row_desc,
         const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, int num_senders,
         bool is_merging, int total_buffer_limit, RuntimeProfile* profile,
         std::shared_ptr<QueryStatisticsRecvr> sub_plan_query_statistics_recvr)
         : _mgr(stream_mgr),
+          _state(state),
           _fragment_instance_id(fragment_instance_id),
           _dest_node_id(dest_node_id),
           _total_buffer_limit(total_buffer_limit),
@@ -325,6 +317,8 @@ Status VDataStreamRecvr::create_merger(const std::vector<VExprContext*>& orderin
 
 void VDataStreamRecvr::add_block(const PBlock& pblock, int sender_id, int be_number,
                                  int64_t packet_seq, ::google::protobuf::Closure** done) {
+    SCOPED_ATTACH_TASK(_state->query_mem_tracker(), print_id(_state->query_id()),
+                       _fragment_instance_id);
     SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get());
     int use_sender_id = _is_merging ? sender_id : 0;
     _sender_queues[use_sender_id]->add_block(pblock, be_number, packet_seq, done);
diff --git a/be/src/vec/runtime/vdata_stream_recvr.h b/be/src/vec/runtime/vdata_stream_recvr.h
index 7372285125..254d85185c 100644
--- a/be/src/vec/runtime/vdata_stream_recvr.h
+++ b/be/src/vec/runtime/vdata_stream_recvr.h
@@ -28,6 +28,7 @@
 #include "common/status.h"
 #include "gen_cpp/Types_types.h"
 #include "runtime/descriptors.h"
+#include "runtime/query_fragments_ctx.h"
 #include "runtime/query_statistics.h"
 #include "util/runtime_profile.h"
 
@@ -50,7 +51,7 @@ class VExprContext;
 
 class VDataStreamRecvr {
 public:
-    VDataStreamRecvr(VDataStreamMgr* stream_mgr, const RowDescriptor& row_desc,
+    VDataStreamRecvr(VDataStreamMgr* stream_mgr, RuntimeState* state, const RowDescriptor& row_desc,
                      const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id,
                      int num_senders, bool is_merging, int total_buffer_limit,
                      RuntimeProfile* profile,
@@ -97,6 +98,8 @@ private:
     // DataStreamMgr instance used to create this recvr. (Not owned)
     VDataStreamMgr* _mgr;
 
+    RuntimeState* _state;
+
     // Fragment and node id of the destination exchange node this receiver is used by.
     TUniqueId _fragment_instance_id;
     PlanNodeId _dest_node_id;
diff --git a/be/src/vec/sink/vdata_stream_sender.cpp b/be/src/vec/sink/vdata_stream_sender.cpp
index 0ca087ea59..100341a197 100644
--- a/be/src/vec/sink/vdata_stream_sender.cpp
+++ b/be/src/vec/sink/vdata_stream_sender.cpp
@@ -139,7 +139,7 @@ Status VDataStreamSender::Channel::send_block(PBlock* block, bool eos) {
         _closure->ref();
     } else {
         RETURN_IF_ERROR(_wait_last_brpc());
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         _closure->cntl.Reset();
     }
     VLOG_ROW << "Channel::send_batch() instance_id=" << _fragment_instance_id
@@ -162,7 +162,7 @@ Status VDataStreamSender::Channel::send_block(PBlock* block, bool eos) {
     if (_parent->_transfer_large_data_by_brpc && _brpc_request.has_block() &&
         _brpc_request.block().has_column_values() &&
         _brpc_request.ByteSizeLong() > MIN_HTTP_BRPC_SIZE) {
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         Status st = request_embed_attachment_contain_block<PTransmitDataParams,
                                                            RefCountClosure<PTransmitDataResult>>(
                 &_brpc_request, _closure);
@@ -179,7 +179,7 @@ Status VDataStreamSender::Channel::send_block(PBlock* block, bool eos) {
         _brpc_http_stub->transmit_block_by_http(&_closure->cntl, nullptr, &_closure->result,
                                                 _closure);
     } else {
-        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
         _closure->cntl.http_request().Clear();
         _brpc_stub->transmit_block(&_closure->cntl, &_brpc_request, &_closure->result, _closure);
     }
diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp
index 126e65cf85..04ce7c54dc 100644
--- a/be/src/vec/sink/vtablet_sink.cpp
+++ b/be/src/vec/sink/vtablet_sink.cpp
@@ -355,7 +355,7 @@ void VNodeChannel::try_send_block(RuntimeState* state) {
         _add_block_closure->cntl.http_request().set_content_type("application/json");
 
         {
-            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
             _brpc_http_stub->tablet_writer_add_block_by_http(&_add_block_closure->cntl, NULL,
                                                              &_add_block_closure->result,
                                                              _add_block_closure);
@@ -363,7 +363,7 @@ void VNodeChannel::try_send_block(RuntimeState* state) {
     } else {
         _add_block_closure->cntl.http_request().Clear();
         {
-            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->bthread_mem_tracker());
+            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker());
             _stub->tablet_writer_add_block(&_add_block_closure->cntl, &request,
                                            &_add_block_closure->result, _add_block_closure);
         }
diff --git a/be/test/exec/broker_scan_node_test.cpp b/be/test/exec/broker_scan_node_test.cpp
index 8b8e376b34..5ca63810f2 100644
--- a/be/test/exec/broker_scan_node_test.cpp
+++ b/be/test/exec/broker_scan_node_test.cpp
@@ -40,7 +40,7 @@ class BrokerScanNodeTest : public testing::Test {
 public:
     BrokerScanNodeTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
     }
     void init();
     static void SetUpTestCase() {
diff --git a/be/test/exec/broker_scanner_test.cpp b/be/test/exec/broker_scanner_test.cpp
index d750370aff..65387e575b 100644
--- a/be/test/exec/broker_scanner_test.cpp
+++ b/be/test/exec/broker_scanner_test.cpp
@@ -40,7 +40,7 @@ public:
     BrokerScannerTest() : _runtime_state(TQueryGlobals()) {
         init();
         _profile = _runtime_state.runtime_profile();
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
     }
     void init();
 
diff --git a/be/test/exec/es_http_scan_node_test.cpp b/be/test/exec/es_http_scan_node_test.cpp
index 8dc0e71b79..7d0a63596b 100644
--- a/be/test/exec/es_http_scan_node_test.cpp
+++ b/be/test/exec/es_http_scan_node_test.cpp
@@ -40,7 +40,7 @@ namespace doris {
 class EsHttpScanNodeTest : public testing::Test {
 public:
     EsHttpScanNodeTest() : _runtime_state(TQueryGlobals()) {
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
         TDescriptorTable t_desc_table;
 
         // table descriptors
diff --git a/be/test/exec/es_predicate_test.cpp b/be/test/exec/es_predicate_test.cpp
index 8cf009b991..5a91580b36 100644
--- a/be/test/exec/es_predicate_test.cpp
+++ b/be/test/exec/es_predicate_test.cpp
@@ -43,7 +43,7 @@ class RuntimeState;
 class EsPredicateTest : public testing::Test {
 public:
     EsPredicateTest() : _runtime_state(TQueryGlobals()) {
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
         TDescriptorTable t_desc_table;
 
         // table descriptors
diff --git a/be/test/exec/hash_table_test.cpp b/be/test/exec/hash_table_test.cpp
index 2a5e8f62d6..3a5ab8e3f0 100644
--- a/be/test/exec/hash_table_test.cpp
+++ b/be/test/exec/hash_table_test.cpp
@@ -49,7 +49,7 @@ public:
     HashTableTest() {
         _mem_pool.reset(new MemPool());
         _state = _pool.add(new RuntimeState(TQueryGlobals()));
-        _state->init_instance_mem_tracker();
+        _state->init_mem_trackers();
         _state->_exec_env = ExecEnv::GetInstance();
     }
 
@@ -309,7 +309,7 @@ TEST_F(HashTableTest, GrowTableTest) {
     int64_t num_buckets = 4;
     HashTable hash_table(_build_expr, _probe_expr, 1, false, is_null_safe, initial_seed,
                          num_buckets);
-    EXPECT_FALSE(hash_table.mem_tracker()->limit_exceeded(mem_limit));
+    EXPECT_FALSE(hash_table.mem_tracker()->consumption() > mem_limit);
 
     for (int i = 0; i < LOOP_LESS_OR_MORE(1, 20); ++i) {
         for (int j = 0; j < num_to_add; ++build_row_val, ++j) {
@@ -323,7 +323,7 @@ TEST_F(HashTableTest, GrowTableTest) {
     LOG(INFO) << "consume:" << hash_table.mem_tracker()->consumption()
               << ",expected_size:" << expected_size;
 
-    EXPECT_EQ(LOOP_LESS_OR_MORE(0, 1), hash_table.mem_tracker()->limit_exceeded(mem_limit));
+    EXPECT_EQ(LOOP_LESS_OR_MORE(0, 1), hash_table.mem_tracker()->consumption() > mem_limit);
 
     // Validate that we can find the entries
     for (int i = 0; i < expected_size * 5; i += 100000) {
diff --git a/be/test/exec/json_scanner_test.cpp b/be/test/exec/json_scanner_test.cpp
index 5daa4ef19a..5fbc3b3c3d 100644
--- a/be/test/exec/json_scanner_test.cpp
+++ b/be/test/exec/json_scanner_test.cpp
@@ -42,7 +42,7 @@ class JsonScannerTest : public testing::Test {
 public:
     JsonScannerTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
         _runtime_state._exec_env = ExecEnv::GetInstance();
     }
     void init();
diff --git a/be/test/exec/json_scanner_with_jsonpath_test.cpp b/be/test/exec/json_scanner_with_jsonpath_test.cpp
index 578cd10443..e54113c0bc 100644
--- a/be/test/exec/json_scanner_with_jsonpath_test.cpp
+++ b/be/test/exec/json_scanner_with_jsonpath_test.cpp
@@ -41,7 +41,7 @@ class JsonScannerWithJsonPathTest : public testing::Test {
 public:
     JsonScannerWithJsonPathTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
         _runtime_state._exec_env = ExecEnv::GetInstance();
     }
     void init();
diff --git a/be/test/exec/orc_scanner_test.cpp b/be/test/exec/orc_scanner_test.cpp
index bcc4d79e53..7356458f47 100644
--- a/be/test/exec/orc_scanner_test.cpp
+++ b/be/test/exec/orc_scanner_test.cpp
@@ -44,7 +44,7 @@ class OrcScannerTest : public testing::Test {
 public:
     OrcScannerTest() : _runtime_state(TQueryGlobals()) {
         _profile = _runtime_state.runtime_profile();
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
     }
 
     static void SetUpTestCase() {
diff --git a/be/test/exec/parquet_scanner_test.cpp b/be/test/exec/parquet_scanner_test.cpp
index 35d0f6e359..d330a44b3f 100644
--- a/be/test/exec/parquet_scanner_test.cpp
+++ b/be/test/exec/parquet_scanner_test.cpp
@@ -40,7 +40,7 @@ class ParquetScannerTest : public testing::Test {
 public:
     ParquetScannerTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
     }
     void init();
     static void SetUpTestCase() {
diff --git a/be/test/exec/tablet_sink_test.cpp b/be/test/exec/tablet_sink_test.cpp
index 4e2d36bc88..566d9675be 100644
--- a/be/test/exec/tablet_sink_test.cpp
+++ b/be/test/exec/tablet_sink_test.cpp
@@ -25,7 +25,6 @@
 #include "runtime/decimalv2_value.h"
 #include "runtime/descriptor_helper.h"
 #include "runtime/exec_env.h"
-#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/result_queue_mgr.h"
 #include "runtime/row_batch.h"
 #include "runtime/runtime_state.h"
@@ -56,7 +55,6 @@ public:
         _env->_load_stream_mgr = new LoadStreamMgr();
         _env->_internal_client_cache = new BrpcClientCache<PBackendService_Stub>();
         _env->_function_client_cache = new BrpcClientCache<PFunctionService_Stub>();
-        _env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool();
         ThreadPoolBuilder("SendBatchThreadPool")
                 .set_min_threads(1)
                 .set_max_threads(5)
@@ -72,7 +70,6 @@ public:
         SAFE_DELETE(_env->_load_stream_mgr);
         SAFE_DELETE(_env->_master_info);
         SAFE_DELETE(_env->_thread_mgr);
-        SAFE_DELETE(_env->_task_pool_mem_tracker_registry);
         if (_server) {
             _server->Stop(100);
             _server->Join();
diff --git a/be/test/exprs/runtime_filter_test.cpp b/be/test/exprs/runtime_filter_test.cpp
index d4b66f295c..ee319ac6cb 100644
--- a/be/test/exprs/runtime_filter_test.cpp
+++ b/be/test/exprs/runtime_filter_test.cpp
@@ -42,7 +42,7 @@ public:
         exec_env = nullptr;
         _runtime_stat.reset(
                 new RuntimeState(_fragment_id, _query_options, _query_globals, exec_env));
-        _runtime_stat->init_instance_mem_tracker();
+        _runtime_stat->init_mem_trackers();
     }
     virtual void TearDown() { _obj_pool.clear(); }
 
diff --git a/be/test/olap/lru_cache_test.cpp b/be/test/olap/lru_cache_test.cpp
index c9fb0cbd93..bafe4e7288 100644
--- a/be/test/olap/lru_cache_test.cpp
+++ b/be/test/olap/lru_cache_test.cpp
@@ -21,6 +21,7 @@
 
 #include <vector>
 
+#include "runtime/memory/mem_tracker_limiter.h"
 #include "testutil/test_util.h"
 
 using namespace doris;
@@ -221,7 +222,7 @@ static void insert_LRUCache(LRUCache& cache, const CacheKey& key, int value,
                             CachePriority priority) {
     uint32_t hash = key.hash(key.data(), key.size(), 0);
     static std::unique_ptr<MemTrackerLimiter> lru_cache_tracker =
-            std::make_unique<MemTrackerLimiter>(-1, "TestLruCache");
+            std::make_unique<MemTrackerLimiter>(MemTrackerLimiter::Type::GLOBAL, "TestLruCache");
     cache.release(cache.insert(key, hash, EncodeValue(value), value, &deleter,
                                lru_cache_tracker.get(), priority));
 }
diff --git a/be/test/runtime/mem_limit_test.cpp b/be/test/runtime/mem_limit_test.cpp
index 811102b216..74b06033b0 100644
--- a/be/test/runtime/mem_limit_test.cpp
+++ b/be/test/runtime/mem_limit_test.cpp
@@ -24,7 +24,7 @@
 namespace doris {
 
 TEST(MemTrackerTest, SingleTrackerNoLimit) {
-    auto t = std::make_unique<MemTrackerLimiter>();
+    auto t = std::make_shared<MemTrackerLimiter>(MemTrackerLimiter::Type::GLOBAL);
     EXPECT_FALSE(t->has_limit());
     t->consume(10);
     EXPECT_EQ(t->consumption(), 10);
@@ -37,7 +37,8 @@ TEST(MemTrackerTest, SingleTrackerNoLimit) {
 }
 
 TEST(MemTestTest, SingleTrackerWithLimit) {
-    auto t = std::make_unique<MemTrackerLimiter>(11, "limit tracker");
+    auto t = std::make_unique<MemTrackerLimiter>(MemTrackerLimiter::Type::GLOBAL, "limit tracker",
+                                                 11);
     EXPECT_TRUE(t->has_limit());
     t->consume(10);
     EXPECT_EQ(t->consumption(), 10);
@@ -51,95 +52,4 @@ TEST(MemTestTest, SingleTrackerWithLimit) {
     t->release(5);
 }
 
-TEST(MemTestTest, TrackerHierarchy) {
-    auto p = std::make_shared<MemTrackerLimiter>(100);
-    auto c1 = std::make_unique<MemTrackerLimiter>(80, "c1", p);
-    auto c2 = std::make_unique<MemTrackerLimiter>(50, "c2", p);
-
-    // everything below limits
-    c1->consume(60);
-    EXPECT_EQ(c1->consumption(), 60);
-    EXPECT_FALSE(c1->limit_exceeded());
-    EXPECT_FALSE(c1->any_limit_exceeded());
-    EXPECT_EQ(c2->consumption(), 0);
-    EXPECT_FALSE(c2->limit_exceeded());
-    EXPECT_FALSE(c2->any_limit_exceeded());
-    EXPECT_EQ(p->consumption(), 60);
-    EXPECT_FALSE(p->limit_exceeded());
-    EXPECT_FALSE(p->any_limit_exceeded());
-
-    // p goes over limit
-    c2->consume(50);
-    EXPECT_EQ(c1->consumption(), 60);
-    EXPECT_FALSE(c1->limit_exceeded());
-    EXPECT_TRUE(c1->any_limit_exceeded());
-    EXPECT_EQ(c2->consumption(), 50);
-    EXPECT_FALSE(c2->limit_exceeded());
-    EXPECT_TRUE(c2->any_limit_exceeded());
-    EXPECT_EQ(p->consumption(), 110);
-    EXPECT_TRUE(p->limit_exceeded());
-
-    // c2 goes over limit, p drops below limit
-    c1->release(20);
-    c2->consume(10);
-    EXPECT_EQ(c1->consumption(), 40);
-    EXPECT_FALSE(c1->limit_exceeded());
-    EXPECT_FALSE(c1->any_limit_exceeded());
-    EXPECT_EQ(c2->consumption(), 60);
-    EXPECT_TRUE(c2->limit_exceeded());
-    EXPECT_TRUE(c2->any_limit_exceeded());
-    EXPECT_EQ(p->consumption(), 100);
-    EXPECT_FALSE(p->limit_exceeded());
-    c1->release(40);
-    c2->release(60);
-}
-
-TEST(MemTestTest, TrackerHierarchyTryConsume) {
-    auto p = std::make_shared<MemTrackerLimiter>(100);
-    auto c1 = std::make_unique<MemTrackerLimiter>(80, "c1", p);
-    auto c2 = std::make_unique<MemTrackerLimiter>(50, "c2", p);
-
-    // everything below limits
-    std::string err_msg = "";
-    bool consumption = c1->try_consume(60, err_msg);
-    EXPECT_EQ(consumption, true);
-    EXPECT_EQ(c1->consumption(), 60);
-    EXPECT_FALSE(c1->limit_exceeded());
-    EXPECT_FALSE(c1->any_limit_exceeded());
-    EXPECT_EQ(c2->consumption(), 0);
-    EXPECT_FALSE(c2->limit_exceeded());
-    EXPECT_FALSE(c2->any_limit_exceeded());
-    EXPECT_EQ(p->consumption(), 60);
-    EXPECT_FALSE(p->limit_exceeded());
-    EXPECT_FALSE(p->any_limit_exceeded());
-
-    // p goes over limit
-    consumption = c2->try_consume(50, err_msg);
-    EXPECT_EQ(consumption, false);
-    EXPECT_EQ(c1->consumption(), 60);
-    EXPECT_FALSE(c1->limit_exceeded());
-    EXPECT_FALSE(c1->any_limit_exceeded());
-    EXPECT_EQ(c2->consumption(), 0);
-    EXPECT_FALSE(c2->limit_exceeded());
-    EXPECT_FALSE(c2->any_limit_exceeded());
-    EXPECT_EQ(p->consumption(), 60);
-    EXPECT_FALSE(p->limit_exceeded());
-    EXPECT_FALSE(p->any_limit_exceeded());
-
-    // c2 goes over limit, p drops below limit
-    c1->release(20);
-    c2->consume(10);
-    EXPECT_EQ(c1->consumption(), 40);
-    EXPECT_FALSE(c1->limit_exceeded());
-    EXPECT_FALSE(c1->any_limit_exceeded());
-    EXPECT_EQ(c2->consumption(), 10);
-    EXPECT_FALSE(c2->limit_exceeded());
-    EXPECT_FALSE(c2->any_limit_exceeded());
-    EXPECT_EQ(p->consumption(), 50);
-    EXPECT_FALSE(p->limit_exceeded());
-
-    c1->release(40);
-    c2->release(10);
-}
-
 } // end namespace doris
diff --git a/be/test/runtime/test_env.cc b/be/test/runtime/test_env.cc
index db32d9a30e..dc2b53c9f6 100644
--- a/be/test/runtime/test_env.cc
+++ b/be/test/runtime/test_env.cc
@@ -24,7 +24,6 @@
 #include "olap/storage_engine.h"
 #include "runtime/bufferpool/buffer_pool.h"
 #include "runtime/fragment_mgr.h"
-#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/result_queue_mgr.h"
 #include "util/disk_info.h"
 #include "util/priority_thread_pool.hpp"
@@ -35,7 +34,6 @@ TestEnv::TestEnv() {
     // Some code will use ExecEnv::GetInstance(), so init the global ExecEnv singleton
     _exec_env = ExecEnv::GetInstance();
     _exec_env->_thread_mgr = new ThreadResourceMgr(2);
-    _exec_env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool();
     _exec_env->_disk_io_mgr = new DiskIoMgr(1, 1, 1, 10);
     _exec_env->disk_io_mgr()->init(-1);
     _exec_env->_scan_thread_pool = new PriorityThreadPool(1, 16, "ut_scan");
@@ -62,7 +60,6 @@ TestEnv::~TestEnv() {
     SAFE_DELETE(_exec_env->_buffer_pool);
     SAFE_DELETE(_exec_env->_scan_thread_pool);
     SAFE_DELETE(_exec_env->_disk_io_mgr);
-    SAFE_DELETE(_exec_env->_task_pool_mem_tracker_registry);
     SAFE_DELETE(_exec_env->_thread_mgr);
 
     if (_engine == StorageEngine::_s_instance) {
diff --git a/be/test/testutil/run_all_tests.cpp b/be/test/testutil/run_all_tests.cpp
index a1e53f7ed0..570f428720 100644
--- a/be/test/testutil/run_all_tests.cpp
+++ b/be/test/testutil/run_all_tests.cpp
@@ -28,16 +28,10 @@
 #include "util/mem_info.h"
 
 int main(int argc, char** argv) {
-    std::shared_ptr<doris::MemTrackerLimiter> process_mem_tracker =
-            std::make_shared<doris::MemTrackerLimiter>(-1, "Process");
     std::shared_ptr<doris::MemTrackerLimiter> orphan_mem_tracker =
-            std::make_shared<doris::MemTrackerLimiter>(-1, "Orphan", process_mem_tracker);
-    std::shared_ptr<doris::MemTrackerLimiter> nursery_mem_tracker =
-            std::make_shared<doris::MemTrackerLimiter>(-1, "Nursery", orphan_mem_tracker);
-    std::shared_ptr<doris::MemTrackerLimiter> bthread_mem_tracker =
-            std::make_shared<doris::MemTrackerLimiter>(-1, "Bthread", orphan_mem_tracker);
-    doris::ExecEnv::GetInstance()->set_global_mem_tracker(process_mem_tracker, orphan_mem_tracker,
-                                                          nursery_mem_tracker, bthread_mem_tracker);
+            std::make_shared<doris::MemTrackerLimiter>(doris::MemTrackerLimiter::Type::GLOBAL,
+                                                       "Orphan");
+    doris::ExecEnv::GetInstance()->set_orphan_mem_tracker(orphan_mem_tracker);
     doris::thread_context()->_thread_mem_tracker_mgr->init();
     doris::TabletSchemaCache::create_global_schema_cache();
     doris::StoragePageCache::create_global_cache(1 << 30, 10);
diff --git a/be/test/util/arrow/arrow_work_flow_test.cpp b/be/test/util/arrow/arrow_work_flow_test.cpp
index 0475d479a4..7acd5e58b7 100644
--- a/be/test/util/arrow/arrow_work_flow_test.cpp
+++ b/be/test/util/arrow/arrow_work_flow_test.cpp
@@ -29,7 +29,6 @@
 #include "gen_cpp/Types_types.h"
 #include "olap/row.h"
 #include "runtime/exec_env.h"
-#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/result_queue_mgr.h"
 #include "runtime/row_batch.h"
 #include "runtime/runtime_state.h"
@@ -66,7 +65,6 @@ protected:
         if (_exec_env) {
             delete _exec_env->_result_queue_mgr;
             delete _exec_env->_thread_mgr;
-            delete _exec_env->_task_pool_mem_tracker_registry;
         }
     }
 
@@ -92,7 +90,6 @@ void ArrowWorkFlowTest::init() {
 void ArrowWorkFlowTest::init_runtime_state() {
     _exec_env->_result_queue_mgr = new ResultQueueMgr();
     _exec_env->_thread_mgr = new ThreadResourceMgr();
-    _exec_env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool();
     _exec_env->_is_init = true;
     TQueryOptions query_options;
     query_options.batch_size = 1024;
@@ -100,7 +97,7 @@ void ArrowWorkFlowTest::init_runtime_state() {
     query_id.lo = 10;
     query_id.hi = 100;
     _state = new RuntimeState(query_id, query_options, TQueryGlobals(), _exec_env);
-    _state->init_instance_mem_tracker();
+    _state->init_mem_trackers();
     _state->set_desc_tbl(_desc_tbl);
     _state->_load_dir = "./test_run/output/";
     _state->init_mem_trackers(TUniqueId());
diff --git a/be/test/vec/exec/parquet/parquet_reader_test.cpp b/be/test/vec/exec/parquet/parquet_reader_test.cpp
index b2288338b3..480460f394 100644
--- a/be/test/vec/exec/parquet/parquet_reader_test.cpp
+++ b/be/test/vec/exec/parquet/parquet_reader_test.cpp
@@ -109,7 +109,7 @@ TEST_F(ParquetReaderTest, normal) {
     p_reader->set_file_reader(reader);
     RuntimeState runtime_state((TQueryGlobals()));
     runtime_state.set_desc_tbl(desc_tbl);
-    runtime_state.init_instance_mem_tracker();
+    runtime_state.init_mem_trackers();
 
     std::unordered_map<std::string, ColumnValueRangeType> colname_to_value_range;
     p_reader->init_reader(&colname_to_value_range);
diff --git a/be/test/vec/exec/vbroker_scan_node_test.cpp b/be/test/vec/exec/vbroker_scan_node_test.cpp
index 195f468841..5da2f255f3 100644
--- a/be/test/vec/exec/vbroker_scan_node_test.cpp
+++ b/be/test/vec/exec/vbroker_scan_node_test.cpp
@@ -45,7 +45,7 @@ class VBrokerScanNodeTest : public testing::Test {
 public:
     VBrokerScanNodeTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
         _runtime_state._query_options.enable_vectorized_engine = true;
     }
     void init();
diff --git a/be/test/vec/exec/vbroker_scanner_test.cpp b/be/test/vec/exec/vbroker_scanner_test.cpp
index 5cb9afc4b2..225eeeb16b 100644
--- a/be/test/vec/exec/vbroker_scanner_test.cpp
+++ b/be/test/vec/exec/vbroker_scanner_test.cpp
@@ -40,7 +40,7 @@ public:
     VBrokerScannerTest() : _runtime_state(TQueryGlobals()) {
         init();
         _profile = _runtime_state.runtime_profile();
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
 
         TUniqueId unique_id;
         TQueryOptions query_options;
diff --git a/be/test/vec/exec/vjson_scanner_test.cpp b/be/test/vec/exec/vjson_scanner_test.cpp
index f06b8233d6..55775a9555 100644
--- a/be/test/vec/exec/vjson_scanner_test.cpp
+++ b/be/test/vec/exec/vjson_scanner_test.cpp
@@ -47,7 +47,7 @@ class VJsonScannerTest : public testing::Test {
 public:
     VJsonScannerTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
 
         TUniqueId unique_id;
         TQueryOptions query_options;
diff --git a/be/test/vec/exec/vorc_scanner_test.cpp b/be/test/vec/exec/vorc_scanner_test.cpp
index 7e9aff95ba..0362ebb080 100644
--- a/be/test/vec/exec/vorc_scanner_test.cpp
+++ b/be/test/vec/exec/vorc_scanner_test.cpp
@@ -46,7 +46,7 @@ class VOrcScannerTest : public testing::Test {
 public:
     VOrcScannerTest() : _runtime_state(TQueryGlobals()) {
         _profile = _runtime_state.runtime_profile();
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
         _runtime_state._query_options.enable_vectorized_engine = true;
     }
     ~VOrcScannerTest() {}
diff --git a/be/test/vec/exec/vparquet_scanner_test.cpp b/be/test/vec/exec/vparquet_scanner_test.cpp
index bb1bb2c7f3..c08a69a005 100644
--- a/be/test/vec/exec/vparquet_scanner_test.cpp
+++ b/be/test/vec/exec/vparquet_scanner_test.cpp
@@ -41,7 +41,7 @@ class VParquetScannerTest : public testing::Test {
 public:
     VParquetScannerTest() : _runtime_state(TQueryGlobals()) {
         init();
-        _runtime_state.init_instance_mem_tracker();
+        _runtime_state.init_mem_trackers();
         _runtime_state._query_options.enable_vectorized_engine = true;
     }
     ~VParquetScannerTest() {}
diff --git a/be/test/vec/exec/vtablet_sink_test.cpp b/be/test/vec/exec/vtablet_sink_test.cpp
index ae1f615035..4e75c6808e 100644
--- a/be/test/vec/exec/vtablet_sink_test.cpp
+++ b/be/test/vec/exec/vtablet_sink_test.cpp
@@ -28,7 +28,6 @@
 #include "runtime/decimalv2_value.h"
 #include "runtime/descriptor_helper.h"
 #include "runtime/exec_env.h"
-#include "runtime/memory/mem_tracker_task_pool.h"
 #include "runtime/result_queue_mgr.h"
 #include "runtime/runtime_state.h"
 #include "runtime/stream_load/load_stream_mgr.h"
@@ -157,7 +156,6 @@ public:
         _env->_load_stream_mgr = new LoadStreamMgr();
         _env->_internal_client_cache = new BrpcClientCache<PBackendService_Stub>();
         _env->_function_client_cache = new BrpcClientCache<PFunctionService_Stub>();
-        _env->_task_pool_mem_tracker_registry = new MemTrackerTaskPool();
         ThreadPoolBuilder("SendBatchThreadPool")
                 .set_min_threads(1)
                 .set_max_threads(5)
@@ -173,7 +171,6 @@ public:
         SAFE_DELETE(_env->_load_stream_mgr);
         SAFE_DELETE(_env->_master_info);
         SAFE_DELETE(_env->_thread_mgr);
-        SAFE_DELETE(_env->_task_pool_mem_tracker_registry);
         if (_server) {
             _server->Stop(100);
             _server->Join();
diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp
index 27c76c320f..593cfc932e 100644
--- a/be/test/vec/exprs/vexpr_test.cpp
+++ b/be/test/vec/exprs/vexpr_test.cpp
@@ -69,7 +69,7 @@ TEST(TEST_VEXPR, ABSTEST) {
 
     doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(),
                                      doris::TQueryGlobals(), nullptr);
-    runtime_stat.init_instance_mem_tracker();
+    runtime_stat.init_mem_trackers();
     runtime_stat.set_desc_tbl(desc_tbl);
     context->prepare(&runtime_stat, row_desc);
     context->open(&runtime_stat);
@@ -112,7 +112,7 @@ TEST(TEST_VEXPR, ABSTEST2) {
 
     doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(),
                                      doris::TQueryGlobals(), nullptr);
-    runtime_stat.init_instance_mem_tracker();
+    runtime_stat.init_mem_trackers();
     DescriptorTbl desc_tbl;
     desc_tbl._slot_desc_map[0] = tuple_desc->slots()[0];
     runtime_stat.set_desc_tbl(&desc_tbl);
diff --git a/be/test/vec/runtime/vdata_stream_test.cpp b/be/test/vec/runtime/vdata_stream_test.cpp
index 175fa64ee3..539dd51d74 100644
--- a/be/test/vec/runtime/vdata_stream_test.cpp
+++ b/be/test/vec/runtime/vdata_stream_test.cpp
@@ -114,7 +114,7 @@ TEST_F(VDataStreamTest, BasicTest) {
 
     doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(),
                                      doris::TQueryGlobals(), nullptr);
-    runtime_stat.init_instance_mem_tracker();
+    runtime_stat.init_mem_trackers();
     runtime_stat.set_desc_tbl(desc_tbl);
     runtime_stat.set_be_number(1);
     runtime_stat._exec_env = _object_pool.add(new ExecEnv);