diff --git a/be/src/runtime/mem_pool.cpp b/be/src/runtime/mem_pool.cpp
index 8b9a6e72de..76b2c941ad 100644
--- a/be/src/runtime/mem_pool.cpp
+++ b/be/src/runtime/mem_pool.cpp
@@ -26,248 +26,239 @@
 #include "common/names.h"
 
-using namespace doris;
+namespace doris {
 
 #define MEM_POOL_POISON (0x66aa77bb)
 
 const int MemPool::INITIAL_CHUNK_SIZE;
 const int MemPool::MAX_CHUNK_SIZE;
-const char* MemPool::LLVM_CLASS_NAME = "class.impala::MemPool";
+const char* MemPool::LLVM_CLASS_NAME = "class.doris::MemPool";
 const int MemPool::DEFAULT_ALIGNMENT;
-uint32_t MemPool::zero_length_region_ = MEM_POOL_POISON;
-
-MemPool::MemPool(MemTracker* mem_tracker)
-  : current_chunk_idx_(-1),
-    next_chunk_size_(INITIAL_CHUNK_SIZE),
-    total_allocated_bytes_(0),
-    total_reserved_bytes_(0),
-    peak_allocated_bytes_(0),
-    mem_tracker_(mem_tracker) {
-  DCHECK(mem_tracker != NULL);
-  DCHECK_EQ(zero_length_region_, MEM_POOL_POISON);
-}
+uint32_t MemPool::k_zero_length_region_ alignas(std::max_align_t) = MEM_POOL_POISON;
 
 MemPool::ChunkInfo::ChunkInfo(int64_t size, uint8_t* buf)
-  : data(buf),
-    size(size),
-    allocated_bytes(0) {
-  DorisMetrics::memory_pool_bytes_total.increment(size);
+        : data(buf),
+          size(size),
+          allocated_bytes(0) {
+    DorisMetrics::memory_pool_bytes_total.increment(size);
 }
 
 MemPool::~MemPool() {
-  int64_t total_bytes_released = 0;
-  for (size_t i = 0; i < chunks_.size(); ++i) {
-    total_bytes_released += chunks_[i].size;
-    free(chunks_[i].data);
-  }
-
-  mem_tracker_->release(total_bytes_released);
-  //TODO chenhao , check all using MemPool and open it
-  //DCHECK(chunks_.empty()) << "Must call FreeAll() or AcquireData() for this pool";
-
-  DorisMetrics::memory_pool_bytes_total.increment(-total_bytes_released);
-
-  //DCHECK_EQ(zero_length_region_, MEM_POOL_POISON);
+    int64_t total_bytes_released = 0;
+    for (auto& chunk : chunks_) {
+        total_bytes_released += chunk.size;
+        free(chunk.data);
+    }
+    mem_tracker_->release(total_bytes_released);
+    DorisMetrics::memory_pool_bytes_total.increment(-total_bytes_released);
 }
 
 void MemPool::clear() {
-  current_chunk_idx_ = -1;
-  for (auto& chunk: chunks_) {
-    chunk.allocated_bytes = 0;
-    ASAN_POISON_MEMORY_REGION(chunk.data, chunk.size);
-  }
-  total_allocated_bytes_ = 0;
-  DCHECK(CheckIntegrity(false));
+    current_chunk_idx_ = -1;
+    for (auto& chunk: chunks_) {
+        chunk.allocated_bytes = 0;
+        ASAN_POISON_MEMORY_REGION(chunk.data, chunk.size);
+    }
+    total_allocated_bytes_ = 0;
+    DCHECK(check_integrity(false));
 }
 
 void MemPool::free_all() {
-  int64_t total_bytes_released = 0;
-  for (auto& chunk: chunks_) {
-    total_bytes_released += chunk.size;
-    free(chunk.data);
-  }
-  chunks_.clear();
-  next_chunk_size_ = INITIAL_CHUNK_SIZE;
-  current_chunk_idx_ = -1;
-  total_allocated_bytes_ = 0;
-  total_reserved_bytes_ = 0;
+    int64_t total_bytes_released = 0;
+    for (auto& chunk: chunks_) {
+        total_bytes_released += chunk.size;
+        free(chunk.data);
+    }
+    chunks_.clear();
+    next_chunk_size_ = INITIAL_CHUNK_SIZE;
+    current_chunk_idx_ = -1;
+    total_allocated_bytes_ = 0;
+    total_reserved_bytes_ = 0;
 
-  mem_tracker_->release(total_bytes_released);
-  DorisMetrics::memory_pool_bytes_total.increment(-total_bytes_released);
+    mem_tracker_->release(total_bytes_released);
+    DorisMetrics::memory_pool_bytes_total.increment(-total_bytes_released);
 }
 
-bool MemPool::FindChunk(size_t min_size, bool check_limits) {
-  // Try to allocate from a free chunk. We may have free chunks after the current chunk
-  // if Clear() was called. The current chunk may be free if ReturnPartialAllocation()
-  // was called. The first free chunk (if there is one) can therefore be either the
-  // current chunk or the chunk immediately after the current chunk.
-  int first_free_idx;
-  if (current_chunk_idx_ == -1) {
-    first_free_idx = 0;
-  } else {
-    DCHECK_GE(current_chunk_idx_, 0);
-    first_free_idx = current_chunk_idx_ +
-        (chunks_[current_chunk_idx_].allocated_bytes > 0);
-  }
-  for (int idx = current_chunk_idx_ + 1; idx < chunks_.size(); ++idx) {
-    // All chunks after 'current_chunk_idx_' should be free.
-    DCHECK_EQ(chunks_[idx].allocated_bytes, 0);
-    if (chunks_[idx].size >= min_size) {
-      // This chunk is big enough. Move it before the other free chunks.
-      if (idx != first_free_idx) std::swap(chunks_[idx], chunks_[first_free_idx]);
-      current_chunk_idx_ = first_free_idx;
-      DCHECK(CheckIntegrity(true));
-      return true;
+bool MemPool::find_chunk(size_t min_size, bool check_limits) {
+    // Try to allocate from a free chunk. We may have free chunks after the current chunk
+    // if Clear() was called. The current chunk may be free if ReturnPartialAllocation()
+    // was called. The first free chunk (if there is one) can therefore be either the
+    // current chunk or the chunk immediately after the current chunk.
+    int first_free_idx;
+    if (current_chunk_idx_ == -1) {
+        first_free_idx = 0;
+    } else {
+        DCHECK_GE(current_chunk_idx_, 0);
+        first_free_idx = current_chunk_idx_ +
+                (chunks_[current_chunk_idx_].allocated_bytes > 0);
+    }
+    for (int idx = current_chunk_idx_ + 1; idx < chunks_.size(); ++idx) {
+        // All chunks after 'current_chunk_idx_' should be free.
+        DCHECK_EQ(chunks_[idx].allocated_bytes, 0);
+        if (chunks_[idx].size >= min_size) {
+            // This chunk is big enough. Move it before the other free chunks.
+            if (idx != first_free_idx) std::swap(chunks_[idx], chunks_[first_free_idx]);
+            current_chunk_idx_ = first_free_idx;
+            DCHECK(check_integrity(true));
+            return true;
+        }
     }
-  }
 
-  // Didn't find a big enough free chunk - need to allocate new chunk.
-  size_t chunk_size = 0;
-  DCHECK_LE(next_chunk_size_, MAX_CHUNK_SIZE);
+    // Didn't find a big enough free chunk - need to allocate new chunk.
+    size_t chunk_size = 0;
+    DCHECK_LE(next_chunk_size_, MAX_CHUNK_SIZE);
 
-  if (config::disable_mem_pools) {
-    // Disable pooling by sizing the chunk to fit only this allocation.
-    // Make sure the alignment guarantees are respected.
-    chunk_size = std::max(min_size, alignof(max_align_t));
-  } else {
-    DCHECK_GE(next_chunk_size_, INITIAL_CHUNK_SIZE);
-    chunk_size = max<size_t>(min_size, next_chunk_size_);
-  }
+    if (config::disable_mem_pools) {
+        // Disable pooling by sizing the chunk to fit only this allocation.
+        // Make sure the alignment guarantees are respected.
+        chunk_size = std::max(min_size, alignof(max_align_t));
+    } else {
+        DCHECK_GE(next_chunk_size_, INITIAL_CHUNK_SIZE);
+        chunk_size = max<size_t>(min_size, next_chunk_size_);
+    }
 
-  if (check_limits) {
-    if (!mem_tracker_->try_consume(chunk_size)) return false;
-  } else {
-    mem_tracker_->consume(chunk_size);
-  }
+    if (check_limits) {
+        if (!mem_tracker_->try_consume(chunk_size)) return false;
+    } else {
+        mem_tracker_->consume(chunk_size);
+    }
 
-  // Allocate a new chunk. Return early if malloc fails.
-  uint8_t* buf = reinterpret_cast<uint8_t*>(malloc(chunk_size));
-  if (UNLIKELY(buf == NULL)) {
-    mem_tracker_->release(chunk_size);
-    return false;
-  }
+    // Allocate a new chunk. Return early if malloc fails.
+    uint8_t* buf = reinterpret_cast<uint8_t*>(malloc(chunk_size));
+    if (UNLIKELY(buf == nullptr)) {
+        mem_tracker_->release(chunk_size);
+        return false;
+    }
 
-  ASAN_POISON_MEMORY_REGION(buf, chunk_size);
+    ASAN_POISON_MEMORY_REGION(buf, chunk_size);
 
-  // Put it before the first free chunk. If no free chunks, it goes at the end.
-  if (first_free_idx == static_cast<int>(chunks_.size())) {
-    chunks_.push_back(ChunkInfo(chunk_size, buf));
-  } else {
-    chunks_.insert(chunks_.begin() + first_free_idx, ChunkInfo(chunk_size, buf));
-  }
-  current_chunk_idx_ = first_free_idx;
-  total_reserved_bytes_ += chunk_size;
-  // Don't increment the chunk size until the allocation succeeds: if an attempted
-  // large allocation fails we don't want to increase the chunk size further.
-  next_chunk_size_ = static_cast<int>(min<int64_t>(chunk_size * 2, MAX_CHUNK_SIZE));
+    // Put it before the first free chunk. If no free chunks, it goes at the end.
+    if (first_free_idx == static_cast<int>(chunks_.size())) {
+        chunks_.emplace_back(chunk_size, buf);
+    } else {
+        chunks_.insert(chunks_.begin() + first_free_idx, ChunkInfo(chunk_size, buf));
+    }
+    current_chunk_idx_ = first_free_idx;
+    total_reserved_bytes_ += chunk_size;
+    // Don't increment the chunk size until the allocation succeeds: if an attempted
+    // large allocation fails we don't want to increase the chunk size further.
+    next_chunk_size_ = static_cast<int>(min<int64_t>(chunk_size * 2, MAX_CHUNK_SIZE));
 
-  DCHECK(CheckIntegrity(true));
-  return true;
+    DCHECK(check_integrity(true));
+    return true;
 }
 
 void MemPool::acquire_data(MemPool* src, bool keep_current) {
-  DCHECK(src->CheckIntegrity(false));
-  int num_acquired_chunks;
-  if (keep_current) {
-    num_acquired_chunks = src->current_chunk_idx_;
-  } else if (src->GetFreeOffset() == 0) {
-    // nothing in the last chunk
-    num_acquired_chunks = src->current_chunk_idx_;
-  } else {
-    num_acquired_chunks = src->current_chunk_idx_ + 1;
-  }
-
-  if (num_acquired_chunks <= 0) {
-    if (!keep_current) src->free_all();
-    return;
-  }
-
-  vector<ChunkInfo>::iterator end_chunk = src->chunks_.begin() + num_acquired_chunks;
-  int64_t total_transfered_bytes = 0;
-  for (vector<ChunkInfo>::iterator i = src->chunks_.begin(); i != end_chunk; ++i) {
-    total_transfered_bytes += i->size;
-  }
-  src->total_reserved_bytes_ -= total_transfered_bytes;
-  total_reserved_bytes_ += total_transfered_bytes;
-
-  // Skip unnecessary atomic ops if the mem_trackers are the same.
-  if (src->mem_tracker_ != mem_tracker_) {
-    src->mem_tracker_->release(total_transfered_bytes);
-    mem_tracker_->consume(total_transfered_bytes);
-  }
-
-  // insert new chunks after current_chunk_idx_
-  vector<ChunkInfo>::iterator insert_chunk = chunks_.begin() + current_chunk_idx_ + 1;
-  chunks_.insert(insert_chunk, src->chunks_.begin(), end_chunk);
-  src->chunks_.erase(src->chunks_.begin(), end_chunk);
-  current_chunk_idx_ += num_acquired_chunks;
-
-  if (keep_current) {
-    src->current_chunk_idx_ = 0;
-    DCHECK(src->chunks_.size() == 1 || src->chunks_[1].allocated_bytes == 0);
-    total_allocated_bytes_ += src->total_allocated_bytes_ - src->GetFreeOffset();
-    src->total_allocated_bytes_ = src->GetFreeOffset();
-  } else {
-    src->current_chunk_idx_ = -1;
-    total_allocated_bytes_ += src->total_allocated_bytes_;
-    src->total_allocated_bytes_ = 0;
-  }
-
-  peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_);
-
-  if (!keep_current) src->free_all();
-  DCHECK(src->CheckIntegrity(false));
-  DCHECK(CheckIntegrity(false));
-}
-
-string MemPool::DebugString() {
-  stringstream out;
-  char str[16];
-  out << "MemPool(#chunks=" << chunks_.size() << " [";
-  for (int i = 0; i < chunks_.size(); ++i) {
-    sprintf(str, "0x%lx=", reinterpret_cast<uintptr_t>(chunks_[i].data));
-    out << (i > 0 ? " " : "")
-        << str
-        << chunks_[i].size
-        << "/" << chunks_[i].allocated_bytes;
-  }
-  out << "] current_chunk=" << current_chunk_idx_
-      << " total_sizes=" << get_total_chunk_sizes()
-      << " total_alloc=" << total_allocated_bytes_
-      << ")";
-  return out.str();
-}
-
-int64_t MemPool::get_total_chunk_sizes() const {
-  int64_t result = 0;
-  for (int i = 0; i < chunks_.size(); ++i) {
-    result += chunks_[i].size;
-  }
-  return result;
-}
-
-bool MemPool::CheckIntegrity(bool check_current_chunk_empty) {
-  DCHECK_EQ(zero_length_region_, MEM_POOL_POISON);
-  DCHECK_LT(current_chunk_idx_, static_cast<int>(chunks_.size()));
-
-  // Without pooling, there are way too many chunks and this takes too long.
-  if (config::disable_mem_pools) return true;
-
-  // check that current_chunk_idx_ points to the last chunk with allocated data
-  int64_t total_allocated = 0;
-  for (int i = 0; i < chunks_.size(); ++i) {
-    DCHECK_GT(chunks_[i].size, 0);
-    if (i < current_chunk_idx_) {
-      DCHECK_GT(chunks_[i].allocated_bytes, 0);
-    } else if (i == current_chunk_idx_) {
-      DCHECK_GE(chunks_[i].allocated_bytes, 0);
-      if (check_current_chunk_empty) DCHECK_EQ(chunks_[i].allocated_bytes, 0);
-    } else {
-      DCHECK_EQ(chunks_[i].allocated_bytes, 0);
-    }
-    total_allocated += chunks_[i].allocated_bytes;
-  }
-  DCHECK_EQ(total_allocated, total_allocated_bytes_);
-  return true;
+    DCHECK(src->check_integrity(false));
+    int num_acquired_chunks;
+    if (keep_current) {
+        num_acquired_chunks = src->current_chunk_idx_;
+    } else if (src->get_free_offset() == 0) {
+        // nothing in the last chunk
+        num_acquired_chunks = src->current_chunk_idx_;
+    } else {
+        num_acquired_chunks = src->current_chunk_idx_ + 1;
+    }
+
+    if (num_acquired_chunks <= 0) {
+        if (!keep_current) src->free_all();
+        return;
+    }
+
+    auto end_chunk = src->chunks_.begin() + num_acquired_chunks;
+    int64_t total_transfered_bytes = 0;
+    for (auto i = src->chunks_.begin(); i != end_chunk; ++i) {
+        total_transfered_bytes += i->size;
+    }
+    src->total_reserved_bytes_ -= total_transfered_bytes;
+    total_reserved_bytes_ += total_transfered_bytes;
+
+    // Skip unnecessary atomic ops if the mem_trackers are the same.
+    if (src->mem_tracker_ != mem_tracker_) {
+        src->mem_tracker_->release(total_transfered_bytes);
+        mem_tracker_->consume(total_transfered_bytes);
+    }
+
+    // insert new chunks after current_chunk_idx_
+    auto insert_chunk = chunks_.begin() + current_chunk_idx_ + 1;
+    chunks_.insert(insert_chunk, src->chunks_.begin(), end_chunk);
+    src->chunks_.erase(src->chunks_.begin(), end_chunk);
+    current_chunk_idx_ += num_acquired_chunks;
+
+    if (keep_current) {
+        src->current_chunk_idx_ = 0;
+        DCHECK(src->chunks_.size() == 1 || src->chunks_[1].allocated_bytes == 0);
+        total_allocated_bytes_ += src->total_allocated_bytes_ - src->get_free_offset();
+        src->total_allocated_bytes_ = src->get_free_offset();
+    } else {
+        src->current_chunk_idx_ = -1;
+        total_allocated_bytes_ += src->total_allocated_bytes_;
+        src->total_allocated_bytes_ = 0;
+    }
+
+    peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_);
+
+    if (!keep_current) src->free_all();
+    DCHECK(src->check_integrity(false));
+    DCHECK(check_integrity(false));
+}
+
+void MemPool::exchange_data(MemPool* other) {
+    int64_t delta_size = other->total_reserved_bytes_ - total_reserved_bytes_;
+
+    std::swap(current_chunk_idx_, other->current_chunk_idx_);
+    std::swap(next_chunk_size_, other->next_chunk_size_);
+    std::swap(total_allocated_bytes_, other->total_allocated_bytes_);
+    std::swap(total_reserved_bytes_, other->total_reserved_bytes_);
+    std::swap(peak_allocated_bytes_, other->peak_allocated_bytes_);
+    std::swap(chunks_, other->chunks_);
+
+    // update MemTracker
+    mem_tracker_->consume(delta_size);
+    other->mem_tracker_->release(delta_size);
+}
+
+string MemPool::debug_string() {
+    stringstream out;
+    char str[24];
+    out << "MemPool(#chunks=" << chunks_.size() << " [";
+    for (int i = 0; i < chunks_.size(); ++i) {
+        sprintf(str, "0x%lx=", reinterpret_cast<uintptr_t>(chunks_[i].data));
+        out << (i > 0 ? " " : "")
+            << str
+            << chunks_[i].size
+            << "/" << chunks_[i].allocated_bytes;
+    }
+    out << "] current_chunk=" << current_chunk_idx_
+        << " total_sizes=" << total_reserved_bytes_
+        << " total_alloc=" << total_allocated_bytes_
+        << ")";
+    return out.str();
+}
+
+bool MemPool::check_integrity(bool check_current_chunk_empty) {
+    DCHECK_LT(current_chunk_idx_, static_cast<int>(chunks_.size()));
+
+    // Without pooling, there are way too many chunks and this takes too long.
+    if (config::disable_mem_pools) return true;
+
+    // check that current_chunk_idx_ points to the last chunk with allocated data
+    int64_t total_allocated = 0;
+    for (int i = 0; i < chunks_.size(); ++i) {
+        DCHECK_GT(chunks_[i].size, 0);
+        if (i < current_chunk_idx_) {
+            DCHECK_GT(chunks_[i].allocated_bytes, 0);
+        } else if (i == current_chunk_idx_) {
+            DCHECK_GE(chunks_[i].allocated_bytes, 0);
+            if (check_current_chunk_empty) DCHECK_EQ(chunks_[i].allocated_bytes, 0);
+        } else {
+            DCHECK_EQ(chunks_[i].allocated_bytes, 0);
+        }
+        total_allocated += chunks_[i].allocated_bytes;
+    }
+    DCHECK_EQ(total_allocated, total_allocated_bytes_);
+    return true;
+}
+
+}
diff --git a/be/src/runtime/mem_pool.h b/be/src/runtime/mem_pool.h
index 23c328391c..0dd91d54f9 100644
--- a/be/src/runtime/mem_pool.h
+++ b/be/src/runtime/mem_pool.h
@@ -40,7 +40,7 @@ class MemTracker;
 //
 /// The caller registers a MemTracker with the pool; chunk allocations are counted
 /// against that tracker and all of its ancestors. If chunks get moved between pools
-/// during AcquireData() calls, the respective MemTrackers are updated accordingly.
+/// during acquire_data() calls, the respective MemTrackers are updated accordingly.
 /// Chunks freed up in the d'tor are subtracted from the registered trackers.
 //
 /// An Allocate() call will attempt to allocate memory from the chunk that was most
@@ -79,211 +79,204 @@ class MemTracker;
 //
 /// MemPool* p2 = new MemPool();
 /// the new mempool receives all chunks containing data from p
-/// p2->AcquireData(p, false);
+/// p2->acquire_data(p, false);
 /// At this point p.total_allocated_bytes_ would be 0.
 /// The one remaining (empty) chunk is released:
 /// delete p;
-
 class MemPool {
- public:
+public:
-
-  /// 'tracker' tracks the amount of memory allocated by this pool. Must not be NULL.
-  MemPool(MemTracker* mem_tracker);
-
-  /// Frees all chunks of memory and subtracts the total allocated bytes
-  /// from the registered limits.
-  ~MemPool();
-
-  /// Allocates a section of memory of 'size' bytes with DEFAULT_ALIGNMENT at the end
-  /// of the the current chunk. Creates a new chunk if there aren't any chunks
-  /// with enough capacity.
-  uint8_t* allocate(int64_t size) {
-    return allocate<false>(size, DEFAULT_ALIGNMENT);
-  }
-
-  /// Same as Allocate() except the mem limit is checked before the allocation and
-  /// this call will fail (returns NULL) if it does.
-  /// The caller must handle the NULL case. This should be used for allocations
-  /// where the size can be very big to bound the amount by which we exceed mem limits.
-  uint8_t* try_allocate(int64_t size) {
-    return allocate<true>(size, DEFAULT_ALIGNMENT);
-  }
-
-  /// Same as TryAllocate() except a non-default alignment can be specified. It
-  /// should be a power-of-two in [1, alignof(std::max_align_t)].
-  uint8_t* try_allocate_aligned(int64_t size, int alignment) {
-    DCHECK_GE(alignment, 1);
-    DCHECK_LE(alignment, config::memory_max_alignment);
-    DCHECK_EQ(BitUtil::RoundUpToPowerOfTwo(alignment), alignment);
-    return allocate<true>(size, alignment);
-  }
-
-  /// Same as TryAllocate() except returned memory is not aligned at all.
-  uint8_t* try_allocate_unaligned(int64_t size) {
-    // Call templated implementation directly so that it is inlined here and the
-    // alignment logic can be optimised out.
-    return allocate<true>(size, 1);
-  }
-
-  /// Returns 'byte_size' to the current chunk back to the mem pool. This can
-  /// only be used to return either all or part of the previous allocation returned
-  /// by Allocate().
-  void return_partial_allocation(int64_t byte_size) {
-    DCHECK_GE(byte_size, 0);
-    DCHECK(current_chunk_idx_ != -1);
-    ChunkInfo& info = chunks_[current_chunk_idx_];
-    DCHECK_GE(info.allocated_bytes, byte_size);
-    info.allocated_bytes -= byte_size;
-    ASAN_POISON_MEMORY_REGION(info.data + info.allocated_bytes, byte_size);
-    total_allocated_bytes_ -= byte_size;
-  }
-
-  /// Return a dummy pointer for zero-length allocations.
-  static uint8_t* empty_alloc_ptr() {
-    return reinterpret_cast<uint8_t*>(&zero_length_region_);
-  }
-
-  /// Makes all allocated chunks available for re-use, but doesn't delete any chunks.
-  void clear();
-
-  /// Deletes all allocated chunks. FreeAll() or AcquireData() must be called for
-  /// each mem pool
-  void free_all();
-
-  /// Absorb all chunks that hold data from src. If keep_current is true, let src hold on
-  /// to its last allocated chunk that contains data.
-  /// All offsets handed out by calls to GetCurrentOffset() for 'src' become invalid.
-  void acquire_data(MemPool* src, bool keep_current);
-
-  std::string DebugString();
-
-  int64_t total_allocated_bytes() const { return total_allocated_bytes_; }
-  int64_t total_reserved_bytes() const { return total_reserved_bytes_; }
-  int64_t peak_allocated_bytes() const { return peak_allocated_bytes_;}
-
-  MemTracker* mem_tracker() { return mem_tracker_; }
-
-  /// Return sum of chunk_sizes_.
-  int64_t get_total_chunk_sizes() const;
-
-  /// TODO: make a macro for doing this
-  /// For C++/IR interop, we need to be able to look up types by name.
-  static const char* LLVM_CLASS_NAME;
-
-  static const int DEFAULT_ALIGNMENT = 8;
-
- private:
-  friend class MemPoolTest;
-  static const int INITIAL_CHUNK_SIZE = 4 * 1024;
-
-  /// The maximum size of chunk that should be allocated. Allocations larger than this
-  /// size will get their own individual chunk.
-  static const int MAX_CHUNK_SIZE = 1024 * 1024;
-
-  struct ChunkInfo {
-    uint8_t* data; // Owned by the ChunkInfo.
-    int64_t size;  // in bytes
-
-    /// bytes allocated via Allocate() in this chunk
-    int64_t allocated_bytes;
-
-    explicit ChunkInfo(int64_t size, uint8_t* buf);
-
-    ChunkInfo()
-      : data(NULL),
-        size(0),
-        allocated_bytes(0) {}
-  };
-
-  /// A static field used as non-NULL pointer for zero length allocations. NULL is
-  /// reserved for allocation failures. It must be as aligned as max_align_t for
-  /// TryAllocateAligned().
-  static uint32_t zero_length_region_;
-
-  /// chunk from which we served the last Allocate() call;
-  /// always points to the last chunk that contains allocated data;
-  /// chunks 0..current_chunk_idx_ - 1 are guaranteed to contain data
-  /// (chunks_[i].allocated_bytes > 0 for i: 0..current_chunk_idx_ - 1);
-  /// chunks after 'current_chunk_idx_' are "free chunks" that contain no data.
-  /// -1 if no chunks present
-  int current_chunk_idx_;
-
-  /// The size of the next chunk to allocate.
-  int next_chunk_size_;
-
-  /// sum of allocated_bytes_
-  int64_t total_allocated_bytes_;
-
-  /// sum of all bytes allocated in chunks_
-  int64_t total_reserved_bytes_;
-
-  /// Maximum number of bytes allocated from this pool at one time.
-  int64_t peak_allocated_bytes_;
-
-  std::vector<ChunkInfo> chunks_;
-
-  /// The current and peak memory footprint of this pool. This is different from
-  /// total allocated_bytes_ since it includes bytes in chunks that are not used.
-  MemTracker* mem_tracker_;
-
-  /// Find or allocated a chunk with at least min_size spare capacity and update
-  /// current_chunk_idx_. Also updates chunks_, chunk_sizes_ and allocated_bytes_
-  /// if a new chunk needs to be created.
-  /// If check_limits is true, this call can fail (returns false) if adding a
-  /// new chunk exceeds the mem limits.
-  bool FindChunk(size_t min_size, bool check_limits);
-
-  /// Check integrity of the supporting data structures; always returns true but DCHECKs
-  /// all invariants.
-  /// If 'check_current_chunk_empty' is true, checks that the current chunk contains no
-  /// data. Otherwise the current chunk can be either empty or full.
-  bool CheckIntegrity(bool check_current_chunk_empty);
-
-  /// Return offset to unoccupied space in current chunk.
-  int64_t GetFreeOffset() const {
-    if (current_chunk_idx_ == -1) return 0;
-    return chunks_[current_chunk_idx_].allocated_bytes;
-  }
-
-  template <bool CHECK_LIMIT_FIRST>
-  uint8_t* ALWAYS_INLINE allocate(int64_t size, int alignment) {
-    DCHECK_GE(size, 0);
-    if (UNLIKELY(size == 0)) return reinterpret_cast<uint8_t*>(&zero_length_region_);
-
-    if (current_chunk_idx_ != -1) {
-      ChunkInfo& info = chunks_[current_chunk_idx_];
-      int64_t aligned_allocated_bytes = BitUtil::RoundUpToPowerOf2(
-          info.allocated_bytes, alignment);
-      if (aligned_allocated_bytes + size <= info.size) {
-        // Ensure the requested alignment is respected.
-        int64_t padding = aligned_allocated_bytes - info.allocated_bytes;
-        uint8_t* result = info.data + aligned_allocated_bytes;
-        ASAN_UNPOISON_MEMORY_REGION(result, size);
-        DCHECK_LE(info.allocated_bytes + size, info.size);
-        info.allocated_bytes += padding + size;
-        total_allocated_bytes_ += padding + size;
-        DCHECK_LE(current_chunk_idx_, chunks_.size() - 1);
-        return result;
-      }
+    /// 'tracker' tracks the amount of memory allocated by this pool. Must not be NULL.
+    MemPool(MemTracker* mem_tracker)
+            : current_chunk_idx_(-1),
+              next_chunk_size_(INITIAL_CHUNK_SIZE),
+              total_allocated_bytes_(0),
+              total_reserved_bytes_(0),
+              peak_allocated_bytes_(0),
+              mem_tracker_(mem_tracker) {
+        DCHECK(mem_tracker != nullptr);
     }
 
-    // If we couldn't allocate a new chunk, return NULL. malloc() guarantees alignment
-    // of alignof(std::max_align_t), so we do not need to do anything additional to
-    // guarantee alignment.
-    //static_assert(
-    //INITIAL_CHUNK_SIZE >= config::FLAGS_MEMORY_MAX_ALIGNMENT, "Min chunk size too low");
-    if (UNLIKELY(!FindChunk(size, CHECK_LIMIT_FIRST))) return NULL;
+    /// Frees all chunks of memory and subtracts the total allocated bytes
+    /// from the registered limits.
+    ~MemPool();
 
-    ChunkInfo& info = chunks_[current_chunk_idx_];
-    uint8_t* result = info.data + info.allocated_bytes;
-    ASAN_UNPOISON_MEMORY_REGION(result, size);
-    DCHECK_LE(info.allocated_bytes + size, info.size);
-    info.allocated_bytes += size;
-    total_allocated_bytes_ += size;
-    DCHECK_LE(current_chunk_idx_, chunks_.size() - 1);
-    peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_);
-    return result;
-  }
+    /// Allocates a section of memory of 'size' bytes with DEFAULT_ALIGNMENT at the end
+    /// of the current chunk. Creates a new chunk if there aren't any chunks
+    /// with enough capacity.
+    uint8_t* allocate(int64_t size) {
+        return allocate<false>(size, DEFAULT_ALIGNMENT);
+    }
+
+    /// Same as Allocate() except the mem limit is checked before the allocation and
+    /// this call will fail (returns NULL) if it does.
+    /// The caller must handle the NULL case. This should be used for allocations
+    /// where the size can be very big to bound the amount by which we exceed mem limits.
+    uint8_t* try_allocate(int64_t size) {
+        return allocate<true>(size, DEFAULT_ALIGNMENT);
+    }
+
+    /// Same as TryAllocate() except a non-default alignment can be specified. It
+    /// should be a power-of-two in [1, alignof(std::max_align_t)].
+    uint8_t* try_allocate_aligned(int64_t size, int alignment) {
+        DCHECK_GE(alignment, 1);
+        DCHECK_LE(alignment, config::memory_max_alignment);
+        DCHECK_EQ(BitUtil::RoundUpToPowerOfTwo(alignment), alignment);
+        return allocate<true>(size, alignment);
+    }
+
+    /// Same as TryAllocate() except returned memory is not aligned at all.
+    uint8_t* try_allocate_unaligned(int64_t size) {
+        // Call templated implementation directly so that it is inlined here and the
+        // alignment logic can be optimised out.
+        return allocate<true>(size, 1);
+    }
+
+    /// Makes all allocated chunks available for re-use, but doesn't delete any chunks.
+    void clear();
+
+    /// Deletes all allocated chunks. free_all() or acquire_data() must be called for
+    /// each mem pool
+    void free_all();
+
+    /// Absorb all chunks that hold data from src. If keep_current is true, let src hold on
+    /// to its last allocated chunk that contains data.
+    /// All offsets handed out by calls to GetCurrentOffset() for 'src' become invalid.
+    void acquire_data(MemPool* src, bool keep_current);
+
+    // Exchange all chunks with the input source, including reserved chunks.
+    // This function keeps its own MemTracker and updates it after the exchange.
+    // Why do we need this instead of std::swap? Because swap would also swap the
+    // MemTracker pointers, which can lead to errors: each pool only holds a pointer
+    // to its MemTracker, and that pointer could become invalid after a swap.
+    void exchange_data(MemPool* other);
+
+    std::string debug_string();
+
+    int64_t total_allocated_bytes() const { return total_allocated_bytes_; }
+    int64_t total_reserved_bytes() const { return total_reserved_bytes_; }
+    int64_t peak_allocated_bytes() const { return peak_allocated_bytes_; }
+
+    MemTracker* mem_tracker() { return mem_tracker_; }
+
+    /// TODO: make a macro for doing this
+    /// For C++/IR interop, we need to be able to look up types by name.
+    static const char* LLVM_CLASS_NAME;
+
+    static const int DEFAULT_ALIGNMENT = 8;
+
+private:
+    friend class MemPoolTest;
+    static const int INITIAL_CHUNK_SIZE = 4 * 1024;
+
+    /// The maximum size of chunk that should be allocated. Allocations larger than this
+    /// size will get their own individual chunk.
+    static const int MAX_CHUNK_SIZE = 512 * 1024;
+
+    struct ChunkInfo {
+        uint8_t* data; // Owned by the ChunkInfo.
+        int64_t size;  // in bytes
+
+        /// bytes allocated via Allocate() in this chunk
+        int64_t allocated_bytes;
+
+        explicit ChunkInfo(int64_t size, uint8_t* buf);
+
+        ChunkInfo()
+                : data(NULL),
+                  size(0),
+                  allocated_bytes(0) {}
+    };
+
+    /// A static field used as non-NULL pointer for zero length allocations. NULL is
+    /// reserved for allocation failures. It must be as aligned as max_align_t for
+    /// TryAllocateAligned().
+    static uint32_t k_zero_length_region_;
+
+    /// Find or allocate a chunk with at least min_size spare capacity and update
+    /// current_chunk_idx_. Also updates chunks_, chunk_sizes_ and allocated_bytes_
+    /// if a new chunk needs to be created.
+    /// If check_limits is true, this call can fail (returns false) if adding a
+    /// new chunk exceeds the mem limits.
+    bool find_chunk(size_t min_size, bool check_limits);
+
+    /// Check integrity of the supporting data structures; always returns true but DCHECKs
+    /// all invariants.
+    /// If 'check_current_chunk_empty' is true, checks that the current chunk contains no
+    /// data. Otherwise the current chunk can be either empty or full.
+    bool check_integrity(bool check_current_chunk_empty);
+
+    /// Return offset to unoccupied space in current chunk.
+    int64_t get_free_offset() const {
+        if (current_chunk_idx_ == -1) return 0;
+        return chunks_[current_chunk_idx_].allocated_bytes;
+    }
+
+    template <bool CHECK_LIMIT_FIRST>
+    uint8_t* ALWAYS_INLINE allocate(int64_t size, int alignment) {
+        DCHECK_GE(size, 0);
+        if (UNLIKELY(size == 0)) return reinterpret_cast<uint8_t*>(&k_zero_length_region_);
+
+        if (current_chunk_idx_ != -1) {
+            ChunkInfo& info = chunks_[current_chunk_idx_];
+            int64_t aligned_allocated_bytes = BitUtil::RoundUpToPowerOf2(
+                    info.allocated_bytes, alignment);
+            if (aligned_allocated_bytes + size <= info.size) {
+                // Ensure the requested alignment is respected.
+                int64_t padding = aligned_allocated_bytes - info.allocated_bytes;
+                uint8_t* result = info.data + aligned_allocated_bytes;
+                ASAN_UNPOISON_MEMORY_REGION(result, size);
+                DCHECK_LE(info.allocated_bytes + size, info.size);
+                info.allocated_bytes += padding + size;
+                total_allocated_bytes_ += padding + size;
+                DCHECK_LE(current_chunk_idx_, chunks_.size() - 1);
+                return result;
+            }
+        }
+
+        // If we couldn't allocate a new chunk, return NULL. malloc() guarantees alignment
+        // of alignof(std::max_align_t), so we do not need to do anything additional to
+        // guarantee alignment.
+        //static_assert(
+        //INITIAL_CHUNK_SIZE >= config::FLAGS_MEMORY_MAX_ALIGNMENT, "Min chunk size too low");
+        if (UNLIKELY(!find_chunk(size, CHECK_LIMIT_FIRST))) return NULL;
+
+        ChunkInfo& info = chunks_[current_chunk_idx_];
+        uint8_t* result = info.data + info.allocated_bytes;
+        ASAN_UNPOISON_MEMORY_REGION(result, size);
+        DCHECK_LE(info.allocated_bytes + size, info.size);
+        info.allocated_bytes += size;
+        total_allocated_bytes_ += size;
+        DCHECK_LE(current_chunk_idx_, chunks_.size() - 1);
+        peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_);
+        return result;
+    }
+
+private:
+    /// chunk from which we served the last Allocate() call;
+    /// always points to the last chunk that contains allocated data;
+    /// chunks 0..current_chunk_idx_ - 1 are guaranteed to contain data
+    /// (chunks_[i].allocated_bytes > 0 for i: 0..current_chunk_idx_ - 1);
+    /// chunks after 'current_chunk_idx_' are "free chunks" that contain no data.
+    /// -1 if no chunks present
+    int current_chunk_idx_;
+
+    /// The size of the next chunk to allocate.
+    int next_chunk_size_;
+
+    /// sum of allocated_bytes_
+    int64_t total_allocated_bytes_;
+
+    /// sum of all bytes allocated in chunks_
+    int64_t total_reserved_bytes_;
+
+    /// Maximum number of bytes allocated from this pool at one time.
+    int64_t peak_allocated_bytes_;
+
+    std::vector<ChunkInfo> chunks_;
+
+    /// The current and peak memory footprint of this pool. This is different from
+    /// total allocated_bytes_ since it includes bytes in chunks that are not used.
+    MemTracker* mem_tracker_;
 };
 
 // Stamp out templated implementations here so they're included in IR module
diff --git a/be/src/runtime/row_batch.cpp b/be/src/runtime/row_batch.cpp
index cd96311f23..6e31a446fc 100644
--- a/be/src/runtime/row_batch.cpp
+++ b/be/src/runtime/row_batch.cpp
@@ -596,7 +596,7 @@ void RowBatch::swap(RowBatch* other) {
     // The destination row batch should be empty.
     DCHECK(!_has_in_flight_row);
-    DCHECK_EQ(_tuple_data_pool->get_total_chunk_sizes(), 0);
+    DCHECK_EQ(_tuple_data_pool->total_reserved_bytes(), 0);
 
     std::swap(_has_in_flight_row, other->_has_in_flight_row);
     std::swap(_num_rows, other->_num_rows);
diff --git a/be/src/runtime/vectorized_row_batch.cpp b/be/src/runtime/vectorized_row_batch.cpp
index 8ffa027242..91543119b4 100644
--- a/be/src/runtime/vectorized_row_batch.cpp
+++ b/be/src/runtime/vectorized_row_batch.cpp
@@ -139,8 +139,11 @@ void VectorizedRowBatch::dump_to_row_block(RowBlock* row_block) {
     row_block->_limit = _size;
     row_block->_info.row_num = _size;
     row_block->_block_status = _block_status;
-    row_block->mem_pool()->free_all();
-    row_block->mem_pool()->acquire_data(_mem_pool.get(), false);
+
+    // Exchange the two memory pools to reduce chunk allocations in MemPool.
+    row_block->mem_pool()->exchange_data(_mem_pool.get());
+    // Clear to reuse the already-allocated chunks.
+    _mem_pool->clear();
 }
 
 } // namespace doris
diff --git a/be/test/runtime/mem_pool_test.cpp b/be/test/runtime/mem_pool_test.cpp
index 17c53d1734..10aa9c1cd6 100644
--- a/be/test/runtime/mem_pool_test.cpp
+++ b/be/test/runtime/mem_pool_test.cpp
@@ -35,62 +35,63 @@ TEST(MemPoolTest, Basic) {
         // pads to 32 bytes
         p.allocate(25);
     }
-
-    // we handed back 24K
-    EXPECT_EQ(p.total_allocated_bytes(), 24 * 1024); // 32 * 768 == 24 * 1024
+    // we handed back 24K; the first allocation in each chunk (4, 8, 16) needs no padding
+    EXPECT_EQ(24 * 1024 - 3 * 7, p.total_allocated_bytes()); // 32 * 768 == 24 * 1024
 
     // .. and allocated 28K of chunks (4, 8, 16)
-    EXPECT_EQ(p.get_total_chunk_sizes(), 28 * 1024);
+    EXPECT_EQ(28 * 1024, p.total_reserved_bytes());
 
     // we're passing on the first two chunks, containing 12K of data; we're left with one
     // chunk of 16K containing 12K of data
     p2.acquire_data(&p, true);
-    EXPECT_EQ(p.total_allocated_bytes(), 12 * 1024);
-    EXPECT_EQ(p.get_total_chunk_sizes(), 16 * 1024);
+    EXPECT_EQ(12 * 1024 - 7, p.total_allocated_bytes());
+    EXPECT_EQ(16 * 1024, p.total_reserved_bytes());
 
     // we allocate 8K, for which there isn't enough room in the current chunk,
     // so another one is allocated (32K)
     p.allocate(8 * 1024);
-    EXPECT_EQ(p.get_total_chunk_sizes(), (16 + 32) * 1024);
+    EXPECT_EQ((16 + 32) * 1024, p.total_reserved_bytes());
 
     // we allocate 65K, which doesn't fit into the current chunk or the default
     // size of the next allocated chunk (64K)
     p.allocate(65 * 1024);
-    EXPECT_EQ(p.total_allocated_bytes(), (12 + 8 + 65) * 1024);
-    EXPECT_EQ(p.peak_allocated_bytes(), (12 + 8 + 65) * 1024);
-    EXPECT_EQ(p.get_total_chunk_sizes(), (16 + 32 + 65) * 1024);
+    EXPECT_EQ((12 + 8 + 65) * 1024 - 7, p.total_allocated_bytes());
+    EXPECT_EQ((16 + 32 + 65) * 1024, p.total_reserved_bytes());
 
     // Clear() resets allocated data, but doesn't remove any chunks
     p.clear();
-    EXPECT_EQ(p.total_allocated_bytes(), 0);
-    EXPECT_EQ(p.peak_allocated_bytes(), (12 + 8 + 65) * 1024);
-    EXPECT_EQ(p.get_total_chunk_sizes(), (16 + 32 + 65) * 1024);
+    EXPECT_EQ(0, p.total_allocated_bytes());
+    EXPECT_EQ((16 + 32 + 65) * 1024, p.total_reserved_bytes());
 
     // next allocation reuses existing chunks
    p.allocate(1024);
-    EXPECT_EQ(p.total_allocated_bytes(), 1024);
-    EXPECT_EQ(p.peak_allocated_bytes(), (12 + 8 + 65) * 1024);
-    EXPECT_EQ(p.get_total_chunk_sizes(), (16 + 32 + 65) * 1024);
+    EXPECT_EQ(1024, p.total_allocated_bytes());
+    EXPECT_EQ((16 + 32 + 65) * 1024, p.total_reserved_bytes());
 
     // ... unless it doesn't fit into any available chunk
     p.allocate(120 * 1024);
-    EXPECT_EQ(p.total_allocated_bytes(), (1 + 120) * 1024);
-    EXPECT_EQ(p.peak_allocated_bytes(), (1 + 120) * 1024);
-    EXPECT_EQ(p.get_total_chunk_sizes(), (130 + 16 + 32 + 65) * 1024);
+    EXPECT_EQ((1 + 120) * 1024, p.total_allocated_bytes());
+    EXPECT_EQ((130 + 16 + 32 + 65) * 1024, p.total_reserved_bytes());
 
     // ... Try another chunk that fits into an existing chunk
     p.allocate(33 * 1024);
-    EXPECT_EQ(p.total_allocated_bytes(), (1 + 120 + 33) * 1024);
-    EXPECT_EQ(p.get_total_chunk_sizes(), (130 + 65 + 16 + 32) * 1024);
+    EXPECT_EQ((1 + 120 + 33) * 1024, p.total_allocated_bytes());
+    EXPECT_EQ((130 + 65 + 16 + 32) * 1024, p.total_reserved_bytes());
 
     // we're releasing 3 chunks, which get added to p2
     p2.acquire_data(&p, false);
-    EXPECT_EQ(p.total_allocated_bytes(), 0);
-    EXPECT_EQ(p.peak_allocated_bytes(), (1 + 120 + 33) * 1024);
-    EXPECT_EQ(p.get_total_chunk_sizes(), 0);
+    EXPECT_EQ(0, p.total_allocated_bytes());
+    EXPECT_EQ(0, p.total_reserved_bytes());
 
     p3.acquire_data(&p2, true); // we're keeping the 65k chunk
-    EXPECT_EQ(p2.total_allocated_bytes(), 33 * 1024);
-    EXPECT_EQ(p2.get_total_chunk_sizes(), 65 * 1024);
+    EXPECT_EQ(33 * 1024, p2.total_allocated_bytes());
+    EXPECT_EQ(65 * 1024, p2.total_reserved_bytes());
+
+    {
+        MemPool p4(&tracker);
+        p4.exchange_data(&p2);
+        EXPECT_EQ(33 * 1024, p4.total_allocated_bytes());
+        EXPECT_EQ(65 * 1024, p4.total_reserved_bytes());
+    }
 }
 
 // Test that we can keep an allocated chunk and a free chunk.
@@ -104,183 +105,84 @@ TEST(MemPoolTest, Keep) {
     p.allocate(4 * 1024);
     p.allocate(8 * 1024);
     p.allocate(16 * 1024);
     EXPECT_EQ(p.total_allocated_bytes(), (4 + 8 + 16) * 1024);
-    EXPECT_EQ(p.get_total_chunk_sizes(), (4 + 8 + 16) * 1024);
+    EXPECT_EQ(p.total_reserved_bytes(), (4 + 8 + 16) * 1024);
 
     p.clear();
     EXPECT_EQ(p.total_allocated_bytes(), 0);
-    EXPECT_EQ(p.get_total_chunk_sizes(), (4 + 8 + 16) * 1024);
+    EXPECT_EQ(p.total_reserved_bytes(), (4 + 8 + 16) * 1024);
 
     p.allocate(1 * 1024);
     p.allocate(4 * 1024);
     EXPECT_EQ(p.total_allocated_bytes(), (1 + 4) * 1024);
-    EXPECT_EQ(p.get_total_chunk_sizes(), (4 + 8 + 16) * 1024);
+    EXPECT_EQ(p.total_reserved_bytes(), (4 + 8 + 16) * 1024);
 
     MemPool p2(&tracker);
     p2.acquire_data(&p, true);
-    EXPECT_EQ(p.total_allocated_bytes(), 4 * 1024);
-    EXPECT_EQ(p.get_total_chunk_sizes(), (8 + 16) * 1024);
-    EXPECT_EQ(p2.total_allocated_bytes(), 1 * 1024);
-    EXPECT_EQ(p2.get_total_chunk_sizes(), 4 * 1024);
+
+    {
+        p2.exchange_data(&p);
+        EXPECT_EQ(4 * 1024, p2.total_allocated_bytes());
+        EXPECT_EQ((8 + 16) * 1024, p2.total_reserved_bytes());
+        EXPECT_EQ(1 * 1024, p.total_allocated_bytes());
+        EXPECT_EQ(4 * 1024, p.total_reserved_bytes());
+    }
 }
 
-// Tests that we can return partial allocations.
-TEST(MemPoolTest, ReturnPartial) {
-    MemTracker tracker(-1);
-    MemPool p(&tracker);
-    uint8_t* ptr = p.allocate(1024);
-    EXPECT_EQ(p.total_allocated_bytes(), 1024);
-    memset(ptr, 0, 1024);
-    p.return_partial_allocation(1024);
-
-    uint8_t* ptr2 = p.allocate(1024);
-    EXPECT_EQ(p.total_allocated_bytes(), 1024);
-    EXPECT_TRUE(ptr == ptr2);
-    p.return_partial_allocation(1016);
-
-    ptr2 = p.allocate(1016);
-    EXPECT_EQ(p.total_allocated_bytes(), 1024);
-    EXPECT_TRUE(ptr2 == ptr + 8);
-    p.return_partial_allocation(512);
-    memset(ptr2, 1, 1016 - 512);
-
-    uint8_t* ptr3 = p.allocate(512);
-    EXPECT_EQ(p.total_allocated_bytes(), 1024);
-    EXPECT_TRUE(ptr3 == ptr + 512);
-    memset(ptr3, 2, 512);
-
-    for (int i = 0; i < 8; ++i) {
-        EXPECT_EQ(ptr[i], 0);
-    }
-    for (int i = 8; i < 512; ++i) {
-        EXPECT_EQ(ptr[i], 1);
-    }
-    for (int i = 512; i < 1024; ++i) {
-        EXPECT_EQ(ptr[i], 2);
-    }
-
-    p.free_all();
-}
-
-TEST(MemPoolTest, Limits) {
-    MemTracker limit3(320);
-    MemTracker limit1(160, "", &limit3);
-    MemTracker limit2(240, "", &limit3);
-
-    MemPool* p1 = new MemPool(&limit1);
-    EXPECT_FALSE(limit1.any_limit_exceeded());
-
-    MemPool* p2 = new MemPool(&limit2);
-    EXPECT_FALSE(limit2.any_limit_exceeded());
-
-    // p1 exceeds a non-shared limit
-    p1->allocate(80);
-    EXPECT_FALSE(limit1.limit_exceeded());
-    EXPECT_EQ(limit1.consumption(), 80);
-    EXPECT_FALSE(limit3.limit_exceeded());
-    EXPECT_EQ(limit3.consumption(), 80);
-
-    p1->allocate(88);
-    EXPECT_TRUE(limit1.limit_exceeded());
-    EXPECT_EQ(limit1.consumption(), 168);
-    EXPECT_FALSE(limit3.limit_exceeded());
-    EXPECT_EQ(limit3.consumption(), 168);
-
-    // p2 exceeds a shared limit
-    p2->allocate(80);
-    EXPECT_FALSE(limit2.limit_exceeded());
-    EXPECT_EQ(limit2.consumption(), 80);
-    EXPECT_FALSE(limit3.limit_exceeded());
-    EXPECT_EQ(limit3.consumption(), 248);
-
-    p2->allocate(80);
-    EXPECT_FALSE(limit2.limit_exceeded());
-    EXPECT_EQ(limit2.consumption(), 160);
-    EXPECT_TRUE(limit3.limit_exceeded());
-    EXPECT_EQ(limit3.consumption(), 328);
-
-    // deleting pools reduces consumption
-    p1->free_all();
-    delete p1;
-    EXPECT_EQ(limit1.consumption(), 0);
-    EXPECT_EQ(limit2.consumption(), 160);
-    EXPECT_EQ(limit3.consumption(), 160);
-
-    // allocate 160 bytes from 240 byte limit.
-    p2->free_all();
-    EXPECT_FALSE(limit2.limit_exceeded());
-    uint8_t* result = p2->try_allocate(160);
-    DCHECK(result != NULL);
-
-    // Try To allocate another 160 bytes, this should fail.
-    result = p2->try_allocate(160);
-    DCHECK(result == NULL);
-
-    // Try To allocate 20 bytes, this should succeed. try_allocate() should leave the
-    // pool in a functional state..
-    result = p2->try_allocate(20);
-    DCHECK(result != NULL);
-
-    p2->free_all();
-    delete p2;
-}
+// A large allocation size that exceeds 32 bits.
+#define LARGE_ALLOC_SIZE (1LL << 32)
 
 TEST(MemPoolTest, MaxAllocation) {
-    int64_t int_max_rounded = BitUtil::round_up(INT_MAX, 8);
+    int64_t int_max_rounded = BitUtil::round_up(LARGE_ALLOC_SIZE, 8);
 
-    // Allocate a single INT_MAX chunk
+    // Allocate a single LARGE_ALLOC_SIZE chunk
     MemTracker tracker(-1);
     MemPool p1(&tracker);
-    uint8_t* ptr = p1.allocate(INT_MAX);
+    uint8_t* ptr = p1.allocate(LARGE_ALLOC_SIZE);
     EXPECT_TRUE(ptr != NULL);
-    EXPECT_EQ(p1.get_total_chunk_sizes(), int_max_rounded);
-    EXPECT_EQ(p1.total_allocated_bytes(), int_max_rounded);
+    EXPECT_EQ(int_max_rounded, p1.total_reserved_bytes());
+    EXPECT_EQ(int_max_rounded, p1.total_allocated_bytes());
     p1.free_all();
 
-    // Allocate a small chunk (DEFAULT_INITIAL_CHUNK_SIZE) followed by an INT_MAX chunk
+    // Allocate a small chunk (DEFAULT_INITIAL_CHUNK_SIZE) followed by a LARGE_ALLOC_SIZE chunk
     MemPool p2(&tracker);
     p2.allocate(8);
-    EXPECT_EQ(p2.get_total_chunk_sizes(), 4096);
+    EXPECT_EQ(p2.total_reserved_bytes(), 4096);
     EXPECT_EQ(p2.total_allocated_bytes(), 8);
-    ptr = p2.allocate(INT_MAX);
+    ptr = p2.allocate(LARGE_ALLOC_SIZE);
     EXPECT_TRUE(ptr != NULL);
-    EXPECT_EQ(p2.get_total_chunk_sizes(), 4096LL + int_max_rounded);
+    EXPECT_EQ(p2.total_reserved_bytes(), 4096LL + int_max_rounded);
     EXPECT_EQ(p2.total_allocated_bytes(), 8LL + int_max_rounded);
     p2.free_all();
 
-    // Allocate three INT_MAX chunks followed by a small chunk followed by another INT_MAX
+    // Allocate three LARGE_ALLOC_SIZE chunks followed by a small chunk followed by another LARGE_ALLOC_SIZE
     // chunk
     MemPool p3(&tracker);
-    p3.allocate(INT_MAX);
+    p3.allocate(LARGE_ALLOC_SIZE);
     // Allocates new int_max_rounded * 2 chunk
     // NOTE: exceed MAX_CHUNK_SIZE limit, will not *2
-    ptr = p3.allocate(INT_MAX);
+    ptr = p3.allocate(LARGE_ALLOC_SIZE);
     EXPECT_TRUE(ptr != NULL);
-    // EXPECT_EQ(p3.get_total_chunk_sizes(), int_max_rounded * 3);
-    EXPECT_EQ(p3.get_total_chunk_sizes(), int_max_rounded * 2);
-    EXPECT_EQ(p3.total_allocated_bytes(), int_max_rounded * 2);
+    EXPECT_EQ(int_max_rounded * 2, p3.total_reserved_bytes());
+    EXPECT_EQ(int_max_rounded * 2, p3.total_allocated_bytes());
     // Uses existing int_max_rounded * 2 chunk
-    ptr = p3.allocate(INT_MAX);
+    ptr = p3.allocate(LARGE_ALLOC_SIZE);
     EXPECT_TRUE(ptr != NULL);
-    EXPECT_EQ(p3.get_total_chunk_sizes(), int_max_rounded * 3);
-    EXPECT_EQ(p3.total_allocated_bytes(), int_max_rounded * 3);
+    EXPECT_EQ(int_max_rounded * 3, p3.total_reserved_bytes());
+    EXPECT_EQ(int_max_rounded * 3, p3.total_allocated_bytes());
 
     // Allocates a new int_max_rounded * 4 chunk
     // NOTE: exceed MAX_CHUNK_SIZE limit, will not *2
 #if !defined (ADDRESS_SANITIZER) || (__clang_major__ >= 3 && __clang_minor__ >= 7)
     ptr = p3.allocate(8);
     EXPECT_TRUE(ptr != NULL);
-    // EXPECT_EQ(p3.get_total_chunk_sizes(), int_max_rounded * 7);
-    EXPECT_EQ(p3.get_total_chunk_sizes(), int_max_rounded * 3 + 512 * 1024);
-    EXPECT_EQ(p3.total_allocated_bytes(), int_max_rounded * 3 + 8);
+    EXPECT_EQ(int_max_rounded * 3 + 512 * 1024, p3.total_reserved_bytes());
+    EXPECT_EQ(int_max_rounded * 3 + 8, p3.total_allocated_bytes());
     // Uses existing int_max_rounded * 4 chunk
-    ptr = p3.allocate(INT_MAX);
+    ptr = p3.allocate(LARGE_ALLOC_SIZE);
     EXPECT_TRUE(ptr != NULL);
-    // EXPECT_EQ(p3.get_total_chunk_sizes(), int_max_rounded * 7);
-    EXPECT_EQ(p3.get_total_chunk_sizes(), int_max_rounded * 4 + 512 * 1024);
-    EXPECT_EQ(p3.total_allocated_bytes(), int_max_rounded * 4 + 8);
+    EXPECT_EQ(int_max_rounded * 4 + 512 * 1024, p3.total_reserved_bytes());
+    EXPECT_EQ(int_max_rounded * 4 + 8, p3.total_allocated_bytes());
 #endif
     p3.free_all();
 }
 
-#if 0
-#endif
 
 int main(int argc, char** argv) {
diff --git a/run-ut.sh b/run-ut.sh
index b34280042a..d777bc7a33 100755
--- a/run-ut.sh
+++ b/run-ut.sh
@@ -208,6 +208,7 @@
 ${DORIS_TEST_BINARY_DIR}/runtime/tablet_writer_mgr_test
 ${DORIS_TEST_BINARY_DIR}/runtime/snapshot_loader_test
 ${DORIS_TEST_BINARY_DIR}/runtime/user_function_cache_test
 ${DORIS_TEST_BINARY_DIR}/runtime/small_file_mgr_test
+${DORIS_TEST_BINARY_DIR}/runtime/mem_pool_test
 
 # Running expr Unittest
 # Running http
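
A minimal standalone sketch of the exchange_data() contract introduced by this patch. Tracker and Pool below are simplified stand-ins written for this note, not the real doris::MemTracker and doris::MemPool; the point is why the chunk state is swapped while each pool keeps its own tracker pointer, moving only the reserved-byte delta between the two trackers.

#include <cstdint>
#include <utility>
#include <vector>

// Illustrative stand-ins only; not the doris classes.
struct Tracker {
    int64_t consumption = 0;
    void consume(int64_t v) { consumption += v; }
    void release(int64_t v) { consumption -= v; }
};

struct Pool {
    Tracker* tracker;            // borrowed pointer; the tracker is owned elsewhere
    int64_t reserved = 0;        // total bytes held in chunks
    std::vector<int64_t> chunks; // chunk sizes, standing in for ChunkInfo

    // Mirrors the patch's exchange_data(): swap the chunk state, keep each
    // pool's own tracker, then move the reserved-byte delta between trackers.
    void exchange_data(Pool* other) {
        int64_t delta = other->reserved - reserved;
        std::swap(chunks, other->chunks);
        std::swap(reserved, other->reserved);
        tracker->consume(delta);        // this pool now holds 'delta' more bytes
        other->tracker->release(delta); // the other pool holds 'delta' fewer
    }
};

A plain std::swap(*a, *b) would also exchange the tracker pointers, so each pool would afterwards charge a tracker it does not own, which is the failure mode the header comment warns about.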
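The padding arithmetic behind the updated expectation 24 * 1024 - 3 * 7 in MemPoolTest.Basic can be checked in isolation. round_up_pow2 below is a local helper written for this note, under the assumption that BitUtil::RoundUpToPowerOf2 rounds an offset up to a power-of-two multiple:

#include <cassert>
#include <cstdint>

// Round 'v' up to a multiple of 'align' (align must be a power of two).
static int64_t round_up_pow2(int64_t v, int64_t align) {
    return (v + align - 1) & ~(align - 1);
}

int main() {
    // Each 25-byte allocation is placed at the next 8-byte boundary, so an
    // allocation that starts unaligned costs 7 bytes of padding plus 25 bytes
    // of data: 25 rounds up to 32 ("pads to 32 bytes" in the test).
    assert(round_up_pow2(25, 8) == 32);
    // The first allocation in a chunk starts at offset 0, which is already
    // aligned, so it skips the 7-byte pad. Three chunks (4K, 8K, 16K) are
    // used, giving 768 * 32 - 3 * 7 total allocated bytes.
    assert(round_up_pow2(0, 8) == 0);
    return 0;
}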