From 09a4d3e50ad1ffdb502d5cf38ed8f0a42e143b1f Mon Sep 17 00:00:00 2001
From: lichaoyong
Date: Fri, 14 Feb 2020 13:32:03 +0800
Subject: [PATCH] [gutil] import scoped_refptr smart pointer from KUDU (#2899)

scoped_refptr is imported as a replacement for std::shared_ptr; it is
generally faster and smaller. A short usage sketch is included below.

Advantages:
(1) It needs only a single allocation, and the ref count sits on the same
    cache line as the object.
(2) The pointer itself is only 8 bytes, since the ref count lives inside
    the object.
(3) Reference counts can be incremented or decremented manually when more
    control is required.
(4) A raw pointer can be converted back to a scoped_refptr safely, without
    any risk of a double free.
(5) Since we control the implementation, we can add features such as debug
    builds that capture the stack trace of every referent to help debug
    leaks.

Disadvantages:
(1) The referred-to object must inherit from RefCounted.
(2) The weak_ptr use cases are not supported.
---
 be/src/gutil/CMakeLists.txt                   |   3 +
 be/src/gutil/arm_instruction_set_select.h     |  52 +
 be/src/gutil/atomic_refcount.h                | 153 +++
 be/src/gutil/cycleclock-inl.h                 | 215 ++++
 be/src/gutil/endian.h                         | 306 +++---
 be/src/gutil/map-util.h                       | 944 ++++++++++++++++++
 be/src/gutil/ref_counted.cc                   |  95 ++
 be/src/gutil/ref_counted.h                    | 365 +++++++
 be/src/gutil/sysinfo-test.cc                  |  71 ++
 be/src/gutil/sysinfo.cc                       | 474 +++++++++
 be/src/gutil/sysinfo.h                        |  69 ++
 .../threading/thread_collision_warner.cc      |  93 ++
 .../gutil/threading/thread_collision_warner.h | 247 +++++
 be/src/gutil/walltime.h                       | 216 ++++
 14 files changed, 3170 insertions(+), 133 deletions(-)
 create mode 100644 be/src/gutil/arm_instruction_set_select.h
 create mode 100644 be/src/gutil/atomic_refcount.h
 create mode 100644 be/src/gutil/cycleclock-inl.h
 create mode 100644 be/src/gutil/map-util.h
 create mode 100644 be/src/gutil/ref_counted.cc
 create mode 100644 be/src/gutil/ref_counted.h
 create mode 100644 be/src/gutil/sysinfo-test.cc
 create mode 100644 be/src/gutil/sysinfo.cc
 create mode 100644 be/src/gutil/sysinfo.h
 create mode 100644 be/src/gutil/threading/thread_collision_warner.cc
 create mode 100644 be/src/gutil/threading/thread_collision_warner.h
 create mode 100644 be/src/gutil/walltime.h

diff --git a/be/src/gutil/CMakeLists.txt b/be/src/gutil/CMakeLists.txt
index 2c0d283619..1cc33438f1 100644
--- a/be/src/gutil/CMakeLists.txt
+++ b/be/src/gutil/CMakeLists.txt
@@ -31,6 +31,7 @@ SET(SOURCE_FILES
     hash/jenkins.cc
     int128.cc
     once.cc
+    ref_counted.cc
     spinlock_internal.cc
     stringprintf.cc
     strings/ascii_ctype.cc
@@ -46,6 +47,8 @@ SET(SOURCE_FILES
     strings/substitute.cc
     strings/util.cc
     strtoint.cc
+    sysinfo.cc
+    threading/thread_collision_warner.cc
     utf/rune.c
     cpu.cc)
diff --git a/be/src/gutil/arm_instruction_set_select.h b/be/src/gutil/arm_instruction_set_select.h
new file mode 100644
index 0000000000..87bc183358
--- /dev/null
+++ b/be/src/gutil/arm_instruction_set_select.h
@@ -0,0 +1,52 @@
+// Copyright 2011 Google Inc.
+// All Rights Reserved.
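The sketch below illustrates the ownership model described in the commit
message. It is illustrative only: it assumes the imported header is used as
gutil/ref_counted.h, that the RefCounted<T> base lands in the doris namespace
(as ref_counted.cc suggests) with scoped_refptr<T> at global scope, and
MyBuffer/Example are made-up names.

    #include "gutil/ref_counted.h"

    // A type becomes reference-counted by inheriting from RefCounted<T>.
    class MyBuffer : public doris::RefCounted<MyBuffer> {
    public:
        void Fill() {}
    };

    void Example() {
        // One allocation holds both the object and its ref count.
        scoped_refptr<MyBuffer> buf(new MyBuffer());  // count == 1
        scoped_refptr<MyBuffer> alias = buf;          // count == 2, copies 8 bytes
        alias->Fill();

        // A raw pointer can be handed around and safely re-wrapped: the new
        // scoped_refptr takes another reference instead of double-freeing.
        MyBuffer* raw = alias.get();
        scoped_refptr<MyBuffer> again(raw);           // count == 3
    }   // all three drop their references; MyBuffer is deleted exactly once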
+// +// +// Generalizes the plethora of ARM flavors available to an easier to manage set +// Defs reference is at https://wiki.edubuntu.org/ARM/Thumb2PortingHowto + +#ifndef ARM_INSTRUCTION_SET_SELECT_H_ +#define ARM_INSTRUCTION_SET_SELECT_H_ + +#if defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7A__) +# define ARMV7 1 +#endif + +#if defined(ARMV7) || \ + defined(__ARM_ARCH_6__) || \ + defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6ZK__) +# define ARMV6 1 +#endif + +#if defined(ARMV6) || \ + defined(__ARM_ARCH_5T__) || \ + defined(__ARM_ARCH_5E__) || \ + defined(__ARM_ARCH_5TE__) || \ + defined(__ARM_ARCH_5TEJ__) +# define ARMV5 1 +#endif + +#if defined(ARMV5) || \ + defined(__ARM_ARCH_4__) || \ + defined(__ARM_ARCH_4T__) +# define ARMV4 1 +#endif + +#if defined(ARMV4) || \ + defined(__ARM_ARCH_3__) || \ + defined(__ARM_ARCH_3M__) +# define ARMV3 1 +#endif + +#if defined(ARMV3) || \ + defined(__ARM_ARCH_2__) +# define ARMV2 1 +#endif + +#endif // ARM_INSTRUCTION_SET_SELECT_H_ diff --git a/be/src/gutil/atomic_refcount.h b/be/src/gutil/atomic_refcount.h new file mode 100644 index 0000000000..2b5cb36099 --- /dev/null +++ b/be/src/gutil/atomic_refcount.h @@ -0,0 +1,153 @@ +#ifndef BASE_ATOMIC_REFCOUNT_H_ +#define BASE_ATOMIC_REFCOUNT_H_ +// Copyright 2008 Google Inc. +// All rights reserved. + +// Atomic increment and decrement for reference counting. +// For atomic operations on statistics counters and sequence numbers, +// see atomic_stats_counter.h and atomic_sequence_num.h respectively. + +// Some clients use atomic operations for reference counting. +// you use one of them: +// util/refcount/reference_counted.h +// util/gtl/refcounted_ptr.h +// util/gtl/shared_ptr.h +// Alternatively, use a Mutex to maintain your reference count. +// If you really must build your own reference counts with atomic operations, +// use the following routines in the way suggested by this example: +// AtomicWord ref_count_; // remember to initialize this to 0 +// ... +// void Ref() { +// base::RefCountInc(&this->ref_count_); +// } +// void Unref() { +// if (!base::RefCountDec(&this->ref_count_)) { +// delete this; +// } +// } +// Using these routines (rather than the ones in atomicops.h) will provide the +// correct semantics; in particular, the memory ordering needed to make +// reference counting work will be guaranteed. +// You need not declare the reference count word "volatile". After +// initialization you should use the word only via the routines below; the +// "volatile" in the signatures below is for backwards compatibility. +// +// If you need to do something very different from this, use a Mutex. + +#include + +#include "gutil/atomicops.h" +#include "gutil/integral_types.h" +#include "gutil/logging-inl.h" + +namespace base { + +// These calls are available for both Atomic32, and AtomicWord types, +// and also for base::subtle::Atomic64 if available on the platform. + +// Normally, clients are expected to use RefCountInc/RefCountDec. +// In rare cases, it may be necessary to adjust the reference count by +// more than 1, in which case they may use RefCountIncN/RefCountDecN. + +// Increment a reference count by "increment", which must exceed 0. 
+inline void RefCountIncN(volatile Atomic32 *ptr, Atomic32 increment) { + DCHECK_GT(increment, 0); + base::subtle::NoBarrier_AtomicIncrement(ptr, increment); +} + +// Decrement a reference count by "decrement", which must exceed 0, +// and return whether the result is non-zero. +// Insert barriers to ensure that state written before the reference count +// became zero will be visible to a thread that has just made the count zero. +inline bool RefCountDecN(volatile Atomic32 *ptr, Atomic32 decrement) { + DCHECK_GT(decrement, 0); + bool res = base::subtle::Barrier_AtomicIncrement(ptr, -decrement) != 0; + return res; +} + +// Increment a reference count by 1. +inline void RefCountInc(volatile Atomic32 *ptr) { + base::RefCountIncN(ptr, 1); +} + +// Decrement a reference count by 1 and return whether the result is non-zero. +// Insert barriers to ensure that state written before the reference count +// became zero will be visible to a thread that has just made the count zero. +inline bool RefCountDec(volatile Atomic32 *ptr) { + return base::RefCountDecN(ptr, 1); +} + +// Return whether the reference count is one. +// If the reference count is used in the conventional way, a +// refrerence count of 1 implies that the current thread owns the +// reference and no other thread shares it. +// This call performs the test for a referenece count of one, and +// performs the memory barrier needed for the owning thread +// to act on the object, knowing that it has exclusive access to the +// object. +inline bool RefCountIsOne(const volatile Atomic32 *ptr) { + return base::subtle::Acquire_Load(ptr) == 1; +} + +// Return whether the reference count is zero. With conventional object +// referencing counting, the object will be destroyed, so the reference count +// should never be zero. Hence this is generally used for a debug check. +inline bool RefCountIsZero(const volatile Atomic32 *ptr) { + return subtle::Acquire_Load(ptr) == 0; +} + +#if BASE_HAS_ATOMIC64 +// Implementations for Atomic64, if available. +inline void RefCountIncN(volatile base::subtle::Atomic64 *ptr, + base::subtle::Atomic64 increment) { + DCHECK_GT(increment, 0); + base::subtle::NoBarrier_AtomicIncrement(ptr, increment); +} +inline bool RefCountDecN(volatile base::subtle::Atomic64 *ptr, + base::subtle::Atomic64 decrement) { + DCHECK_GT(decrement, 0); + return base::subtle::Barrier_AtomicIncrement(ptr, -decrement) != 0; +} +inline void RefCountInc(volatile base::subtle::Atomic64 *ptr) { + base::RefCountIncN(ptr, 1); +} +inline bool RefCountDec(volatile base::subtle::Atomic64 *ptr) { + return base::RefCountDecN(ptr, 1); +} +inline bool RefCountIsOne(const volatile base::subtle::Atomic64 *ptr) { + return base::subtle::Acquire_Load(ptr) == 1; +} +inline bool RefCountIsZero(const volatile base::subtle::Atomic64 *ptr) { + return base::subtle::Acquire_Load(ptr) == 0; +} +#endif + +#ifdef AtomicWordCastType +// Implementations for AtomicWord, if it's a different type from the above. 
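The header comment above sketches the Ref()/Unref() idiom as a fragment; the
self-contained version below fleshes it out using only the Atomic32 helpers
defined above. MyCache is a hypothetical class name.

    #include "gutil/atomic_refcount.h"

    class MyCache {
    public:
        MyCache() : ref_count_(0) {}            // remember to start at 0
        void Ref() { base::RefCountInc(&ref_count_); }
        void Unref() {
            // The barrier inside RefCountDec makes writes performed before the
            // count hit zero visible to the thread that deletes the object.
            if (!base::RefCountDec(&ref_count_)) {
                delete this;
            }
        }
        bool HasExclusiveAccess() const {
            // Safe to mutate without further synchronization only while this
            // thread holds the sole reference.
            return base::RefCountIsOne(&ref_count_);
        }
    private:
        volatile Atomic32 ref_count_;
    };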
+inline void RefCountIncN(volatile AtomicWord *ptr, AtomicWord increment) { + base::RefCountIncN( + reinterpret_cast(ptr), increment); +} +inline bool RefCountDecN(volatile AtomicWord *ptr, AtomicWord decrement) { + return base::RefCountDecN( + reinterpret_cast(ptr), decrement); +} +inline void RefCountInc(volatile AtomicWord *ptr) { + base::RefCountIncN(ptr, 1); +} +inline bool RefCountDec(volatile AtomicWord *ptr) { + return base::RefCountDecN(ptr, 1); +} +inline bool RefCountIsOne(const volatile AtomicWord *ptr) { + return base::subtle::Acquire_Load( + reinterpret_cast(ptr)) == 1; +} +inline bool RefCountIsZero(const volatile AtomicWord *ptr) { + return base::subtle::Acquire_Load( + reinterpret_cast(ptr)) == 0; +} +#endif + +} // namespace base + +#endif // BASE_ATOMIC_REFCOUNT_H_ diff --git a/be/src/gutil/cycleclock-inl.h b/be/src/gutil/cycleclock-inl.h new file mode 100644 index 0000000000..063b397a36 --- /dev/null +++ b/be/src/gutil/cycleclock-inl.h @@ -0,0 +1,215 @@ +// Copyright (C) 1999-2007 Google, Inc. +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// +// All rights reserved. +// Extracted from base/timer.h by jrvb + +// The implementation of CycleClock::Now() +// See cycleclock.h +// +// IWYU pragma: private, include "base/cycleclock.h" + +// NOTE: only i386 and x86_64 have been well tested. +// PPC, sparc, alpha, and ia64 are based on +// http://peter.kuscsik.com/wordpress/?p=14 +// with modifications by m3b. See also +// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h + +#ifndef GUTIL_CYCLECLOCK_INL_H_ +#define GUTIL_CYCLECLOCK_INL_H_ + +#include + +#include "gutil/port.h" +#include "gutil/arm_instruction_set_select.h" + +// Please do not nest #if directives. Keep one section, and one #if per +// platform. + +// For historical reasons, the frequency on some platforms is scaled to be +// close to the platform's core clock frequency. This is not guaranteed by the +// interface, and may change in future implementations. + +// ---------------------------------------------------------------- +#if defined(__APPLE__) +#include +inline int64 CycleClock::Now() { + // this goes at the top because we need ALL Macs, regardless of + // architecture, to return the number of "mach time units" that + // have passed since startup. See sysinfo.cc where + // InitializeSystemInfo() sets the supposed cpu clock frequency of + // macs to the number of mach time units per second, not actual + // CPU clock frequency (which can change in the face of CPU + // frequency scaling). Also note that when the Mac sleeps, this + // counter pauses; it does not continue counting, nor does it + // reset to zero. 
+ return mach_absolute_time(); +} + +// ---------------------------------------------------------------- +#elif defined(__i386__) +inline int64 CycleClock::Now() { + int64 ret; + __asm__ volatile("rdtsc" : "=A" (ret)); + return ret; +} + +// ---------------------------------------------------------------- +#elif defined(__x86_64__) || defined(__amd64__) +inline int64 CycleClock::Now() { + uint64 low, high; + __asm__ volatile("rdtsc" : "=a" (low), "=d" (high)); + return (high << 32) | low; +} + +// ---------------------------------------------------------------- +#elif defined(__powerpc__) || defined(__ppc__) +#define SPR_TB 268 +#define SPR_TBU 269 +inline int64 CycleClock::Now() { + uint64 time_base_value; + if (sizeof(void*) == 8) { + // On PowerPC64, time base can be read with one SPR read. + asm volatile("mfspr %0, %1" : "=r" (time_base_value) : "i"(SPR_TB)); + } else { + uint32 tbl, tbu0, tbu1; + asm volatile (" mfspr %0, %3\n" + " mfspr %1, %4\n" + " mfspr %2, %3\n" : + "=r"(tbu0), "=r"(tbl), "=r"(tbu1) : + "i"(SPR_TBU), "i"(SPR_TB)); + // If there is a carry into the upper half, it is okay to return + // (tbu1, 0) since it must be between the 2 TBU reads. + tbl &= -static_cast(tbu0 == tbu1); + // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) + time_base_value = + (static_cast(tbu1) << 32) | static_cast(tbl); + } + return static_cast(time_base_value); +} + +// ---------------------------------------------------------------- +#elif defined(__sparc__) +inline int64 CycleClock::Now() { + int64 tick; + asm(".byte 0x83, 0x41, 0x00, 0x00"); + asm("mov %%g1, %0" : "=r" (tick)); + return tick; +} + +// ---------------------------------------------------------------- +#elif defined(__ia64__) +inline int64 CycleClock::Now() { + int64 itc; + asm("mov %0 = ar.itc" : "=r" (itc)); + return itc; +} + +// ---------------------------------------------------------------- +#elif defined(_MSC_VER) && defined(_M_IX86) +inline int64 CycleClock::Now() { + // Older MSVC compilers (like 7.x) don't seem to support the + // __rdtsc intrinsic properly, so I prefer to use _asm instead + // when I know it will work. Otherwise, I'll use __rdtsc and hope + // the code is being compiled with a non-ancient compiler. + _asm rdtsc +} + +// ---------------------------------------------------------------- +#elif defined(_MSC_VER) +// For MSVC, we want to use '_asm rdtsc' when possible (since it works +// with even ancient MSVC compilers), and when not possible the +// __rdtsc intrinsic, declared in . Unfortunately, in some +// environments, and have conflicting +// declarations of some other intrinsics, breaking compilation. +// Therefore, we simply declare __rdtsc ourselves. See also +// http://connect.microsoft.com/VisualStudio/feedback/details/262047 +extern "C" uint64 __rdtsc(); +#pragma intrinsic(__rdtsc) +inline int64 CycleClock::Now() { + return __rdtsc(); +} + +// ---------------------------------------------------------------- +#elif defined(ARMV6) // V6 is the earliest arm that has a standard cyclecount +#include "gutil/sysinfo.h" +inline int64 CycleClock::Now() { + uint32 pmccntr; + uint32 pmuseren; + uint32 pmcntenset; + // Read the user mode perf monitor counter access permissions. + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren)); + if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset)); + if (pmcntenset & 0x80000000ul) { // Is it counting? 
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr)); + // The counter is set up to count every 64th cycle + return static_cast(pmccntr) * 64; // Should optimize to << 6 + } + } + struct timeval tv; + gettimeofday(&tv, NULL); + return static_cast((tv.tv_sec + tv.tv_usec * 0.000001) + * CyclesPerSecond()); +} + +// ---------------------------------------------------------------- +#elif defined(ARMV3) +#include "gutil/sysinfo.h" // for CyclesPerSecond() +inline int64 CycleClock::Now() { + struct timeval tv; + gettimeofday(&tv, NULL); + return static_cast((tv.tv_sec + tv.tv_usec * 0.000001) + * CyclesPerSecond()); +} + +// ---------------------------------------------------------------- +#elif defined(__mips__) +#include "gutil/sysinfo.h" +inline int64 CycleClock::Now() { + // mips apparently only allows rdtsc for superusers, so we fall + // back to gettimeofday. It's possible clock_gettime would be better. + struct timeval tv; + gettimeofday(&tv, NULL); + return static_cast((tv.tv_sec + tv.tv_usec * 0.000001) + * CyclesPerSecond()); +} + +// ---------------------------------------------------------------- +#elif defined(__aarch64__) +#include "gutil/sysinfo.h" +inline int64 CycleClock::Now() { + // System timer of ARMv8 runs at a different frequency than the CPU's. + // The frequency is fixed, typically in the range 1-50MHz. It can be + // read at CNTFRQ special register. We assume the OS has set up + // the virtual timer properly. + int64_t virtual_timer_value; + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); + return virtual_timer_value; +} +// ---------------------------------------------------------------- +#else +// The soft failover to a generic implementation is automatic only for some +// platforms. For other platforms the developer is expected to make an attempt +// to create a fast implementation and use generic version if nothing better is +// available. +#error You need to define CycleTimer for your O/S and CPU +#endif + +#endif // GUTIL_CYCLECLOCK_INL_H_ diff --git a/be/src/gutil/endian.h b/be/src/gutil/endian.h index 6b8a0bc772..f957df74aa 100644 --- a/be/src/gutil/endian.h +++ b/be/src/gutil/endian.h @@ -1,10 +1,32 @@ -// Copyright 2005 Google, Inc +// Copyright 2005 Google Inc. +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// +// --- +// // // Utility functions that depend on bytesex. We define htonll and ntohll, // as well as "Google" versions of all the standards: ghtonl, ghtons, and // so on. These functions do exactly the same as their standard variants, // but don't require including the dangerous netinet/in.h. 
- +// +// Buffer routines will copy to and from buffers without causing +// a bus error when the architecture requires differnt byte alignments #ifndef UTIL_ENDIAN_ENDIAN_H_ #define UTIL_ENDIAN_ENDIAN_H_ @@ -20,7 +42,7 @@ inline uint64 gbswap_64(uint64 host_int) { if (__builtin_constant_p(host_int)) { return __bswap_constant_64(host_int); } else { - register uint64 result; + uint64 result; __asm__("bswap %0" : "=r" (result) : "0" (host_int)); return result; } @@ -33,7 +55,7 @@ inline uint64 gbswap_64(uint64 host_int) { } inline unsigned __int128 gbswap_128(unsigned __int128 host_int) { - return static_cast(bswap_64(static_cast(host_int >> 64))) | + return static_cast(bswap_64(static_cast(host_int >> 64))) | (static_cast(bswap_64(static_cast(host_int))) << 64); } @@ -58,24 +80,32 @@ inline uint64 ghtonll(uint64 x) { return gbswap_64(x); } #elif defined IS_BIG_ENDIAN -// These definitions are a lot simpler on big-endian machines -#define ghtons(x) (x) -#define ghtonl(x) (x) -#define ghtonll(x) (x) +// These definitions are simpler on big-endian machines +// These are functions instead of macros to avoid self-assignment warnings +// on calls such as "i = ghtnol(i);". This also provides type checking. +inline uint16 ghtons(uint16 x) { return x; } +inline uint32 ghtonl(uint32 x) { return x; } +inline uint64 ghtonll(uint64 x) { return x; } #else -#error "Unsupported bytesex: Either IS_BIG_ENDIAN or IS_LITTLE_ENDIAN must be defined" +#error "Unsupported bytesex: Either IS_BIG_ENDIAN or IS_LITTLE_ENDIAN must be defined" // NOLINT #endif // bytesex -// Convert to little-endian storage, opposite of network format. -// Convert x from host to little endian: x = LittleEndian.FromHost(x); -// convert x from little endian to host: x = LittleEndian.ToHost(x); +// ntoh* and hton* are the same thing for any size and bytesex, +// since the function is an involution, i.e., its own inverse. +#define gntohl(x) ghtonl(x) +#define gntohs(x) ghtons(x) +#define gntohll(x) ghtonll(x) +#if !defined(__APPLE__) +// This one is safe to take as it's an extension +#define htonll(x) ghtonll(x) +#define ntohll(x) htonll(x) +#endif + +// Utilities to convert numbers between the current hosts's native byte +// order and little-endian byte order // -// Store values into unaligned memory converting to little endian order: -// LittleEndian.Store16(p, x); -// -// Load unaligned values stored in little endian coverting to host order: -// x = LittleEndian.Load16(p); +// Load/Store methods are alignment safe class LittleEndian { public: // Conversion functions. @@ -90,6 +120,9 @@ class LittleEndian { static uint64 FromHost64(uint64 x) { return x; } static uint64 ToHost64(uint64 x) { return x; } + static unsigned __int128 FromHost128(unsigned __int128 x) { return x; } + static unsigned __int128 ToHost128(unsigned __int128 x) { return x; } + static bool IsLittleEndian() { return true; } #elif defined IS_BIG_ENDIAN @@ -183,162 +216,169 @@ class LittleEndian { return uint128(Load64VariableLength(p, len)); } else { return uint128( - Load64VariableLength(static_cast(p) + 8, len - 8), - Load64(p)); + Load64VariableLength(static_cast(p) + 8, len - 8), + Load64(p)); } } + + // Load & Store in machine's word size. 
+ static uword_t LoadUnsignedWord(const void *p) { + if (sizeof(uword_t) == 8) + return Load64(p); + else + return Load32(p); + } + + static void StoreUnsignedWord(void *p, uword_t v) { + if (sizeof(v) == 8) + Store64(p, v); + else + Store32(p, v); + } }; - -// This one is safe to take as it's an extension -#define htonll(x) ghtonll(x) - -// ntoh* and hton* are the same thing for any size and bytesex, -// since the function is an involution, i.e., its own inverse. -#define gntohl(x) ghtonl(x) -#define gntohs(x) ghtons(x) -#define gntohll(x) ghtonll(x) -#define ntohll(x) htonll(x) - // Utilities to convert numbers between the current hosts's native byte // order and big-endian byte order (same as network byte order) // // Load/Store methods are alignment safe class BigEndian { -public: + public: #ifdef IS_LITTLE_ENDIAN - static uint16 FromHost16(uint16 x) { return bswap_16(x); } - static uint16 ToHost16(uint16 x) { return bswap_16(x); } + static uint16 FromHost16(uint16 x) { return bswap_16(x); } + static uint16 ToHost16(uint16 x) { return bswap_16(x); } - static uint32 FromHost24(uint32 x) { return bswap_24(x); } - static uint32 ToHost24(uint32 x) { return bswap_24(x); } + static uint32 FromHost24(uint32 x) { return bswap_24(x); } + static uint32 ToHost24(uint32 x) { return bswap_24(x); } - static uint32 FromHost32(uint32 x) { return bswap_32(x); } - static uint32 ToHost32(uint32 x) { return bswap_32(x); } + static uint32 FromHost32(uint32 x) { return bswap_32(x); } + static uint32 ToHost32(uint32 x) { return bswap_32(x); } - static uint64 FromHost64(uint64 x) { return gbswap_64(x); } - static uint64 ToHost64(uint64 x) { return gbswap_64(x); } + static uint64 FromHost64(uint64 x) { return gbswap_64(x); } + static uint64 ToHost64(uint64 x) { return gbswap_64(x); } - static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); } - static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); } + static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); } + static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); } - static bool IsLittleEndian() { return true; } + static bool IsLittleEndian() { return true; } #elif defined IS_BIG_ENDIAN - static uint16 FromHost16(uint16 x) { return x; } - static uint16 ToHost16(uint16 x) { return x; } + static uint16 FromHost16(uint16 x) { return x; } + static uint16 ToHost16(uint16 x) { return x; } static uint32 FromHost24(uint32 x) { return x; } static uint32 ToHost24(uint32 x) { return x; } - static uint32 FromHost32(uint32 x) { return x; } - static uint32 ToHost32(uint32 x) { return x; } + static uint32 FromHost32(uint32 x) { return x; } + static uint32 ToHost32(uint32 x) { return x; } - static uint64 FromHost64(uint64 x) { return x; } - static uint64 ToHost64(uint64 x) { return x; } + static uint64 FromHost64(uint64 x) { return x; } + static uint64 ToHost64(uint64 x) { return x; } - static uint128 FromHost128(uint128 x) { return x; } - static uint128 ToHost128(uint128 x) { return x; } + static uint128 FromHost128(uint128 x) { return x; } + static uint128 ToHost128(uint128 x) { return x; } - static bool IsLittleEndian() { return false; } + static bool IsLittleEndian() { return false; } #endif /* ENDIAN */ - // Functions to do unaligned loads and stores in little-endian order. - static uint16 Load16(const void *p) { - return ToHost16(UNALIGNED_LOAD16(p)); - } + // Functions to do unaligned loads and stores in little-endian order. 
+ static uint16 Load16(const void *p) { + return ToHost16(UNALIGNED_LOAD16(p)); + } - static void Store16(void *p, uint16 v) { - UNALIGNED_STORE16(p, FromHost16(v)); - } + static void Store16(void *p, uint16 v) { + UNALIGNED_STORE16(p, FromHost16(v)); + } - static uint32 Load32(const void *p) { - return ToHost32(UNALIGNED_LOAD32(p)); - } + static uint32 Load32(const void *p) { + return ToHost32(UNALIGNED_LOAD32(p)); + } - static void Store32(void *p, uint32 v) { - UNALIGNED_STORE32(p, FromHost32(v)); - } + static void Store32(void *p, uint32 v) { + UNALIGNED_STORE32(p, FromHost32(v)); + } - static uint64 Load64(const void *p) { - return ToHost64(UNALIGNED_LOAD64(p)); - } + static uint64 Load64(const void *p) { + return ToHost64(UNALIGNED_LOAD64(p)); + } - // Build a uint64 from 1-8 bytes. - // 8 * len least significant bits are loaded from the memory with - // BigEndian order. The 64 - 8 * len most significant bits are - // set all to 0. - // In latex-friendly words, this function returns: - // $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned. - // - // This function is equivalent with: - // uint64 val = 0; - // memcpy(&val, p, len); - // return ToHost64(val); - // TODO(user): write a small benchmark and benchmark the speed - // of a memcpy based approach. - // - // For speed reasons this function does not work for len == 0. - // The caller needs to guarantee that 1 <= len <= 8. - static uint64 Load64VariableLength(const void * const p, int len) { - assert(len >= 1 && len <= 8); - uint64 val = Load64(p); - uint64 mask = 0; - --len; - do { - mask = (mask << 8) | 0xff; - // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. - } while (--len >= 0); - return val & mask; - } + // Build a uint64 from 1-8 bytes. + // 8 * len least significant bits are loaded from the memory with + // BigEndian order. The 64 - 8 * len most significant bits are + // set all to 0. + // In latex-friendly words, this function returns: + // $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned. + // + // This function is equivalent with: + // uint64 val = 0; + // memcpy(&val, p, len); + // return ToHost64(val); + // TODO(user): write a small benchmark and benchmark the speed + // of a memcpy based approach. + // + // For speed reasons this function does not work for len == 0. + // The caller needs to guarantee that 1 <= len <= 8. + static uint64 Load64VariableLength(const void * const p, int len) { + assert(len >= 1 && len <= 8); + uint64 val = Load64(p); + uint64 mask = 0; + --len; + do { + mask = (mask << 8) | 0xff; + // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. 
+ } while (--len >= 0); + return val & mask; + } - static void Store64(void *p, uint64 v) { - UNALIGNED_STORE64(p, FromHost64(v)); - } + static void Store64(void *p, uint64 v) { + UNALIGNED_STORE64(p, FromHost64(v)); + } - static uint128 Load128(const void *p) { - return uint128( - ToHost64(UNALIGNED_LOAD64(p)), - ToHost64(UNALIGNED_LOAD64(reinterpret_cast(p) + 1))); - } + static uint128 Load128(const void *p) { + return uint128( + ToHost64(UNALIGNED_LOAD64(p)), + ToHost64(UNALIGNED_LOAD64(reinterpret_cast(p) + 1))); + } - static void Store128(void *p, const uint128 v) { - UNALIGNED_STORE64(p, FromHost64(Uint128High64(v))); - UNALIGNED_STORE64(reinterpret_cast(p) + 1, - FromHost64(Uint128Low64(v))); - } + static void Store128(void *p, const uint128 v) { + UNALIGNED_STORE64(p, FromHost64(Uint128High64(v))); + UNALIGNED_STORE64(reinterpret_cast(p) + 1, + FromHost64(Uint128Low64(v))); + } - // Build a uint128 from 1-16 bytes. - // 8 * len least significant bits are loaded from the memory with - // BigEndian order. The 128 - 8 * len most significant bits are - // set all to 0. - static uint128 Load128VariableLength(const void *p, int len) { - if (len <= 8) { - return uint128(Load64VariableLength(static_cast(p)+8, - len)); - } else { - return uint128( - Load64VariableLength(p, len-8), - Load64(static_cast(p)+8)); - } + // Build a uint128 from 1-16 bytes. + // 8 * len least significant bits are loaded from the memory with + // BigEndian order. The 128 - 8 * len most significant bits are + // set all to 0. + static uint128 Load128VariableLength(const void *p, int len) { + if (len <= 8) { + return uint128(Load64VariableLength(static_cast(p)+8, + len)); + } else { + return uint128( + Load64VariableLength(p, len-8), + Load64(static_cast(p)+8)); } + } - // Load & Store in machine's word size. - static uword_t LoadUnsignedWord(const void *p) { - if (sizeof(uword_t) == 8) - return Load64(p); - else - return Load32(p); - } + // Load & Store in machine's word size. + static uword_t LoadUnsignedWord(const void *p) { + if (sizeof(uword_t) == 8) + return Load64(p); + else + return Load32(p); + } - static void StoreUnsignedWord(void *p, uword_t v) { - if (sizeof(uword_t) == 8) - Store64(p, v); - else - Store32(p, v); - } + static void StoreUnsignedWord(void *p, uword_t v) { + if (sizeof(uword_t) == 8) + Store64(p, v); + else + Store32(p, v); + } }; // BigEndian +// Network byte order is big-endian +typedef BigEndian NetworkByteOrder; + #endif // UTIL_ENDIAN_ENDIAN_H_ diff --git a/be/src/gutil/map-util.h b/be/src/gutil/map-util.h new file mode 100644 index 0000000000..c42672fd5f --- /dev/null +++ b/be/src/gutil/map-util.h @@ -0,0 +1,944 @@ +// Copyright 2005 Google Inc. +// +// #status: RECOMMENDED +// #category: maps +// #summary: Utility functions for use with map-like containers. +// +// This file provides utility functions for use with STL map-like data +// structures, such as std::map and hash_map. Some functions will also work with +// sets, such as ContainsKey(). +// +// The main functions in this file fall into the following categories: +// +// - Find*() +// - Contains*() +// - Insert*() +// - Lookup*() +// +// These functions often have "...OrDie" or "...OrDieNoPrint" variants. These +// variants will crash the process with a CHECK() failure on error, including +// the offending key/data in the log message. The NoPrint variants will not +// include the key/data in the log output under the assumption that it's not a +// printable type. 
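A brief sketch of the endian.h byte-order helpers finalized just above, before
the map-util.h documentation continues. The function names here are
illustrative only; NetworkByteOrder, Store32/Load32, and ghtonl() are taken
from the header.

    #include "gutil/endian.h"

    // Write a 32-bit length prefix in network byte order (big-endian) at an
    // arbitrarily aligned address; Store32/Load32 tolerate misalignment.
    void WriteLengthPrefix(void* buf, uint32 len) {
        NetworkByteOrder::Store32(buf, len);
    }

    uint32 ReadLengthPrefix(const void* buf) {
        return NetworkByteOrder::Load32(buf);   // back to host order
    }

    // ghtonl()/gntohl() perform the same conversion on in-register values.
    uint32 ToWireOrder(uint32 host_value) {
        return ghtonl(host_value);
    }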
+// +// Most functions are fairly self explanatory from their names, with the +// exception of Find*() vs Lookup*(). The Find functions typically use the map's +// .find() member function to locate and return the map's value type. The +// Lookup*() functions typically use the map's .insert() (yes, insert) member +// function to insert the given value if necessary and returns (usually a +// reference to) the map's value type for the found item. +// +// See the per-function comments for specifics. +// +// There are also a handful of functions for doing other miscellaneous things. +// +// A note on terminology: +// +// Map-like containers are collections of pairs. Like all STL containers they +// contain a few standard typedefs identifying the types of data they contain. +// Given the following map declaration: +// +// map my_map; +// +// the notable typedefs would be as follows: +// +// - key_type -- string +// - value_type -- pair +// - mapped_type -- int +// +// Note that the map above contains two types of "values": the key-value pairs +// themselves (value_type) and the values within the key-value pairs +// (mapped_type). A value_type consists of a key_type and a mapped_type. +// +// The documentation below is written for programmers thinking in terms of keys +// and the (mapped_type) values associated with a given key. For example, the +// statement +// +// my_map["foo"] = 3; +// +// has a key of "foo" (type: string) with a value of 3 (type: int). +// + +#ifndef UTIL_GTL_MAP_UTIL_H_ +#define UTIL_GTL_MAP_UTIL_H_ + +#include + +#include +#include +#include + +#include + +#include "gutil/logging-inl.h" + +// +// Find*() +// + +// Returns a const reference to the value associated with the given key if it +// exists. Crashes otherwise. +// +// This is intended as a replacement for operator[] as an rvalue (for reading) +// when the key is guaranteed to exist. +// +// operator[] for lookup is discouraged for several reasons: +// * It has a side-effect of inserting missing keys +// * It is not thread-safe (even when it is not inserting, it can still +// choose to resize the underlying storage) +// * It invalidates iterators (when it chooses to resize) +// * It default constructs a value object even if it doesn't need to +// +// This version assumes the key is printable, and includes it in the fatal log +// message. +template +const typename Collection::mapped_type& +FindOrDie(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.find(key); + CHECK(it != collection.end()) << "Map key not found: " << key; + return it->second; +} + +// Same as above, but returns a non-const reference. +template +typename Collection::mapped_type& +FindOrDie(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + auto it = collection.find(key); + CHECK(it != collection.end()) << "Map key not found: " << key; + return it->second; +} + +// Same as FindOrDie above, but doesn't log the key on failure. +template +const typename Collection::mapped_type& +FindOrDieNoPrint(const Collection& collection, + const typename Collection::key_type& key) { + typename Collection::const_iterator it = collection.find(key); + CHECK(it != collection.end()) << "Map key not found"; + return it->second; +} + +// Same as above, but returns a non-const reference. 
+template +typename Collection::mapped_type& +FindOrDieNoPrint(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + typename Collection::iterator it = collection.find(key); + CHECK(it != collection.end()) << "Map key not found"; + return it->second; +} + +// Returns a const reference to the value associated with the given key if it +// exists, otherwise a const reference to the provided default value is +// returned. +// +// WARNING: If a temporary object is passed as the default "value," this +// function will return a reference to that temporary object, which will be +// destroyed by the end of the statement. Specifically, if you have a map with +// string values, and you pass a char* as the default "value," either use the +// returned value immediately or store it in a string (not string&). Details: +template +const typename Collection::mapped_type& +FindWithDefault(const Collection& collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + auto it = collection.find(key); + if (it == collection.end()) { + return value; + } + return it->second; +} + +// Returns a pointer to the const value associated with the given key if it +// exists, or NULL otherwise. +template +const typename Collection::mapped_type* +FindOrNull(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return 0; + } + return &it->second; +} + +// Same as above but returns a pointer to the non-const value. +template +typename Collection::mapped_type* +FindOrNull(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return 0; + } + return &it->second; +} + +// Returns a pointer to the const value associated with the greatest key +// that's less than or equal to the given key, or NULL if no such key exists. +template +const typename Collection::mapped_type* +FindFloorOrNull(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.upper_bound(key); + if (it == collection.begin()) { + return 0; + } + return &(--it)->second; +} + +// Same as above but returns a pointer to the non-const value. +template +typename Collection::mapped_type* +FindFloorOrNull(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + auto it = collection.upper_bound(key); + if (it == collection.begin()) { + return 0; + } + return &(--it)->second; +} + +// Returns a const-reference to the value associated with the greatest key +// that's less than or equal to the given key, or crashes if it does not exist. +template +const typename Collection::mapped_type& +FindFloorOrDie(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.upper_bound(key); + CHECK(it != collection.begin()); + return (--it)->second; +} + +// Same as above, but returns a non-const reference. +template +typename Collection::mapped_type& +FindFloorOrDie(Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.upper_bound(key); + CHECK(it != collection.begin()); + return (--it)->second; +} + +// Returns the pointer value associated with the given key. If none is found, +// NULL is returned. The function is designed to be used with a map of keys to +// pointers. +// +// This function does not distinguish between a missing key and a key mapped +// to a NULL value. 
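The Find*() helpers above replace the usual find()/end() boilerplate; a small
sketch follows. The map contents and key names ("port", "timeout_ms",
"verbosity") are made up for illustration.

    #include <map>
    #include <string>
    #include "gutil/map-util.h"

    int FindExamples(const std::map<std::string, int>& settings) {
        // Crashes, logging the missing key, if "port" is absent.
        int port = FindOrDie(settings, "port");

        // Falls back to a default without inserting anything; copy the result
        // immediately, since the reference may bind to the temporary default.
        int timeout_ms = FindWithDefault(settings, "timeout_ms", 1000);

        // Distinguishes "key absent" from any in-range value.
        const int* verbosity = FindOrNull(settings, "verbosity");
        return port + timeout_ms + (verbosity ? *verbosity : 0);
    }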
+template +typename Collection::mapped_type +FindPtrOrNull(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return typename Collection::mapped_type(0); + } + return it->second; +} + +// Same as above, except takes non-const reference to collection. +// +// This function is needed for containers that propagate constness to the +// pointee, such as boost::ptr_map. +template +typename Collection::mapped_type +FindPtrOrNull(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return typename Collection::mapped_type(0); + } + return it->second; +} + +// FindPtrOrNull like function for maps whose value is a smart pointer like shared_ptr or +// unique_ptr. +// Returns the raw pointer contained in the smart pointer for the first found key, if it exists, +// or null if it doesn't. +template +typename Collection::mapped_type::element_type* +FindPointeeOrNull(const Collection& collection, // NOLINT, + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return nullptr; + } + return it->second.get(); +} + +// Finds the value associated with the given key and copies it to *value (if not +// NULL). Returns false if the key was not found, true otherwise. +template +bool FindCopy(const Collection& collection, + const Key& key, + Value* const value) { + auto it = collection.find(key); + if (it == collection.end()) { + return false; + } + if (value) { + *value = it->second; + } + return true; +} + +// +// Contains*() +// + +// Returns true iff the given collection contains the given key. +template +bool ContainsKey(const Collection& collection, const Key& key) { + return collection.find(key) != collection.end(); +} + +// Returns true iff the given collection contains the given key-value pair. +template +bool ContainsKeyValuePair(const Collection& collection, + const Key& key, + const Value& value) { + typedef typename Collection::const_iterator const_iterator; + std::pair range = collection.equal_range(key); + for (const_iterator it = range.first; it != range.second; ++it) { + if (it->second == value) { + return true; + } + } + return false; +} + +// +// Insert*() +// + +// Inserts the given key-value pair into the collection. Returns true if the +// given key didn't previously exist. If the given key already existed in the +// map, its value is changed to the given "value" and false is returned. +template +bool InsertOrUpdate(Collection* const collection, + const typename Collection::value_type& vt) { + std::pair ret = collection->insert(vt); + if (!ret.second) { + // update + ret.first->second = vt.second; + return false; + } + return true; +} + +// Same as above, except that the key and value are passed separately. +template +bool InsertOrUpdate(Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + return InsertOrUpdate( + collection, typename Collection::value_type(key, value)); +} + +// Inserts/updates all the key-value pairs from the range defined by the +// iterators "first" and "last" into the given collection. 
+template +void InsertOrUpdateMany(Collection* const collection, + InputIterator first, InputIterator last) { + for (; first != last; ++first) { + InsertOrUpdate(collection, *first); + } +} + +// Change the value associated with a particular key in a map or hash_map +// of the form map which owns the objects pointed to by the +// value pointers. If there was an existing value for the key, it is deleted. +// True indicates an insert took place, false indicates an update + delete. +template +bool InsertAndDeleteExisting( + Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + std::pair ret = + collection->insert(typename Collection::value_type(key, value)); + if (!ret.second) { + delete ret.first->second; + ret.first->second = value; + return false; + } + return true; +} + +// Inserts the given key and value into the given collection iff the given key +// did NOT already exist in the collection. If the key previously existed in the +// collection, the value is not changed. Returns true if the key-value pair was +// inserted; returns false if the key was already present. +template +bool InsertIfNotPresent(Collection* const collection, + const typename Collection::value_type& vt) { + return collection->insert(vt).second; +} + +// Same as above except the key and value are passed separately. +template +bool InsertIfNotPresent( + Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + return InsertIfNotPresent( + collection, typename Collection::value_type(key, value)); +} + +// Same as above except dies if the key already exists in the collection. +template +void InsertOrDie(Collection* const collection, + const typename Collection::value_type& value) { + CHECK(InsertIfNotPresent(collection, value)) << "duplicate value: " << value; +} + +// Same as above except doesn't log the value on error. +template +void InsertOrDieNoPrint(Collection* const collection, + const typename Collection::value_type& value) { + CHECK(InsertIfNotPresent(collection, value)) << "duplicate value."; +} + +// Inserts the key-value pair into the collection. Dies if key was already +// present. +template +void InsertOrDie(Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& data) { + CHECK(InsertIfNotPresent(collection, key, data)) + << "duplicate key: " << key; +} + +// Same as above except deson't log the key on error. +template +void InsertOrDieNoPrint( + Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& data) { + CHECK(InsertIfNotPresent(collection, key, data)) << "duplicate key."; +} + +// Inserts a new key and default-initialized value. Dies if the key was already +// present. Returns a reference to the value. Example usage: +// +// map m; +// SomeProto& proto = InsertKeyOrDie(&m, 3); +// proto.set_field("foo"); +template +typename Collection::mapped_type& InsertKeyOrDie( + Collection* const collection, + const typename Collection::key_type& key) { + typedef typename Collection::value_type value_type; + std::pair res = + collection->insert(value_type(key, typename Collection::mapped_type())); + CHECK(res.second) << "duplicate key: " << key; + return res.first->second; +} + +// +// Emplace*() +// +template +bool EmplaceIfNotPresent(Collection* const collection, + Args&&... 
args) { + return collection->emplace(std::forward(args)...).second; +} + +// Emplaces the given key-value pair into the collection. Returns true if the +// given key didn't previously exist. If the given key already existed in the +// map, its value is changed to the given "value" and false is returned. +template +bool EmplaceOrUpdate(Collection* const collection, + const typename Collection::key_type& key, + typename Collection::mapped_type&& value) { + typedef typename Collection::mapped_type mapped_type; + auto it = collection->find(key); + if (it == collection->end()) { + collection->emplace(key, std::forward(value)); + return true; + } + it->second = std::forward(value); + return false; +} + +template +void EmplaceOrDie(Collection* const collection, + Args&&... args) { + CHECK(EmplaceIfNotPresent(collection, std::forward(args)...)) + << "duplicate value"; +} + +// +// Lookup*() +// + +// Looks up a given key and value pair in a collection and inserts the key-value +// pair if it's not already present. Returns a reference to the value associated +// with the key. +template +typename Collection::mapped_type& +LookupOrInsert(Collection* const collection, + const typename Collection::value_type& vt) { + return collection->insert(vt).first->second; +} + +// Same as above except the key-value are passed separately. +template +typename Collection::mapped_type& +LookupOrInsert(Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + return LookupOrInsert( + collection, typename Collection::value_type(key, value)); +} + +// It's similar to LookupOrInsert() but uses the emplace and r-value mechanics +// to achieve the desired results. The constructor of the new element is called +// with exactly the same arguments as supplied to emplace, forwarded via +// std::forward(args). The element may be constructed even if there +// already is an element with the same key in the container, in which case the +// newly constructed element will be destroyed immediately. +// For details, see +// https://en.cppreference.com/w/cpp/container/map/emplace +// https://en.cppreference.com/w/cpp/container/unordered_map/emplace +template +typename Collection::mapped_type& +LookupOrEmplace(Collection* const collection, Args&&... args) { + return collection->emplace(std::forward(args)...).first->second; +} + +// Counts the number of equivalent elements in the given "sequence", and stores +// the results in "count_map" with element as the key and count as the value. +// +// Example: +// vector v = {"a", "b", "c", "a", "b"}; +// map m; +// AddTokenCounts(v, 1, &m); +// assert(m["a"] == 2); +// assert(m["b"] == 2); +// assert(m["c"] == 1); +template +void AddTokenCounts( + const Sequence& sequence, + const typename Collection::mapped_type& increment, + Collection* const count_map) { + for (typename Sequence::const_iterator it = sequence.begin(); + it != sequence.end(); ++it) { + typename Collection::mapped_type& value = + LookupOrInsert(count_map, *it, + typename Collection::mapped_type()); + value += increment; + } +} + +// Helpers for LookupOrInsertNew(), needed to create a new value type when the +// type itself is a pointer, i.e., these extract the actual type from a pointer. +template +void MapUtilAssignNewDefaultInstance(T** location) { + *location = new T(); +} + +template +void MapUtilAssignNewInstance(T** location, const Arg &arg) { + *location = new T(arg); +} + +// Returns a reference to the value associated with key. 
If not found, a value +// is default constructed on the heap and added to the map. +// +// This function is useful for containers of the form map, where +// inserting a new key, value pair involves constructing a new heap-allocated +// Value, and storing a pointer to that in the collection. +template +typename Collection::mapped_type& +LookupOrInsertNew(Collection* const collection, + const typename Collection::key_type& key) { + std::pair ret = + collection->insert( + typename Collection::value_type(key, + static_cast(NULL))); + if (ret.second) { + // This helper is needed to 'extract' the Value type from the type of the + // container value, which is (Value*). + MapUtilAssignNewDefaultInstance(&(ret.first->second)); + } + return ret.first->second; +} + +// Same as above but constructs the value using the single-argument constructor +// and the given "arg". +template +typename Collection::mapped_type& +LookupOrInsertNew(Collection* const collection, + const typename Collection::key_type& key, + const Arg& arg) { + std::pair ret = + collection->insert( + typename Collection::value_type( + key, + static_cast(NULL))); + if (ret.second) { + // This helper is needed to 'extract' the Value type from the type of the + // container value, which is (Value*). + MapUtilAssignNewInstance(&(ret.first->second), arg); + } + return ret.first->second; +} + +// Lookup of linked/shared pointers is used in two scenarios: +// +// Use LookupOrInsertSharedPtr if the container does not own the elements +// for their whole lifetime. This is typically the case when a reader allows +// parallel updates to the container. In this case a Mutex only needs to lock +// container operations, but all element operations must be performed on the +// shared pointer. Finding an element must be performed using FindPtr*() and +// cannot be done with FindLinkedPtr*() even though it compiles. + +// Lookup a key in a map or hash_map whose values are shared_ptrs. If it is +// missing, set collection[key].reset(new Value::element_type). Unlike +// LookupOrInsertNewLinkedPtr, this function returns the shared_ptr instead of +// the raw pointer. Value::element_type must be default constructable. +template +typename Collection::mapped_type& +LookupOrInsertNewSharedPtr( + Collection* const collection, + const typename Collection::key_type& key) { + typedef typename Collection::mapped_type SharedPtr; + typedef typename Collection::mapped_type::element_type Element; + std::pair ret = + collection->insert(typename Collection::value_type(key, SharedPtr())); + if (ret.second) { + ret.first->second.reset(new Element()); + } + return ret.first->second; +} + +// A variant of LookupOrInsertNewSharedPtr where the value is constructed using +// a single-parameter constructor. Note: the constructor argument is computed +// even if it will not be used, so only values cheap to compute should be passed +// here. On the other hand it does not matter how expensive the construction of +// the actual stored value is, as that only occurs if necessary. 
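The Insert*(), Emplace*() and Lookup*() families above cover the
"insert only if absent" and "get or create" patterns; a sketch with made-up
names (Widget, the counter keys) follows.

    #include <map>
    #include <string>
    #include "gutil/map-util.h"

    struct Widget { int size; };   // hypothetical heap-allocated value type

    void InsertAndLookupExamples(std::map<std::string, int>* counters,
                                 std::map<std::string, Widget*>* widgets) {
        // Overwrite-or-insert vs. insert-only-if-absent.
        InsertOrUpdate(counters, "requests", 1);          // existing value is replaced
        if (!InsertIfNotPresent(counters, "errors", 0)) {
            // The key already existed; its old value was kept.
        }

        // Get-or-create, then work through the returned reference.
        int& hits = LookupOrInsert(counters, "hits", 0);
        ++hits;

        // For pointer-valued maps, LookupOrInsertNew heap-allocates the value
        // only when the key was missing.
        Widget* w = LookupOrInsertNew(widgets, "toolbar");
        w->size = 42;
    }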
+template +typename Collection::mapped_type& +LookupOrInsertNewSharedPtr( + Collection* const collection, + const typename Collection::key_type& key, + const Arg& arg) { + typedef typename Collection::mapped_type SharedPtr; + typedef typename Collection::mapped_type::element_type Element; + std::pair ret = + collection->insert(typename Collection::value_type(key, SharedPtr())); + if (ret.second) { + ret.first->second.reset(new Element(arg)); + } + return ret.first->second; +} + +// +// Misc Utility Functions +// + +// Updates the value associated with the given key. If the key was not already +// present, then the key-value pair are inserted and "previous" is unchanged. If +// the key was already present, the value is updated and "*previous" will +// contain a copy of the old value. +// +// InsertOrReturnExisting has complementary behavior that returns the +// address of an already existing value, rather than updating it. +template +bool UpdateReturnCopy(Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value, + typename Collection::mapped_type* previous) { + std::pair ret = + collection->insert(typename Collection::value_type(key, value)); + if (!ret.second) { + // update + if (previous) { + *previous = ret.first->second; + } + ret.first->second = value; + return true; + } + return false; +} + +// Same as above except that the key and value are passed as a pair. +template +bool UpdateReturnCopy(Collection* const collection, + const typename Collection::value_type& vt, + typename Collection::mapped_type* previous) { + std::pair ret = + collection->insert(vt); + if (!ret.second) { + // update + if (previous) { + *previous = ret.first->second; + } + ret.first->second = vt.second; + return true; + } + return false; +} + +// Tries to insert the given key-value pair into the collection. Returns NULL if +// the insert succeeds. Otherwise, returns a pointer to the existing value. +// +// This complements UpdateReturnCopy in that it allows to update only after +// verifying the old value and still insert quickly without having to look up +// twice. Unlike UpdateReturnCopy this also does not come with the issue of an +// undefined previous* in case new data was inserted. +template +typename Collection::mapped_type* const +InsertOrReturnExisting(Collection* const collection, + const typename Collection::value_type& vt) { + std::pair ret = collection->insert(vt); + if (ret.second) { + return NULL; // Inserted, no existing previous value. + } else { + return &ret.first->second; // Return address of already existing value. + } +} + +// Same as above, except for explicit key and data. +template +typename Collection::mapped_type* const +InsertOrReturnExisting( + Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& data) { + return InsertOrReturnExisting(collection, + typename Collection::value_type(key, data)); +} + +// Saves the reverse mapping into reverse. Key/value pairs are inserted in the +// order the iterator returns them. +template +void ReverseMap(const Collection& collection, + ReverseCollection* const reverse) { + CHECK(reverse != NULL); + for (typename Collection::const_iterator it = collection.begin(); + it != collection.end(); + ++it) { + InsertOrUpdate(reverse, it->second, it->first); + } +} + +// Erases the collection item identified by the given key, and returns the value +// associated with that key. 
It is assumed that the value (i.e., the +// mapped_type) is a pointer. Returns NULL if the key was not found in the +// collection. +// +// Examples: +// map my_map; +// +// One line cleanup: +// delete EraseKeyReturnValuePtr(&my_map, "abc"); +// +// Use returned value: +// gscoped_ptr value_ptr(EraseKeyReturnValuePtr(&my_map, "abc")); +// if (value_ptr.get()) +// value_ptr->DoSomething(); +// +// Note: if 'collection' is a multimap, this will only erase and return the +// first value. +template +typename Collection::mapped_type EraseKeyReturnValuePtr( + Collection* const collection, + const typename Collection::key_type& key) { + auto it = collection->find(key); + if (it == collection->end()) { + return typename Collection::mapped_type(); + } + typename Collection::mapped_type v = std::move(it->second); + collection->erase(it); + return v; +} + +// Inserts all the keys from map_container into key_container, which must +// support insert(MapContainer::key_type). +// +// Note: any initial contents of the key_container are not cleared. +template +void InsertKeysFromMap(const MapContainer& map_container, + KeyContainer* key_container) { + CHECK(key_container != NULL); + for (typename MapContainer::const_iterator it = map_container.begin(); + it != map_container.end(); ++it) { + key_container->insert(it->first); + } +} + +// Appends all the keys from map_container into key_container, which must +// support push_back(MapContainer::key_type). +// +// Note: any initial contents of the key_container are not cleared. +template +void AppendKeysFromMap(const MapContainer& map_container, + KeyContainer* key_container) { + CHECK(key_container != NULL); + for (typename MapContainer::const_iterator it = map_container.begin(); + it != map_container.end(); ++it) { + key_container->push_back(it->first); + } +} + +// A more specialized overload of AppendKeysFromMap to optimize reallocations +// for the common case in which we're appending keys to a vector and hence can +// (and sometimes should) call reserve() first. +// +// (It would be possible to play SFINAE games to call reserve() for any +// container that supports it, but this seems to get us 99% of what we need +// without the complexity of a SFINAE-based solution.) +template +void AppendKeysFromMap(const MapContainer& map_container, + std::vector* key_container) { + CHECK(key_container != NULL); + // We now have the opportunity to call reserve(). Calling reserve() every + // time is a bad idea for some use cases: libstdc++'s implementation of + // vector<>::reserve() resizes the vector's backing store to exactly the + // given size (unless it's already at least that big). Because of this, + // the use case that involves appending a lot of small maps (total size + // N) one by one to a vector would be O(N^2). But never calling reserve() + // loses the opportunity to improve the use case of adding from a large + // map to an empty vector (this improves performance by up to 33%). A + // number of heuristics are possible; see the discussion in + // cl/34081696. Here we use the simplest one. + if (key_container->empty()) { + key_container->reserve(map_container.size()); + } + for (typename MapContainer::const_iterator it = map_container.begin(); + it != map_container.end(); ++it) { + key_container->push_back(it->first); + } +} + +// Inserts all the values from map_container into value_container, which must +// support push_back(MapContainer::mapped_type). +// +// Note: any initial contents of the value_container are not cleared. 
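Two of the maintenance helpers above in a short sketch; the container types
and the "stale_key" literal are illustrative only.

    #include <map>
    #include <string>
    #include <vector>
    #include "gutil/map-util.h"

    void CollectKeys(const std::map<std::string, int>& m,
                     std::vector<std::string>* keys) {
        // The std::vector overload reserves capacity up front when *keys is
        // empty, avoiding repeated reallocation.
        AppendKeysFromMap(m, keys);
    }

    void DropEntry(std::map<std::string, std::string*>* owned) {
        // One-line erase-and-delete for maps that own heap-allocated values;
        // returns a default-constructed mapped_type (NULL) if the key is absent.
        delete EraseKeyReturnValuePtr(owned, "stale_key");
    }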
+template +void AppendValuesFromMap(const MapContainer& map_container, + ValueContainer* value_container) { + CHECK(value_container != NULL); + for (typename MapContainer::const_iterator it = map_container.begin(); + it != map_container.end(); ++it) { + value_container->push_back(it->second); + } +} + +template +void EmplaceValuesFromMap(MapContainer&& map_container, + ValueContainer* value_container) { + CHECK(value_container != nullptr); + // See AppendKeysFromMap for why this is done. + if (value_container->empty()) { + value_container->reserve(map_container.size()); + } + for (auto&& entry : map_container) { + value_container->emplace_back(std::move(entry.second)); + } +} + +// A more specialized overload of AppendValuesFromMap to optimize reallocations +// for the common case in which we're appending values to a vector and hence +// can (and sometimes should) call reserve() first. +// +// (It would be possible to play SFINAE games to call reserve() for any +// container that supports it, but this seems to get us 99% of what we need +// without the complexity of a SFINAE-based solution.) +template +void AppendValuesFromMap(const MapContainer& map_container, + std::vector* value_container) { + EmplaceValuesFromMap(map_container, value_container); +} + +// Compute and insert new value if it's absent from the map. Return a pair with a reference to the +// value and a bool indicating whether it was absent at first. +// +// This inspired on a similar java construct (url split in two lines): +// https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/ConcurrentHashMap.html +// #computeIfAbsent-K-java.util.function.Function +// +// It takes a reference to the key and a lambda function. If the key exists in the map, returns +// a pair with a pointer to the current value and 'false'. If the key does not exist in the map, +// it uses the lambda function to create a value, inserts it into the map, and returns a pair with +// a pointer to the new value and 'true'. +// +// Example usage: +// +// auto result = ComputeIfAbsentReturnAbsense(&my_collection, +// my_key, +// [] { return new_value; }); +// MyValue* const value = result.first; +// if (result.second) .... +// +// The ComputePair* variants expect a lambda that creates a pair. This +// can be useful if the key is a StringPiece pointing to external state to +// avoid excess memory for the keys, while being safer in multi-threaded +// contexts, e.g. in case the key goes out of scope before the container does. 
+// +// Example usage: +// +// map> string_to_idx; +// vector> pbs; +// auto result = ComputePairIfAbsentReturnAbsense(&string_to_idx, my_key, +// [&]() { +// unique_ptr s = new StringPB(); +// s->set_string(my_key); +// int idx = pbs.size(); +// pbs.emplace_back(s.release()); +// return make_pair(StringPiece(pbs.back()->string()), idx); +// }); +template +std::pair +ComputePairIfAbsentReturnAbsense(MapContainer* container, + const typename MapContainer::key_type& key, + Function compute_pair_func) { + typename MapContainer::iterator iter = container->find(key); + bool new_value = iter == container->end(); + if (new_value) { + auto p = compute_pair_func(); + std::pair result = + container->emplace(std::move(p.first), std::move(p.second)); + DCHECK(result.second) << "duplicate key: " << key; + iter = result.first; + } + return std::make_pair(&iter->second, new_value); +} +template +std::pair +ComputeIfAbsentReturnAbsense(MapContainer* container, + const typename MapContainer::key_type& key, + Function compute_func) { + return ComputePairIfAbsentReturnAbsense(container, key, [&key, &compute_func] { + return std::make_pair(key, compute_func()); + }); +}; + +// Like the above but doesn't return a pair, just returns a pointer to the value. +// Example usage: +// +// MyValue* const value = ComputeIfAbsent(&my_collection, +// my_key, +// [] { return new_value; }); +// +template +typename MapContainer::mapped_type* const +ComputeIfAbsent(MapContainer* container, + const typename MapContainer::key_type& key, + Function compute_func) { + return ComputeIfAbsentReturnAbsense(container, key, compute_func).first; +}; + +template +typename MapContainer::mapped_type* const +ComputePairIfAbsent(MapContainer* container, + const typename MapContainer::key_type& key, + Function compute_pair_func) { + return ComputePairIfAbsentReturnAbsense(container, key, compute_pair_func).first; +}; + +#endif // UTIL_GTL_MAP_UTIL_H_ diff --git a/be/src/gutil/ref_counted.cc b/be/src/gutil/ref_counted.cc new file mode 100644 index 0000000000..280d9df25e --- /dev/null +++ b/be/src/gutil/ref_counted.cc @@ -0,0 +1,95 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gutil/ref_counted.h" + +#include +#include "gutil/atomic_refcount.h" + +namespace doris { + +namespace subtle { + +RefCountedBase::RefCountedBase() + : ref_count_(0) +#ifndef NDEBUG + , in_dtor_(false) +#endif + { +} + +RefCountedBase::~RefCountedBase() { +#ifndef NDEBUG + DCHECK(in_dtor_) << "RefCounted object deleted without calling Release()"; +#endif +} + +void RefCountedBase::AddRef() const { + // TODO(maruel): Add back once it doesn't assert 500 times/sec. + // Current thread books the critical section "AddRelease" without release it. + // DFAKE_SCOPED_LOCK_THREAD_LOCKED(add_release_); +#ifndef NDEBUG + DCHECK(!in_dtor_); +#endif + ++ref_count_; +} + +bool RefCountedBase::Release() const { + // TODO(maruel): Add back once it doesn't assert 500 times/sec. + // Current thread books the critical section "AddRelease" without release it. 
+ // DFAKE_SCOPED_LOCK_THREAD_LOCKED(add_release_); +#ifndef NDEBUG + DCHECK(!in_dtor_); +#endif + if (--ref_count_ == 0) { +#ifndef NDEBUG + in_dtor_ = true; +#endif + return true; + } + return false; +} + +bool RefCountedThreadSafeBase::HasOneRef() const { + return base::RefCountIsOne( + &const_cast(this)->ref_count_); +} + +RefCountedThreadSafeBase::RefCountedThreadSafeBase() : ref_count_(0) { +#ifndef NDEBUG + in_dtor_ = false; +#endif +} + +RefCountedThreadSafeBase::~RefCountedThreadSafeBase() { +#ifndef NDEBUG + DCHECK(in_dtor_) << "RefCountedThreadSafe object deleted without " + "calling Release()"; +#endif +} + +void RefCountedThreadSafeBase::AddRef() const { +#ifndef NDEBUG + DCHECK(!in_dtor_); +#endif + base::RefCountInc(&ref_count_); +} + +bool RefCountedThreadSafeBase::Release() const { +#ifndef NDEBUG + DCHECK(!in_dtor_); + DCHECK(!base::RefCountIsZero(&ref_count_)); +#endif + if (!base::RefCountDec(&ref_count_)) { +#ifndef NDEBUG + in_dtor_ = true; +#endif + return true; + } + return false; +} + +} // namespace subtle + +} // namespace doris diff --git a/be/src/gutil/ref_counted.h b/be/src/gutil/ref_counted.h new file mode 100644 index 0000000000..5cd73b262c --- /dev/null +++ b/be/src/gutil/ref_counted.h @@ -0,0 +1,365 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_MEMORY_REF_COUNTED_H_ +#define BASE_MEMORY_REF_COUNTED_H_ + +#include +#include +#include // IWYU pragma: keep + +#include "gutil/atomicops.h" +#include "gutil/macros.h" +#include "gutil/threading/thread_collision_warner.h" + +namespace doris { +namespace subtle { + +typedef Atomic32 AtomicRefCount; + +class RefCountedBase { + public: + bool HasOneRef() const { return ref_count_ == 1; } + + protected: + RefCountedBase(); + ~RefCountedBase(); + + void AddRef() const; + + // Returns true if the object should self-delete. + bool Release() const; + + private: + mutable int ref_count_; +#ifndef NDEBUG + mutable bool in_dtor_; +#endif + + DFAKE_MUTEX(add_release_); + + DISALLOW_COPY_AND_ASSIGN(RefCountedBase); +}; + +class RefCountedThreadSafeBase { + public: + bool HasOneRef() const; + + protected: + RefCountedThreadSafeBase(); + ~RefCountedThreadSafeBase(); + + void AddRef() const; + + // Returns true if the object should self-delete. + bool Release() const; + + private: + mutable AtomicRefCount ref_count_; +#ifndef NDEBUG + mutable bool in_dtor_; +#endif + + DISALLOW_COPY_AND_ASSIGN(RefCountedThreadSafeBase); +}; + +} // namespace subtle + +// +// A base class for reference counted classes. Otherwise, known as a cheap +// knock-off of WebKit's RefCounted class. To use this guy just extend your +// class from it like so: +// +// class MyFoo : public RefCounted { +// ... +// private: +// friend class RefCounted; +// ~MyFoo(); +// }; +// +// You should always make your destructor private, to avoid any code deleting +// the object accidently while there are references to it. +template +class RefCounted : public subtle::RefCountedBase { + public: + RefCounted() {} + + void AddRef() const { + subtle::RefCountedBase::AddRef(); + } + + void Release() const { + if (subtle::RefCountedBase::Release()) { + delete static_cast(this); + } + } + + protected: + ~RefCounted() {} + + private: + DISALLOW_COPY_AND_ASSIGN(RefCounted); +}; + +// Forward declaration. +template class RefCountedThreadSafe; + +// Default traits for RefCountedThreadSafe. 
Deletes the object when its ref +// count reaches 0. Overload to delete it on a different thread etc. +template +struct DefaultRefCountedThreadSafeTraits { + static void Destruct(const T* x) { + // Delete through RefCountedThreadSafe to make child classes only need to be + // friend with RefCountedThreadSafe instead of this struct, which is an + // implementation detail. + RefCountedThreadSafe::DeleteInternal(x); + } +}; + +// +// A thread-safe variant of RefCounted +// +// class MyFoo : public RefCountedThreadSafe { +// ... +// }; +// +// If you're using the default trait, then you should add compile time +// asserts that no one else is deleting your object. i.e. +// private: +// friend class RefCountedThreadSafe; +// ~MyFoo(); +template > +class RefCountedThreadSafe : public subtle::RefCountedThreadSafeBase { + public: + RefCountedThreadSafe() {} + + void AddRef() const { + subtle::RefCountedThreadSafeBase::AddRef(); + } + + void Release() const { + if (subtle::RefCountedThreadSafeBase::Release()) { + Traits::Destruct(static_cast(this)); + } + } + + protected: + ~RefCountedThreadSafe() {} + + private: + friend struct DefaultRefCountedThreadSafeTraits; + static void DeleteInternal(const T* x) { delete x; } + + DISALLOW_COPY_AND_ASSIGN(RefCountedThreadSafe); +}; + +// +// A thread-safe wrapper for some piece of data so we can place other +// things in scoped_refptrs<>. +// +template +class RefCountedData + : public doris::RefCountedThreadSafe< doris::RefCountedData > { + public: + RefCountedData() : data() {} + RefCountedData(const T& in_value) : data(in_value) {} + + T data; + + private: + friend class doris::RefCountedThreadSafe >; + ~RefCountedData() {} +}; + +} // namespace doris + +// +// A smart pointer class for reference counted objects. Use this class instead +// of calling AddRef and Release manually on a reference counted object to +// avoid common memory leaks caused by forgetting to Release an object +// reference. Sample usage: +// +// class MyFoo : public RefCounted { +// ... +// }; +// +// void some_function() { +// scoped_refptr foo = new MyFoo(); +// foo->Method(param); +// // |foo| is released when this function returns +// } +// +// void some_other_function() { +// scoped_refptr foo = new MyFoo(); +// ... +// foo = NULL; // explicitly releases |foo| +// ... +// if (foo) +// foo->Method(param); +// } +// +// The above examples show how scoped_refptr acts like a pointer to T. +// Given two scoped_refptr classes, it is also possible to exchange +// references between the two objects, like so: +// +// { +// scoped_refptr a = new MyFoo(); +// scoped_refptr b; +// +// b.swap(a); +// // now, |b| references the MyFoo object, and |a| references NULL. +// } +// +// To make both |a| and |b| in the above example reference the same MyFoo +// object, simply use the assignment operator: +// +// { +// scoped_refptr a = new MyFoo(); +// scoped_refptr b; +// +// b = a; +// // now, |a| and |b| each own a reference to the same MyFoo object. +// } +// +template +class scoped_refptr { + public: + typedef T element_type; + + scoped_refptr() : ptr_(NULL) { + } + + scoped_refptr(T* p) : ptr_(p) { + if (ptr_) + ptr_->AddRef(); + } + + // Copy constructor. + scoped_refptr(const scoped_refptr& r) : ptr_(r.ptr_) { + if (ptr_) + ptr_->AddRef(); + } + + // Copy conversion constructor. + template + scoped_refptr(const scoped_refptr& r) : ptr_(r.get()) { + if (ptr_) + ptr_->AddRef(); + } + + // Move constructor. 
This is required in addition to the conversion + // constructor below in order for clang to warn about pessimizing moves. + scoped_refptr(scoped_refptr&& r) noexcept : ptr_(r.get()) { // NOLINT + r.ptr_ = nullptr; + } + + // Move conversion constructor. + template + scoped_refptr(scoped_refptr&& r) noexcept : ptr_(r.get()) { // NOLINT + r.ptr_ = nullptr; + } + + ~scoped_refptr() { + if (ptr_) + ptr_->Release(); + } + + T* get() const { return ptr_; } + +// The following is disabled in Kudu's version of this file since it's +// relatively dangerous. Chromium is planning on doing the same in their +// tree, but hasn't done so yet. See http://code.google.com/p/chromium/issues/detail?id=110610 +#if SCOPED_REFPTR_ALLOW_IMPLICIT_CONVERSION_TO_PTR + // Allow scoped_refptr to be used in boolean expression + // and comparison operations. + operator T*() const { return ptr_; } +#else + typedef T* scoped_refptr::*Testable; + operator Testable() const { return ptr_ ? &scoped_refptr::ptr_ : NULL; } +#endif + + T* operator->() const { + assert(ptr_ != NULL); + return ptr_; + } + + scoped_refptr& operator=(T* p) { + // AddRef first so that self assignment should work + if (p) + p->AddRef(); + T* old_ptr = ptr_; + ptr_ = p; + if (old_ptr) + old_ptr->Release(); + return *this; + } + + scoped_refptr& operator=(const scoped_refptr& r) { + return *this = r.ptr_; + } + + template + scoped_refptr& operator=(const scoped_refptr& r) { + return *this = r.get(); + } + + scoped_refptr& operator=(scoped_refptr&& r) { + scoped_refptr(std::move(r)).swap(*this); + return *this; + } + + template + scoped_refptr& operator=(scoped_refptr&& r) { + scoped_refptr(std::move(r)).swap(*this); + return *this; + } + + void swap(T** pp) { + T* p = ptr_; + ptr_ = *pp; + *pp = p; + } + + void swap(scoped_refptr& r) { + swap(&r.ptr_); + } + + // Like gscoped_ptr::reset(), drops a reference on the currently held object + // (if any), and adds a reference to the passed-in object (if not NULL). + void reset(T* p = NULL) { + *this = p; + } + + protected: + T* ptr_; + + private: + template friend class scoped_refptr; +}; + +// Handy utility for creating a scoped_refptr out of a T* explicitly without +// having to retype all the template arguments +template +scoped_refptr make_scoped_refptr(T* t) { + return scoped_refptr(t); +} + +// equal_to and hash implementations for templated scoped_refptrs suitable for +// use with STL unordered_* containers. +template +struct ScopedRefPtrEqualToFunctor { + bool operator()(const scoped_refptr& x, const scoped_refptr& y) const { + return x.get() == y.get(); + } +}; + +template +struct ScopedRefPtrHashFunctor { + size_t operator()(const scoped_refptr& p) const { + return reinterpret_cast(p.get()); + } +}; + +#endif // BASE_MEMORY_REF_COUNTED_H_ diff --git a/be/src/gutil/sysinfo-test.cc b/be/src/gutil/sysinfo-test.cc new file mode 100644 index 0000000000..85ccd11448 --- /dev/null +++ b/be/src/gutil/sysinfo-test.cc @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gutil/sysinfo.h" + +#include + +namespace doris { + +TEST(SysInfoTest, ReadMaxCpuIndexTest) { + using base::ParseMaxCpuIndex; + EXPECT_EQ(0, ParseMaxCpuIndex("0\n")); + EXPECT_EQ(1, ParseMaxCpuIndex("1\n")); + EXPECT_EQ(7, ParseMaxCpuIndex("0-7\n")); + EXPECT_EQ(40, ParseMaxCpuIndex("0-7,30-40\n")); + EXPECT_EQ(143, ParseMaxCpuIndex("2,4-127,128-143\n")); + EXPECT_EQ(44, ParseMaxCpuIndex("44-44\n")); + + // Don't assume that ranges are in ascending order or non-overlapping, + // just in case. + EXPECT_EQ(8, ParseMaxCpuIndex("0-7,5-8\n")); + EXPECT_EQ(7, ParseMaxCpuIndex("0-7,5-6\n")); + EXPECT_EQ(3, ParseMaxCpuIndex("2-3,0-1\n")); + EXPECT_EQ(3, ParseMaxCpuIndex("2-3,0\n")); + EXPECT_EQ(3, ParseMaxCpuIndex("3,0-2\n")); + + // Invalid inputs. + EXPECT_EQ(-1, ParseMaxCpuIndex("")); + EXPECT_EQ(-1, ParseMaxCpuIndex("\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex(" ")); + EXPECT_EQ(-1, ParseMaxCpuIndex("a\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("0\n1\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("\n1\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("\n1")); + EXPECT_EQ(-1, ParseMaxCpuIndex("0-\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("-2\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1-9qwerty\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1-9,0-\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,3-\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,a-4\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,3@4\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("3-2\n")); + + // Overflows in various positions. + EXPECT_EQ(-1, ParseMaxCpuIndex("2147483648")); // 2^31 + EXPECT_EQ(-1, ParseMaxCpuIndex("18446744073709551617")); // 2^64 + 1 + EXPECT_EQ(-1, ParseMaxCpuIndex("999999999999999999999999999999999999999999999999")); + EXPECT_EQ(-1, ParseMaxCpuIndex("0-2147483648")); // 2^31 + EXPECT_EQ(-1, ParseMaxCpuIndex("0-18446744073709551617")); // 2^64 + 1 + EXPECT_EQ(-1, ParseMaxCpuIndex("0-999999999999999999999999999999999999999999999999")); + EXPECT_EQ(-1, ParseMaxCpuIndex("2147483648-1")); // 2^31 + EXPECT_EQ(-1, ParseMaxCpuIndex("18446744073709551617-1")); // 2^64 + 1 + EXPECT_EQ(-1, ParseMaxCpuIndex("999999999999999999999999999999999999999999999999-1")); +} + +} // namespace doris diff --git a/be/src/gutil/sysinfo.cc b/be/src/gutil/sysinfo.cc new file mode 100644 index 0000000000..4365b18ff8 --- /dev/null +++ b/be/src/gutil/sysinfo.cc @@ -0,0 +1,474 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32) +# define PLATFORM_WINDOWS 1 +#endif + +#include +#include // for open() +#include // for read() + +#if defined __MACH__ // Mac OS X, almost certainly +#include // how we figure out numcpu's on OS X +#include +#elif defined __FreeBSD__ +#include +#elif defined __sun__ // Solaris +#include // for, e.g., prmap_t +#elif defined(PLATFORM_WINDOWS) +#include // for getpid() (actually, _getpid()) +#include // for SHGetValueA() +#include // for Module32First() +#endif + +#include "gutil/sysinfo.h" + +#include +#include // for errno +#include // for snprintf(), sscanf() +#include // for getenv() +#include // for memmove(), memchr(), etc. +#include +#include +#include + +#include + +#include "gutil/dynamic_annotations.h" // for RunningOnValgrind +#include "gutil/integral_types.h" +#include "gutil/macros.h" +#include "gutil/port.h" +#include "gutil/walltime.h" + +using std::numeric_limits; + +// This isn't in the 'base' namespace in tcmallc. But, tcmalloc +// exports these functions, so we need to namespace them to avoid +// the conflict. +namespace base { + +// ---------------------------------------------------------------------- +// CyclesPerSecond() +// NumCPUs() +// It's important this not call malloc! -- they may be called at +// global-construct time, before we've set up all our proper malloc +// hooks and such. +// ---------------------------------------------------------------------- + +static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous +static int cpuinfo_num_cpus = 1; // Conservative guess +static int cpuinfo_max_cpu_index = -1; + +void SleepForNanoseconds(int64_t nanoseconds) { + // Sleep for nanosecond duration + struct timespec sleep_time; + sleep_time.tv_sec = nanoseconds / 1000 / 1000 / 1000; + sleep_time.tv_nsec = (nanoseconds % (1000 * 1000 * 1000)); + while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) + ; // Ignore signals and wait for the full interval to elapse. +} + +void SleepForMilliseconds(int64_t milliseconds) { + SleepForNanoseconds(milliseconds * 1000 * 1000); +} + +// Helper function estimates cycles/sec by observing cycles elapsed during +// sleep(). Using small sleep time decreases accuracy significantly. 
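+// For example (values are illustrative): EstimateCyclesPerSecond(100) sleeps
+// for roughly 100 ms and multiplies the observed CycleClock delta by 10
+// (i.e. 1000 / 100) to approximate cycles per second.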
+static int64 EstimateCyclesPerSecond(const int estimate_time_ms) { + CHECK(estimate_time_ms > 0); + if (estimate_time_ms <= 0) + return 1; + double multiplier = 1000.0 / (double)estimate_time_ms; // scale by this much + + const int64 start_ticks = CycleClock::Now(); + SleepForMilliseconds(estimate_time_ms); + const int64 guess = int64(multiplier * (CycleClock::Now() - start_ticks)); + return guess; +} + +// ReadIntFromFile is only called on linux and cygwin platforms. +#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) + +// Slurp a file with a single read() call into 'buf'. This is only safe to use on small +// files in places like /proc where we are guaranteed not to get a partial read. +// Any remaining bytes in the buffer are zeroed. +// +// 'buflen' must be more than large enough to hold the whole file, or else this will +// issue a FATAL error. +static bool SlurpSmallTextFile(const char* file, char* buf, int buflen) { + bool ret = false; + int fd; + RETRY_ON_EINTR(fd, open(file, O_RDONLY)); + if (fd == -1) return ret; + + memset(buf, '\0', buflen); + int n; + RETRY_ON_EINTR(n, read(fd, buf, buflen - 1)); + CHECK_NE(n, buflen - 1) << "buffer of len " << buflen << " not large enough to store " + << "contents of " << file; + if (n > 0) { + ret = true; + } + + int close_ret; + RETRY_ON_EINTR(close_ret, close(fd)); + if (PREDICT_FALSE(close_ret != 0)) { + PLOG(WARNING) << "Failed to close fd " << fd; + } + + return ret; +} + +// Helper function for reading an int from a file. Returns true if successful +// and the memory location pointed to by value is set to the value read. +static bool ReadIntFromFile(const char *file, int *value) { + char line[1024]; + if (!SlurpSmallTextFile(file, line, arraysize(line))) { + return false; + } + char* err; + const int temp_value = strtol(line, &err, 10); + if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { + *value = temp_value; + return true; + } + return false; +} + +static int ReadMaxCPUIndex() { + char buf[1024]; + // TODO(tarmstrong): KUDU-2730: 'present' doesn't include CPUs that could be hotplugged + // in the future. 'possible' does, but using it instead could result in a blow-up in the + // number of per-CPU data structures. + CHECK(SlurpSmallTextFile("/sys/devices/system/cpu/present", buf, arraysize(buf))); + int max_idx = ParseMaxCpuIndex(buf); + CHECK_GE(max_idx, 0) << "unable to parse max CPU index from: " << buf; + return max_idx; +} + +int ParseMaxCpuIndex(const char* str) { + DCHECK(str != nullptr); + const char* pos = str; + // Initialize max_idx to invalid so we can just return if we find zero ranges. + int max_idx = -1; + + while (true) { + const char* range_start = pos; + const char* dash = nullptr; + // Scan forward until we find the separator indicating end of range, which is always a + // newline or comma if the input is valid. + for (; *pos != ',' && *pos != '\n'; pos++) { + // Check for early end of string - bail here to avoid advancing past end. + if (*pos == '\0') return -1; + if (*pos == '-') { + // Multiple dashes in range is invalid. + if (dash != nullptr) return -1; + dash = pos; + } else if (!isdigit(*pos)) { + return -1; + } + } + + // At this point we found a range [range_start, pos) comprised of digits and an + // optional dash. + const char* num_start = dash == nullptr ? range_start : dash + 1; + // Check for ranges with missing numbers, e.g. "", "3-", "-3". 
+ if (num_start == pos || dash == range_start) return -1; + // The numbers are comprised only of digits, so it can only fail if it is out of + // range of int (the return type of this function). + unsigned long start_idx = strtoul(range_start, nullptr, 10); + if (start_idx > numeric_limits::max()) return -1; + unsigned long end_idx = strtoul(num_start, nullptr, 10); + if (end_idx > numeric_limits::max() || start_idx > end_idx) { + return -1; + } + // Keep track of the max index we've seen so far. + max_idx = std::max(static_cast(end_idx), max_idx); + // End of line, expect no more input. + if (*pos == '\n') break; + ++pos; + } + // String must have a single newline at the very end. + if (*pos != '\n' || *(pos + 1) != '\0') return -1; + return max_idx; +} + +#endif + +// WARNING: logging calls back to InitializeSystemInfo() so it must +// not invoke any logging code. Also, InitializeSystemInfo() can be +// called before main() -- in fact it *must* be since already_called +// isn't protected -- before malloc hooks are properly set up, so +// we make an effort not to call any routines which might allocate +// memory. + +static void InitializeSystemInfo() { + static bool already_called = false; // safe if we run before threads + if (already_called) return; + already_called = true; + + bool saw_mhz = false; + + if (RunningOnValgrind()) { + // Valgrind may slow the progress of time artificially (--scale-time=N + // option). We thus can't rely on CPU Mhz info stored in /sys or /proc + // files. Thus, actually measure the cps. + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(100); + saw_mhz = true; + } + +#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) + char line[1024]; + char* err; + int freq; + + // If the kernel is exporting the tsc frequency use that. There are issues + // where cpuinfo_max_freq cannot be relied on because the BIOS may be + // exporintg an invalid p-state (on x86) or p-states may be used to put the + // processor in a new mode (turbo mode). Essentially, those frequencies + // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as + // well. + if (!saw_mhz && + ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) { + // The value is in kHz (as the file name suggests). For example, on a + // 2GHz warpstation, the file contains the value "2000000". + cpuinfo_cycles_per_second = freq * 1000.0; + saw_mhz = true; + } + + // If CPU scaling is in effect, we want to use the *maximum* frequency, + // not whatever CPU speed some random processor happens to be using now. + if (!saw_mhz && + ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &freq)) { + // The value is in kHz. For example, on a 2GHz machine, the file + // contains the value "2000000". + cpuinfo_cycles_per_second = freq * 1000.0; + saw_mhz = true; + } + + // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. + const char* pname = "/proc/cpuinfo"; + int fd; + RETRY_ON_EINTR(fd, open(pname, O_RDONLY)); + if (fd == -1) { + PLOG(FATAL) << "Unable to read CPU info from /proc. 
procfs must be mounted."; + } + + double bogo_clock = 1.0; + bool saw_bogo = false; + int num_cpus = 0; + line[0] = line[1] = '\0'; + int chars_read = 0; + do { // we'll exit when the last read didn't read anything + // Move the next line to the beginning of the buffer + const int oldlinelen = strlen(line); + if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line + line[0] = '\0'; + else // still other lines left to save + memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1)); + // Terminate the new line, reading more if we can't find the newline + char* newline = strchr(line, '\n'); + if (newline == NULL) { + const int linelen = strlen(line); + const int bytes_to_read = sizeof(line)-1 - linelen; + CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes + RETRY_ON_EINTR(chars_read, read(fd, line + linelen, bytes_to_read)); + line[linelen + chars_read] = '\0'; + newline = strchr(line, '\n'); + } + if (newline != NULL) + *newline = '\0'; + +#if defined(__powerpc__) || defined(__ppc__) + // PowerPC cpus report the frequency in "clock" line + if (strncasecmp(line, "clock", sizeof("clock")-1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + // PowerPC frequencies are only reported as MHz (check 'show_cpuinfo' + // function at arch/powerpc/kernel/setup-common.c) + char *endp = strstr(line, "MHz"); + if (endp) { + *endp = 0; + cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) + saw_mhz = true; + } + } +#else + // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only + // accept postive values. Some environments (virtual machines) report zero, + // which would cause infinite looping in WallTime_Init. + if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) + saw_mhz = true; + } + } else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + bogo_clock = strtod(freqstr+1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) + saw_bogo = true; + } +#endif + } else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) { + num_cpus++; // count up every time we see an "processor :" entry + } + } while (chars_read > 0); + int ret; + RETRY_ON_EINTR(ret, close(fd)); + if (PREDICT_FALSE(ret != 0)) { + PLOG(WARNING) << "Failed to close fd " << fd; + } + + if (!saw_mhz) { + if (saw_bogo) { + // If we didn't find anything better, we'll use bogomips, but + // we're not happy about it. + cpuinfo_cycles_per_second = bogo_clock; + } else { + // If we don't even have bogomips, we'll use the slow estimation. + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + } + } + if (cpuinfo_cycles_per_second == 0.0) { + cpuinfo_cycles_per_second = 1.0; // maybe unnecessary, but safe + } + if (num_cpus > 0) { + cpuinfo_num_cpus = num_cpus; + } + cpuinfo_max_cpu_index = ReadMaxCPUIndex(); + +#elif defined __FreeBSD__ + // For this sysctl to work, the machine must be configured without + // SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 + // and later. Before that, it's a 32-bit quantity (and gives the + // wrong answer on machines faster than 2^32 Hz). 
See + // http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html + // But also compare FreeBSD 7.0: + // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 + // 231 error = sysctl_handle_quad(oidp, &freq, 0, req); + // To FreeBSD 6.3 (it's the same in 6-STABLE): + // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 + // 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); +#if __FreeBSD__ >= 7 + uint64_t hz = 0; +#else + unsigned int hz = 0; +#endif + size_t sz = sizeof(hz); + const char *sysctl_path = "machdep.tsc_freq"; + if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) { + fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", + sysctl_path, strerror(errno)); + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + } else { + cpuinfo_cycles_per_second = hz; + } + // TODO(csilvers): also figure out cpuinfo_num_cpus + +#elif defined(PLATFORM_WINDOWS) +# pragma comment(lib, "shlwapi.lib") // for SHGetValue() + // In NT, read MHz from the registry. If we fail to do so or we're in win9x + // then make a crude estimate. + OSVERSIONINFO os; + os.dwOSVersionInfoSize = sizeof(os); + DWORD data, data_size = sizeof(data); + if (GetVersionEx(&os) && + os.dwPlatformId == VER_PLATFORM_WIN32_NT && + SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE, + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", + "~MHz", NULL, &data, &data_size))) + cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz + else + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500? + + // Get the number of processors. + SYSTEM_INFO info; + GetSystemInfo(&info); + cpuinfo_num_cpus = info.dwNumberOfProcessors; + +#elif defined(__MACH__) && defined(__APPLE__) + // returning "mach time units" per second. the current number of elapsed + // mach time units can be found by calling uint64 mach_absolute_time(); + // while not as precise as actual CPU cycles, it is accurate in the face + // of CPU frequency scaling and multi-cpu/core machines. + // Our mac users have these types of machines, and accuracy + // (i.e. correctness) trumps precision. + // See cycleclock.h: CycleClock::Now(), which returns number of mach time + // units on Mac OS X. + mach_timebase_info_data_t timebase_info; + mach_timebase_info(&timebase_info); + double mach_time_units_per_nanosecond = + static_cast(timebase_info.denom) / + static_cast(timebase_info.numer); + cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9; + + int num_cpus = 0; + size_t size = sizeof(num_cpus); + int numcpus_name[] = { CTL_HW, HW_NCPU }; + if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0) + == 0 + && (size == sizeof(num_cpus))) + cpuinfo_num_cpus = num_cpus; + +#else + // Generic cycles per second counter + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); +#endif + + // On platforms where we can't determine the max CPU index, just use the + // number of CPUs. This might break if CPUs are taken offline, but + // better than a wild guess. 
+ if (cpuinfo_max_cpu_index < 0) { + cpuinfo_max_cpu_index = cpuinfo_num_cpus - 1; + } +} + +double CyclesPerSecond(void) { + InitializeSystemInfo(); + return cpuinfo_cycles_per_second; +} + +int NumCPUs(void) { + InitializeSystemInfo(); + return cpuinfo_num_cpus; +} + +int MaxCPUIndex(void) { + InitializeSystemInfo(); + return cpuinfo_max_cpu_index; +} + +} // namespace base diff --git a/be/src/gutil/sysinfo.h b/be/src/gutil/sysinfo.h new file mode 100644 index 0000000000..d46cfe5504 --- /dev/null +++ b/be/src/gutil/sysinfo.h @@ -0,0 +1,69 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef _SYSINFO_H_ +#define _SYSINFO_H_ + +#include + +namespace base { + +// Return the number of online CPUs. This is computed and cached the first time this or +// NumCPUs() is called, so does not reflect any CPUs enabled or disabled at a later +// point in time. +// +// Note that, if not all CPUs are online, this may return a value lower than the maximum +// value of sched_getcpu(). +extern int NumCPUs(); + +// Return the maximum CPU index that may be returned by sched_getcpu(). For example, on +// an 8-core machine, this will return '7' even if some of the CPUs have been disabled. +extern int MaxCPUIndex(); + +void SleepForNanoseconds(int64_t nanoseconds); +void SleepForMilliseconds(int64_t milliseconds); + +// processor cycles per second of each processor. Thread-safe. +extern double CyclesPerSecond(void); + +// Parse the maximum CPU index from 'str'. The list is in the format of the CPU lists +// under /sys/devices/system/cpu/, e.g. /sys/devices/system/cpu/present. Returns the +// index of the max CPU or -1 if the string could not be parsed. +// Examples of the format and the expected output include: +// * "0\n" -> 0 +// * "0-8\n" -> 8 +// * "0-15,32-47\n" -> 47 +// * "2,4-127,128-143\n" -> 143 +// Ref: https://www.kernel.org/doc/Documentation/cputopology.txt +// Exposed for testing. 
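+// A minimal call sketch (the caller and the input string are hypothetical):
+//   int max_idx = base::ParseMaxCpuIndex("0-3,8-11\n");  // yields 11
+//   if (max_idx < 0) { /* the cpulist string was malformed */ }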
+extern int ParseMaxCpuIndex(const char* str); + +} // namespace base +#endif /* #ifndef _SYSINFO_H_ */ diff --git a/be/src/gutil/threading/thread_collision_warner.cc b/be/src/gutil/threading/thread_collision_warner.cc new file mode 100644 index 0000000000..26329cfb9c --- /dev/null +++ b/be/src/gutil/threading/thread_collision_warner.cc @@ -0,0 +1,93 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gutil/threading/thread_collision_warner.h" + +#ifdef __linux__ +#include +#else +#include +#endif + +#include + +#include +#include + +#include + +namespace base { + +void DCheckAsserter::warn(int64_t previous_thread_id, int64_t current_thread_id) { + LOG(FATAL) << "Thread Collision! Previous thread id: " << previous_thread_id + << ", current thread id: " << current_thread_id; +} + +#if 0 +// Original source from Chromium -- we didn't import their threading library +// into Kudu source as of yet + +static subtle::Atomic32 CurrentThread() { + const PlatformThreadId current_thread_id = PlatformThread::CurrentId(); + // We need to get the thread id into an atomic data type. This might be a + // truncating conversion, but any loss-of-information just increases the + // chance of a fault negative, not a false positive. + const subtle::Atomic32 atomic_thread_id = + static_cast(current_thread_id); + + return atomic_thread_id; +} +#else + +static subtle::Atomic64 CurrentThread() { +#if defined(__APPLE__) + uint64_t tid; + CHECK_EQ(0, pthread_threadid_np(NULL, &tid)); + return tid; +#elif defined(__linux__) + return syscall(__NR_gettid); +#endif +} + +#endif + +void ThreadCollisionWarner::EnterSelf() { + // If the active thread is 0 then I'll write the current thread ID + // if two or more threads arrive here only one will succeed to + // write on valid_thread_id_ the current thread ID. + subtle::Atomic64 current_thread_id = CurrentThread(); + + int64_t previous_thread_id = subtle::NoBarrier_CompareAndSwap(&valid_thread_id_, + 0, + current_thread_id); + if (previous_thread_id != 0 && previous_thread_id != current_thread_id) { + // gotcha! a thread is trying to use the same class and that is + // not current thread. + asserter_->warn(previous_thread_id, current_thread_id); + } + + subtle::NoBarrier_AtomicIncrement(&counter_, 1); +} + +void ThreadCollisionWarner::Enter() { + subtle::Atomic64 current_thread_id = CurrentThread(); + + int64_t previous_thread_id = subtle::NoBarrier_CompareAndSwap(&valid_thread_id_, + 0, + current_thread_id); + if (previous_thread_id != 0) { + // gotcha! another thread is trying to use the same class. + asserter_->warn(previous_thread_id, current_thread_id); + } + + subtle::NoBarrier_AtomicIncrement(&counter_, 1); +} + +void ThreadCollisionWarner::Leave() { + if (subtle::Barrier_AtomicIncrement(&counter_, -1) == 0) { + subtle::NoBarrier_Store(&valid_thread_id_, 0); + } +} + +} // namespace base diff --git a/be/src/gutil/threading/thread_collision_warner.h b/be/src/gutil/threading/thread_collision_warner.h new file mode 100644 index 0000000000..5d9a9ac692 --- /dev/null +++ b/be/src/gutil/threading/thread_collision_warner.h @@ -0,0 +1,247 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef BASE_THREADING_THREAD_COLLISION_WARNER_H_ +#define BASE_THREADING_THREAD_COLLISION_WARNER_H_ + +#include + +#include "gutil/atomicops.h" +#include "gutil/macros.h" + +#ifndef BASE_EXPORT +#define BASE_EXPORT +#endif + +// A helper class alongside macros to be used to verify assumptions about thread +// safety of a class. +// +// Example: Queue implementation non thread-safe but still usable if clients +// are synchronized somehow. +// +// In this case the macro DFAKE_SCOPED_LOCK has to be +// used, it checks that if a thread is inside the push/pop then +// noone else is still inside the pop/push +// +// class NonThreadSafeQueue { +// public: +// ... +// void push(int) { DFAKE_SCOPED_LOCK(push_pop_); ... } +// int pop() { DFAKE_SCOPED_LOCK(push_pop_); ... } +// ... +// private: +// DFAKE_MUTEX(push_pop_); +// }; +// +// +// Example: Queue implementation non thread-safe but still usable if clients +// are synchronized somehow, it calls a method to "protect" from +// a "protected" method +// +// In this case the macro DFAKE_SCOPED_RECURSIVE_LOCK +// has to be used, it checks that if a thread is inside the push/pop +// then noone else is still inside the pop/push +// +// class NonThreadSafeQueue { +// public: +// void push(int) { +// DFAKE_SCOPED_LOCK(push_pop_); +// ... +// } +// int pop() { +// DFAKE_SCOPED_RECURSIVE_LOCK(push_pop_); +// bar(); +// ... +// } +// void bar() { DFAKE_SCOPED_RECURSIVE_LOCK(push_pop_); ... } +// ... +// private: +// DFAKE_MUTEX(push_pop_); +// }; +// +// +// Example: Queue implementation not usable even if clients are synchronized, +// so only one thread in the class life cycle can use the two members +// push/pop. +// +// In this case the macro DFAKE_SCOPED_LOCK_THREAD_LOCKED pins the +// specified +// critical section the first time a thread enters push or pop, from +// that time on only that thread is allowed to execute push or pop. +// +// class NonThreadSafeQueue { +// public: +// ... +// void push(int) { DFAKE_SCOPED_LOCK_THREAD_LOCKED(push_pop_); ... } +// int pop() { DFAKE_SCOPED_LOCK_THREAD_LOCKED(push_pop_); ... } +// ... +// private: +// DFAKE_MUTEX(push_pop_); +// }; +// +// +// Example: Class that has to be contructed/destroyed on same thread, it has +// a "shareable" method (with external synchronization) and a not +// shareable method (even with external synchronization). +// +// In this case 3 Critical sections have to be defined +// +// class ExoticClass { +// public: +// ExoticClass() { DFAKE_SCOPED_LOCK_THREAD_LOCKED(ctor_dtor_); ... } +// ~ExoticClass() { DFAKE_SCOPED_LOCK_THREAD_LOCKED(ctor_dtor_); ... } +// +// void Shareable() { DFAKE_SCOPED_LOCK(shareable_section_); ... } +// void NotShareable() { DFAKE_SCOPED_LOCK_THREAD_LOCKED(ctor_dtor_); ... } +// ... +// private: +// DFAKE_MUTEX(ctor_dtor_); +// DFAKE_MUTEX(shareable_section_); +// }; + + +#if !defined(NDEBUG) + +// Defines a class member that acts like a mutex. It is used only as a +// verification tool. +#define DFAKE_MUTEX(obj) \ + mutable base::ThreadCollisionWarner obj +// Asserts the call is never called simultaneously in two threads. Used at +// member function scope. +#define DFAKE_SCOPED_LOCK(obj) \ + base::ThreadCollisionWarner::ScopedCheck s_check_##obj(&obj) +// Asserts the call is never called simultaneously in two threads. Used at +// member function scope. Same as DFAKE_SCOPED_LOCK but allows recursive locks. 
+#define DFAKE_SCOPED_RECURSIVE_LOCK(obj) \ + base::ThreadCollisionWarner::ScopedRecursiveCheck sr_check_##obj(&obj) +// Asserts the code is always executed in the same thread. +#define DFAKE_SCOPED_LOCK_THREAD_LOCKED(obj) \ + base::ThreadCollisionWarner::Check check_##obj(&obj) + +#else + +#define DFAKE_MUTEX(obj) typedef void InternalFakeMutexType##obj +#define DFAKE_SCOPED_LOCK(obj) ((void)0) +#define DFAKE_SCOPED_RECURSIVE_LOCK(obj) ((void)0) +#define DFAKE_SCOPED_LOCK_THREAD_LOCKED(obj) ((void)0) + +#endif + +namespace base { + +// The class ThreadCollisionWarner uses an Asserter to notify the collision +// AsserterBase is the interfaces and DCheckAsserter is the default asserter +// used. During the unit tests is used another class that doesn't "DCHECK" +// in case of collision (check thread_collision_warner_unittests.cc) +struct BASE_EXPORT AsserterBase { + virtual ~AsserterBase() {} + virtual void warn(int64_t previous_thread_id, int64_t current_thread_id) = 0; +}; + +struct BASE_EXPORT DCheckAsserter : public AsserterBase { + virtual ~DCheckAsserter() {} + void warn(int64_t previous_thread_id, int64_t current_thread_id) override; +}; + +class BASE_EXPORT ThreadCollisionWarner { + public: + // The parameter asserter is there only for test purpose + explicit ThreadCollisionWarner(AsserterBase* asserter = new DCheckAsserter()) + : valid_thread_id_(0), + counter_(0), + asserter_(asserter) {} + + ~ThreadCollisionWarner() { + delete asserter_; + } + + // This class is meant to be used through the macro + // DFAKE_SCOPED_LOCK_THREAD_LOCKED + // it doesn't leave the critical section, as opposed to ScopedCheck, + // because the critical section being pinned is allowed to be used only + // from one thread + class BASE_EXPORT Check { + public: + explicit Check(ThreadCollisionWarner* warner) + : warner_(warner) { + warner_->EnterSelf(); + } + + ~Check() {} + + private: + ThreadCollisionWarner* warner_; + + DISALLOW_COPY_AND_ASSIGN(Check); + }; + + // This class is meant to be used through the macro + // DFAKE_SCOPED_LOCK + class BASE_EXPORT ScopedCheck { + public: + explicit ScopedCheck(ThreadCollisionWarner* warner) + : warner_(warner) { + warner_->Enter(); + } + + ~ScopedCheck() { + warner_->Leave(); + } + + private: + ThreadCollisionWarner* warner_; + + DISALLOW_COPY_AND_ASSIGN(ScopedCheck); + }; + + // This class is meant to be used through the macro + // DFAKE_SCOPED_RECURSIVE_LOCK + class BASE_EXPORT ScopedRecursiveCheck { + public: + explicit ScopedRecursiveCheck(ThreadCollisionWarner* warner) + : warner_(warner) { + warner_->EnterSelf(); + } + + ~ScopedRecursiveCheck() { + warner_->Leave(); + } + + private: + ThreadCollisionWarner* warner_; + + DISALLOW_COPY_AND_ASSIGN(ScopedRecursiveCheck); + }; + + private: + // This method stores the current thread identifier and does a DCHECK + // if a another thread has already done it, it is safe if same thread + // calls this multiple time (recursion allowed). + void EnterSelf(); + + // Same as EnterSelf but recursion is not allowed. + void Enter(); + + // Removes the thread_id stored in order to allow other threads to + // call EnterSelf or Enter. + void Leave(); + + // This stores the thread id that is inside the critical section, if the + // value is 0 then no thread is inside. + volatile subtle::Atomic64 valid_thread_id_; + + // Counter to trace how many time a critical section was "pinned" + // (when allowed) in order to unpin it when counter_ reaches 0. 
+ volatile subtle::Atomic64 counter_; + + // Here only for class unit tests purpose, during the test I need to not + // DCHECK but notify the collision with something else. + AsserterBase* asserter_; + + DISALLOW_COPY_AND_ASSIGN(ThreadCollisionWarner); +}; + +} // namespace base + +#endif // BASE_THREADING_THREAD_COLLISION_WARNER_H_ diff --git a/be/src/gutil/walltime.h b/be/src/gutil/walltime.h new file mode 100644 index 0000000000..337535f449 --- /dev/null +++ b/be/src/gutil/walltime.h @@ -0,0 +1,216 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef GUTIL_WALLTIME_H_ +#define GUTIL_WALLTIME_H_ + +#include + +#include +#include + +#if defined(__APPLE__) +#include +#include +#include + +#include + +#include "gutil/once.h" +#endif // #if defined(__APPLE__) + +#include "gutil/integral_types.h" + +typedef double WallTime; + +// Append result to a supplied string. +// If an error occurs during conversion 'dst' is not modified. +void StringAppendStrftime(std::string* dst, + const char* format, + time_t when, + bool local); + +// Return the given timestamp (in seconds since the epoch) as a string suitable +// for user display in the current timezone. +std::string TimestampAsString(time_t timestamp_secs); + +// Return the local time as a string suitable for user display. +std::string LocalTimeAsString(); + +// Similar to the WallTime_Parse, but it takes a boolean flag local as +// argument specifying if the time_spec is in local time or UTC +// time. If local is set to true, the same exact result as +// WallTime_Parse is returned. +bool WallTime_Parse_Timezone(const char* time_spec, + const char* format, + const struct tm* default_time, + bool local, + WallTime* result); + +// Return current time in seconds as a WallTime. +WallTime WallTime_Now(); + +typedef int64 MicrosecondsInt64; + +namespace walltime_internal { + +#if defined(__APPLE__) + +extern GoogleOnceType timebase_info_once; +extern mach_timebase_info_data_t timebase_info; +extern void InitializeTimebaseInfo(); + +inline void GetCurrentTime(mach_timespec_t* ts) { + clock_serv_t cclock; + host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); + CHECK_EQ(KERN_SUCCESS, clock_get_time(cclock, ts)); + mach_port_deallocate(mach_task_self(), cclock); +} + +inline MicrosecondsInt64 GetCurrentTimeMicros() { + mach_timespec_t ts; + GetCurrentTime(&ts); + // 'tv_sec' is just 4 bytes on macOS, need to be careful not + // to convert to nanos until we've moved to a larger int. 
+ MicrosecondsInt64 micros_from_secs = ts.tv_sec; + micros_from_secs *= 1000 * 1000; + micros_from_secs += ts.tv_nsec / 1000; + return micros_from_secs; +} + +inline int64_t GetMonoTimeNanos() { + // See Apple Technical Q&A QA1398 for further detail on mono time in OS X. + GoogleOnceInit(&timebase_info_once, &InitializeTimebaseInfo); + + uint64_t time = mach_absolute_time(); + + // mach_absolute_time returns ticks, which need to be scaled by the timebase + // info to get nanoseconds. + return time * timebase_info.numer / timebase_info.denom; +} + +inline MicrosecondsInt64 GetMonoTimeMicros() { + return GetMonoTimeNanos() / 1000; +} + +inline MicrosecondsInt64 GetThreadCpuTimeMicros() { + // See https://www.gnu.org/software/hurd/gnumach-doc/Thread-Information.html + // and Chromium base/time/time_mac.cc. + task_t thread = mach_thread_self(); + if (thread == MACH_PORT_NULL) { + LOG(WARNING) << "Failed to get mach_thread_self()"; + return 0; + } + + mach_msg_type_number_t thread_info_count = THREAD_BASIC_INFO_COUNT; + thread_basic_info_data_t thread_info_data; + + kern_return_t result = thread_info( + thread, + THREAD_BASIC_INFO, + reinterpret_cast(&thread_info_data), + &thread_info_count); + + if (result != KERN_SUCCESS) { + LOG(WARNING) << "Failed to get thread_info()"; + return 0; + } + + return thread_info_data.user_time.seconds * 1000000 + thread_info_data.user_time.microseconds; +} + +#else + +inline MicrosecondsInt64 GetClockTimeMicros(clockid_t clock) { + timespec ts; + clock_gettime(clock, &ts); + // 'tv_sec' is usually 8 bytes, but the spec says it only + // needs to be 'a signed int'. Moved to a 64 bit var before + // converting to micros to be safe. + MicrosecondsInt64 micros_from_secs = ts.tv_sec; + micros_from_secs *= 1000 * 1000; + micros_from_secs += ts.tv_nsec / 1000; + return micros_from_secs; +} + +#endif // defined(__APPLE__) + +} // namespace walltime_internal + +// Returns the time since the Epoch measured in microseconds. +inline MicrosecondsInt64 GetCurrentTimeMicros() { +#if defined(__APPLE__) + return walltime_internal::GetCurrentTimeMicros(); +#else + return walltime_internal::GetClockTimeMicros(CLOCK_REALTIME); +#endif // defined(__APPLE__) +} + +// Returns the time since some arbitrary reference point, measured in microseconds. +// Guaranteed to be monotonic (and therefore useful for measuring intervals), +// but the underlying clock is subject for adjustment by adjtime() and +// the kernel's NTP discipline. For example, the underlying clock might +// be slewed a bit to reach some reference point, time to time adjusted to be +// of the desired result frequency, etc. +inline MicrosecondsInt64 GetMonoTimeMicros() { +#if defined(__APPLE__) + // In fact, walltime_internal::GetMonoTimeMicros() is implemented via + // mach_absolute_time() which is not actually affected by adjtime() + // or the NTP discipline. On Darwin 16.0 and newer (macOS 10.12 and newer), + // it's the same as clock_gettime(CLOCK_UPTIME_RAW); see 'man clock_gettime' + // on macOS 10.12 and newer. + return walltime_internal::GetMonoTimeMicros(); +#else + return walltime_internal::GetClockTimeMicros(CLOCK_MONOTONIC); +#endif // defined(__APPLE__) +} + +// Returns the time since some arbitrary reference point, measured in microseconds. +// Guaranteed to be monotonic and not affected at all by frequency and time +// adjustments such as adjtime() or the kernel's NTP discipline. 
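+// A minimal timing sketch (DoWork() is a hypothetical workload, not part of
+// this header):
+//   MicrosecondsInt64 start = GetMonoTimeMicrosRaw();
+//   DoWork();
+//   MicrosecondsInt64 elapsed_us = GetMonoTimeMicrosRaw() - start;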
+inline MicrosecondsInt64 GetMonoTimeMicrosRaw() { +#if defined(__APPLE__) + return walltime_internal::GetMonoTimeMicros(); +#else + return walltime_internal::GetClockTimeMicros(CLOCK_MONOTONIC_RAW); +#endif // defined(__APPLE__) +} + +// Returns the time spent in user CPU on the current thread, measured in microseconds. +inline MicrosecondsInt64 GetThreadCpuTimeMicros() { +#if defined(__APPLE__) + return walltime_internal::GetThreadCpuTimeMicros(); +#else + return walltime_internal::GetClockTimeMicros(CLOCK_THREAD_CPUTIME_ID); +#endif // defined(__APPLE__) +} + +// A CycleClock yields the value of a cycle counter that increments at a rate +// that is approximately constant. +class CycleClock { + public: + // Return the value of the counter. + static inline int64 Now(); + + private: + CycleClock(); +}; + +// inline method bodies +#include "gutil/cycleclock-inl.h" // IWYU pragma: export +#endif // GUTIL_WALLTIME_H_
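As a usage sketch of the imported ref_counted.h (not part of the patch itself; MyFoo, Method() and Example() are hypothetical names, following the patterns documented in the header comments above):

    #include "gutil/ref_counted.h"

    // A reference-counted class: the destructor is kept private so that only
    // the final Release() can delete the object.
    class MyFoo : public doris::RefCountedThreadSafe<MyFoo> {
     public:
      void Method() {}

     private:
      friend class doris::RefCountedThreadSafe<MyFoo>;
      ~MyFoo() {}
    };

    void Example() {
      scoped_refptr<MyFoo> foo(new MyFoo());  // AddRef(): count == 1
      scoped_refptr<MyFoo> bar = foo;         // copy adds a ref: count == 2
      bar->Method();
    }  // both pointers go out of scope; the last Release() deletes the object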