diff --git a/be/src/gutil/CMakeLists.txt b/be/src/gutil/CMakeLists.txt index 2c0d283619..1cc33438f1 100644 --- a/be/src/gutil/CMakeLists.txt +++ b/be/src/gutil/CMakeLists.txt @@ -31,6 +31,7 @@ SET(SOURCE_FILES hash/jenkins.cc int128.cc once.cc + ref_counted.cc spinlock_internal.cc stringprintf.cc strings/ascii_ctype.cc @@ -46,6 +47,8 @@ SET(SOURCE_FILES strings/substitute.cc strings/util.cc strtoint.cc + sysinfo.cc + threading/thread_collision_warner.cc utf/rune.c cpu.cc) diff --git a/be/src/gutil/arm_instruction_set_select.h b/be/src/gutil/arm_instruction_set_select.h new file mode 100644 index 0000000000..87bc183358 --- /dev/null +++ b/be/src/gutil/arm_instruction_set_select.h @@ -0,0 +1,52 @@ +// Copyright 2011 Google Inc. +// All Rights Reserved. +// +// +// Generalizes the plethora of ARM flavors available to an easier to manage set +// Defs reference is at https://wiki.edubuntu.org/ARM/Thumb2PortingHowto + +#ifndef ARM_INSTRUCTION_SET_SELECT_H_ +#define ARM_INSTRUCTION_SET_SELECT_H_ + +#if defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7A__) +# define ARMV7 1 +#endif + +#if defined(ARMV7) || \ + defined(__ARM_ARCH_6__) || \ + defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6ZK__) +# define ARMV6 1 +#endif + +#if defined(ARMV6) || \ + defined(__ARM_ARCH_5T__) || \ + defined(__ARM_ARCH_5E__) || \ + defined(__ARM_ARCH_5TE__) || \ + defined(__ARM_ARCH_5TEJ__) +# define ARMV5 1 +#endif + +#if defined(ARMV5) || \ + defined(__ARM_ARCH_4__) || \ + defined(__ARM_ARCH_4T__) +# define ARMV4 1 +#endif + +#if defined(ARMV4) || \ + defined(__ARM_ARCH_3__) || \ + defined(__ARM_ARCH_3M__) +# define ARMV3 1 +#endif + +#if defined(ARMV3) || \ + defined(__ARM_ARCH_2__) +# define ARMV2 1 +#endif + +#endif // ARM_INSTRUCTION_SET_SELECT_H_ diff --git a/be/src/gutil/atomic_refcount.h b/be/src/gutil/atomic_refcount.h new file mode 100644 index 0000000000..2b5cb36099 --- /dev/null +++ b/be/src/gutil/atomic_refcount.h @@ -0,0 +1,153 @@ +#ifndef BASE_ATOMIC_REFCOUNT_H_ +#define BASE_ATOMIC_REFCOUNT_H_ +// Copyright 2008 Google Inc. +// All rights reserved. + +// Atomic increment and decrement for reference counting. +// For atomic operations on statistics counters and sequence numbers, +// see atomic_stats_counter.h and atomic_sequence_num.h respectively. + +// Some clients use atomic operations for reference counting. +// you use one of them: +// util/refcount/reference_counted.h +// util/gtl/refcounted_ptr.h +// util/gtl/shared_ptr.h +// Alternatively, use a Mutex to maintain your reference count. +// If you really must build your own reference counts with atomic operations, +// use the following routines in the way suggested by this example: +// AtomicWord ref_count_; // remember to initialize this to 0 +// ... +// void Ref() { +// base::RefCountInc(&this->ref_count_); +// } +// void Unref() { +// if (!base::RefCountDec(&this->ref_count_)) { +// delete this; +// } +// } +// Using these routines (rather than the ones in atomicops.h) will provide the +// correct semantics; in particular, the memory ordering needed to make +// reference counting work will be guaranteed. +// You need not declare the reference count word "volatile". After +// initialization you should use the word only via the routines below; the +// "volatile" in the signatures below is for backwards compatibility. +// +// If you need to do something very different from this, use a Mutex. 
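// [Editor's illustration, not part of the patch] A minimal sketch of the
// pattern described above, written against the routines this header declares
// (base::RefCountInc, base::RefCountDec, base::RefCountIsOne) and the Atomic32
// type from gutil/atomicops.h. The class and member names are hypothetical.
//
//   class RefCountedBuffer {
//    public:
//     RefCountedBuffer() : ref_count_(0) {}
//     void Ref() { base::RefCountInc(&ref_count_); }
//     void Unref() {
//       // The barrier inside RefCountDec makes writes performed before the
//       // count reached zero visible to the thread that deletes the object.
//       if (!base::RefCountDec(&ref_count_)) {
//         delete this;
//       }
//     }
//     // In-place mutation (copy-on-write style) is safe only when the caller
//     // holds the sole reference.
//     bool IsSoleOwner() const { return base::RefCountIsOne(&ref_count_); }
//    private:
//     volatile Atomic32 ref_count_;
//   };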
+ +#include + +#include "gutil/atomicops.h" +#include "gutil/integral_types.h" +#include "gutil/logging-inl.h" + +namespace base { + +// These calls are available for both Atomic32, and AtomicWord types, +// and also for base::subtle::Atomic64 if available on the platform. + +// Normally, clients are expected to use RefCountInc/RefCountDec. +// In rare cases, it may be necessary to adjust the reference count by +// more than 1, in which case they may use RefCountIncN/RefCountDecN. + +// Increment a reference count by "increment", which must exceed 0. +inline void RefCountIncN(volatile Atomic32 *ptr, Atomic32 increment) { + DCHECK_GT(increment, 0); + base::subtle::NoBarrier_AtomicIncrement(ptr, increment); +} + +// Decrement a reference count by "decrement", which must exceed 0, +// and return whether the result is non-zero. +// Insert barriers to ensure that state written before the reference count +// became zero will be visible to a thread that has just made the count zero. +inline bool RefCountDecN(volatile Atomic32 *ptr, Atomic32 decrement) { + DCHECK_GT(decrement, 0); + bool res = base::subtle::Barrier_AtomicIncrement(ptr, -decrement) != 0; + return res; +} + +// Increment a reference count by 1. +inline void RefCountInc(volatile Atomic32 *ptr) { + base::RefCountIncN(ptr, 1); +} + +// Decrement a reference count by 1 and return whether the result is non-zero. +// Insert barriers to ensure that state written before the reference count +// became zero will be visible to a thread that has just made the count zero. +inline bool RefCountDec(volatile Atomic32 *ptr) { + return base::RefCountDecN(ptr, 1); +} + +// Return whether the reference count is one. +// If the reference count is used in the conventional way, a +// refrerence count of 1 implies that the current thread owns the +// reference and no other thread shares it. +// This call performs the test for a referenece count of one, and +// performs the memory barrier needed for the owning thread +// to act on the object, knowing that it has exclusive access to the +// object. +inline bool RefCountIsOne(const volatile Atomic32 *ptr) { + return base::subtle::Acquire_Load(ptr) == 1; +} + +// Return whether the reference count is zero. With conventional object +// referencing counting, the object will be destroyed, so the reference count +// should never be zero. Hence this is generally used for a debug check. +inline bool RefCountIsZero(const volatile Atomic32 *ptr) { + return subtle::Acquire_Load(ptr) == 0; +} + +#if BASE_HAS_ATOMIC64 +// Implementations for Atomic64, if available. +inline void RefCountIncN(volatile base::subtle::Atomic64 *ptr, + base::subtle::Atomic64 increment) { + DCHECK_GT(increment, 0); + base::subtle::NoBarrier_AtomicIncrement(ptr, increment); +} +inline bool RefCountDecN(volatile base::subtle::Atomic64 *ptr, + base::subtle::Atomic64 decrement) { + DCHECK_GT(decrement, 0); + return base::subtle::Barrier_AtomicIncrement(ptr, -decrement) != 0; +} +inline void RefCountInc(volatile base::subtle::Atomic64 *ptr) { + base::RefCountIncN(ptr, 1); +} +inline bool RefCountDec(volatile base::subtle::Atomic64 *ptr) { + return base::RefCountDecN(ptr, 1); +} +inline bool RefCountIsOne(const volatile base::subtle::Atomic64 *ptr) { + return base::subtle::Acquire_Load(ptr) == 1; +} +inline bool RefCountIsZero(const volatile base::subtle::Atomic64 *ptr) { + return base::subtle::Acquire_Load(ptr) == 0; +} +#endif + +#ifdef AtomicWordCastType +// Implementations for AtomicWord, if it's a different type from the above. 
+inline void RefCountIncN(volatile AtomicWord *ptr, AtomicWord increment) { + base::RefCountIncN( + reinterpret_cast(ptr), increment); +} +inline bool RefCountDecN(volatile AtomicWord *ptr, AtomicWord decrement) { + return base::RefCountDecN( + reinterpret_cast(ptr), decrement); +} +inline void RefCountInc(volatile AtomicWord *ptr) { + base::RefCountIncN(ptr, 1); +} +inline bool RefCountDec(volatile AtomicWord *ptr) { + return base::RefCountDecN(ptr, 1); +} +inline bool RefCountIsOne(const volatile AtomicWord *ptr) { + return base::subtle::Acquire_Load( + reinterpret_cast(ptr)) == 1; +} +inline bool RefCountIsZero(const volatile AtomicWord *ptr) { + return base::subtle::Acquire_Load( + reinterpret_cast(ptr)) == 0; +} +#endif + +} // namespace base + +#endif // BASE_ATOMIC_REFCOUNT_H_ diff --git a/be/src/gutil/cycleclock-inl.h b/be/src/gutil/cycleclock-inl.h new file mode 100644 index 0000000000..063b397a36 --- /dev/null +++ b/be/src/gutil/cycleclock-inl.h @@ -0,0 +1,215 @@ +// Copyright (C) 1999-2007 Google, Inc. +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// +// All rights reserved. +// Extracted from base/timer.h by jrvb + +// The implementation of CycleClock::Now() +// See cycleclock.h +// +// IWYU pragma: private, include "base/cycleclock.h" + +// NOTE: only i386 and x86_64 have been well tested. +// PPC, sparc, alpha, and ia64 are based on +// http://peter.kuscsik.com/wordpress/?p=14 +// with modifications by m3b. See also +// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h + +#ifndef GUTIL_CYCLECLOCK_INL_H_ +#define GUTIL_CYCLECLOCK_INL_H_ + +#include + +#include "gutil/port.h" +#include "gutil/arm_instruction_set_select.h" + +// Please do not nest #if directives. Keep one section, and one #if per +// platform. + +// For historical reasons, the frequency on some platforms is scaled to be +// close to the platform's core clock frequency. This is not guaranteed by the +// interface, and may change in future implementations. + +// ---------------------------------------------------------------- +#if defined(__APPLE__) +#include +inline int64 CycleClock::Now() { + // this goes at the top because we need ALL Macs, regardless of + // architecture, to return the number of "mach time units" that + // have passed since startup. See sysinfo.cc where + // InitializeSystemInfo() sets the supposed cpu clock frequency of + // macs to the number of mach time units per second, not actual + // CPU clock frequency (which can change in the face of CPU + // frequency scaling). Also note that when the Mac sleeps, this + // counter pauses; it does not continue counting, nor does it + // reset to zero. 
+ return mach_absolute_time(); +} + +// ---------------------------------------------------------------- +#elif defined(__i386__) +inline int64 CycleClock::Now() { + int64 ret; + __asm__ volatile("rdtsc" : "=A" (ret)); + return ret; +} + +// ---------------------------------------------------------------- +#elif defined(__x86_64__) || defined(__amd64__) +inline int64 CycleClock::Now() { + uint64 low, high; + __asm__ volatile("rdtsc" : "=a" (low), "=d" (high)); + return (high << 32) | low; +} + +// ---------------------------------------------------------------- +#elif defined(__powerpc__) || defined(__ppc__) +#define SPR_TB 268 +#define SPR_TBU 269 +inline int64 CycleClock::Now() { + uint64 time_base_value; + if (sizeof(void*) == 8) { + // On PowerPC64, time base can be read with one SPR read. + asm volatile("mfspr %0, %1" : "=r" (time_base_value) : "i"(SPR_TB)); + } else { + uint32 tbl, tbu0, tbu1; + asm volatile (" mfspr %0, %3\n" + " mfspr %1, %4\n" + " mfspr %2, %3\n" : + "=r"(tbu0), "=r"(tbl), "=r"(tbu1) : + "i"(SPR_TBU), "i"(SPR_TB)); + // If there is a carry into the upper half, it is okay to return + // (tbu1, 0) since it must be between the 2 TBU reads. + tbl &= -static_cast(tbu0 == tbu1); + // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) + time_base_value = + (static_cast(tbu1) << 32) | static_cast(tbl); + } + return static_cast(time_base_value); +} + +// ---------------------------------------------------------------- +#elif defined(__sparc__) +inline int64 CycleClock::Now() { + int64 tick; + asm(".byte 0x83, 0x41, 0x00, 0x00"); + asm("mov %%g1, %0" : "=r" (tick)); + return tick; +} + +// ---------------------------------------------------------------- +#elif defined(__ia64__) +inline int64 CycleClock::Now() { + int64 itc; + asm("mov %0 = ar.itc" : "=r" (itc)); + return itc; +} + +// ---------------------------------------------------------------- +#elif defined(_MSC_VER) && defined(_M_IX86) +inline int64 CycleClock::Now() { + // Older MSVC compilers (like 7.x) don't seem to support the + // __rdtsc intrinsic properly, so I prefer to use _asm instead + // when I know it will work. Otherwise, I'll use __rdtsc and hope + // the code is being compiled with a non-ancient compiler. + _asm rdtsc +} + +// ---------------------------------------------------------------- +#elif defined(_MSC_VER) +// For MSVC, we want to use '_asm rdtsc' when possible (since it works +// with even ancient MSVC compilers), and when not possible the +// __rdtsc intrinsic, declared in . Unfortunately, in some +// environments, and have conflicting +// declarations of some other intrinsics, breaking compilation. +// Therefore, we simply declare __rdtsc ourselves. See also +// http://connect.microsoft.com/VisualStudio/feedback/details/262047 +extern "C" uint64 __rdtsc(); +#pragma intrinsic(__rdtsc) +inline int64 CycleClock::Now() { + return __rdtsc(); +} + +// ---------------------------------------------------------------- +#elif defined(ARMV6) // V6 is the earliest arm that has a standard cyclecount +#include "gutil/sysinfo.h" +inline int64 CycleClock::Now() { + uint32 pmccntr; + uint32 pmuseren; + uint32 pmcntenset; + // Read the user mode perf monitor counter access permissions. + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren)); + if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset)); + if (pmcntenset & 0x80000000ul) { // Is it counting? 
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr)); + // The counter is set up to count every 64th cycle + return static_cast(pmccntr) * 64; // Should optimize to << 6 + } + } + struct timeval tv; + gettimeofday(&tv, NULL); + return static_cast((tv.tv_sec + tv.tv_usec * 0.000001) + * CyclesPerSecond()); +} + +// ---------------------------------------------------------------- +#elif defined(ARMV3) +#include "gutil/sysinfo.h" // for CyclesPerSecond() +inline int64 CycleClock::Now() { + struct timeval tv; + gettimeofday(&tv, NULL); + return static_cast((tv.tv_sec + tv.tv_usec * 0.000001) + * CyclesPerSecond()); +} + +// ---------------------------------------------------------------- +#elif defined(__mips__) +#include "gutil/sysinfo.h" +inline int64 CycleClock::Now() { + // mips apparently only allows rdtsc for superusers, so we fall + // back to gettimeofday. It's possible clock_gettime would be better. + struct timeval tv; + gettimeofday(&tv, NULL); + return static_cast((tv.tv_sec + tv.tv_usec * 0.000001) + * CyclesPerSecond()); +} + +// ---------------------------------------------------------------- +#elif defined(__aarch64__) +#include "gutil/sysinfo.h" +inline int64 CycleClock::Now() { + // System timer of ARMv8 runs at a different frequency than the CPU's. + // The frequency is fixed, typically in the range 1-50MHz. It can be + // read at CNTFRQ special register. We assume the OS has set up + // the virtual timer properly. + int64_t virtual_timer_value; + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); + return virtual_timer_value; +} +// ---------------------------------------------------------------- +#else +// The soft failover to a generic implementation is automatic only for some +// platforms. For other platforms the developer is expected to make an attempt +// to create a fast implementation and use generic version if nothing better is +// available. +#error You need to define CycleTimer for your O/S and CPU +#endif + +#endif // GUTIL_CYCLECLOCK_INL_H_ diff --git a/be/src/gutil/endian.h b/be/src/gutil/endian.h index 6b8a0bc772..f957df74aa 100644 --- a/be/src/gutil/endian.h +++ b/be/src/gutil/endian.h @@ -1,10 +1,32 @@ -// Copyright 2005 Google, Inc +// Copyright 2005 Google Inc. +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// +// --- +// // // Utility functions that depend on bytesex. We define htonll and ntohll, // as well as "Google" versions of all the standards: ghtonl, ghtons, and // so on. These functions do exactly the same as their standard variants, // but don't require including the dangerous netinet/in.h. 
- +// +// Buffer routines will copy to and from buffers without causing +// a bus error when the architecture requires differnt byte alignments #ifndef UTIL_ENDIAN_ENDIAN_H_ #define UTIL_ENDIAN_ENDIAN_H_ @@ -20,7 +42,7 @@ inline uint64 gbswap_64(uint64 host_int) { if (__builtin_constant_p(host_int)) { return __bswap_constant_64(host_int); } else { - register uint64 result; + uint64 result; __asm__("bswap %0" : "=r" (result) : "0" (host_int)); return result; } @@ -33,7 +55,7 @@ inline uint64 gbswap_64(uint64 host_int) { } inline unsigned __int128 gbswap_128(unsigned __int128 host_int) { - return static_cast(bswap_64(static_cast(host_int >> 64))) | + return static_cast(bswap_64(static_cast(host_int >> 64))) | (static_cast(bswap_64(static_cast(host_int))) << 64); } @@ -58,24 +80,32 @@ inline uint64 ghtonll(uint64 x) { return gbswap_64(x); } #elif defined IS_BIG_ENDIAN -// These definitions are a lot simpler on big-endian machines -#define ghtons(x) (x) -#define ghtonl(x) (x) -#define ghtonll(x) (x) +// These definitions are simpler on big-endian machines +// These are functions instead of macros to avoid self-assignment warnings +// on calls such as "i = ghtnol(i);". This also provides type checking. +inline uint16 ghtons(uint16 x) { return x; } +inline uint32 ghtonl(uint32 x) { return x; } +inline uint64 ghtonll(uint64 x) { return x; } #else -#error "Unsupported bytesex: Either IS_BIG_ENDIAN or IS_LITTLE_ENDIAN must be defined" +#error "Unsupported bytesex: Either IS_BIG_ENDIAN or IS_LITTLE_ENDIAN must be defined" // NOLINT #endif // bytesex -// Convert to little-endian storage, opposite of network format. -// Convert x from host to little endian: x = LittleEndian.FromHost(x); -// convert x from little endian to host: x = LittleEndian.ToHost(x); +// ntoh* and hton* are the same thing for any size and bytesex, +// since the function is an involution, i.e., its own inverse. +#define gntohl(x) ghtonl(x) +#define gntohs(x) ghtons(x) +#define gntohll(x) ghtonll(x) +#if !defined(__APPLE__) +// This one is safe to take as it's an extension +#define htonll(x) ghtonll(x) +#define ntohll(x) htonll(x) +#endif + +// Utilities to convert numbers between the current hosts's native byte +// order and little-endian byte order // -// Store values into unaligned memory converting to little endian order: -// LittleEndian.Store16(p, x); -// -// Load unaligned values stored in little endian coverting to host order: -// x = LittleEndian.Load16(p); +// Load/Store methods are alignment safe class LittleEndian { public: // Conversion functions. @@ -90,6 +120,9 @@ class LittleEndian { static uint64 FromHost64(uint64 x) { return x; } static uint64 ToHost64(uint64 x) { return x; } + static unsigned __int128 FromHost128(unsigned __int128 x) { return x; } + static unsigned __int128 ToHost128(unsigned __int128 x) { return x; } + static bool IsLittleEndian() { return true; } #elif defined IS_BIG_ENDIAN @@ -183,162 +216,169 @@ class LittleEndian { return uint128(Load64VariableLength(p, len)); } else { return uint128( - Load64VariableLength(static_cast(p) + 8, len - 8), - Load64(p)); + Load64VariableLength(static_cast(p) + 8, len - 8), + Load64(p)); } } + + // Load & Store in machine's word size. 
+ static uword_t LoadUnsignedWord(const void *p) { + if (sizeof(uword_t) == 8) + return Load64(p); + else + return Load32(p); + } + + static void StoreUnsignedWord(void *p, uword_t v) { + if (sizeof(v) == 8) + Store64(p, v); + else + Store32(p, v); + } }; - -// This one is safe to take as it's an extension -#define htonll(x) ghtonll(x) - -// ntoh* and hton* are the same thing for any size and bytesex, -// since the function is an involution, i.e., its own inverse. -#define gntohl(x) ghtonl(x) -#define gntohs(x) ghtons(x) -#define gntohll(x) ghtonll(x) -#define ntohll(x) htonll(x) - // Utilities to convert numbers between the current hosts's native byte // order and big-endian byte order (same as network byte order) // // Load/Store methods are alignment safe class BigEndian { -public: + public: #ifdef IS_LITTLE_ENDIAN - static uint16 FromHost16(uint16 x) { return bswap_16(x); } - static uint16 ToHost16(uint16 x) { return bswap_16(x); } + static uint16 FromHost16(uint16 x) { return bswap_16(x); } + static uint16 ToHost16(uint16 x) { return bswap_16(x); } - static uint32 FromHost24(uint32 x) { return bswap_24(x); } - static uint32 ToHost24(uint32 x) { return bswap_24(x); } + static uint32 FromHost24(uint32 x) { return bswap_24(x); } + static uint32 ToHost24(uint32 x) { return bswap_24(x); } - static uint32 FromHost32(uint32 x) { return bswap_32(x); } - static uint32 ToHost32(uint32 x) { return bswap_32(x); } + static uint32 FromHost32(uint32 x) { return bswap_32(x); } + static uint32 ToHost32(uint32 x) { return bswap_32(x); } - static uint64 FromHost64(uint64 x) { return gbswap_64(x); } - static uint64 ToHost64(uint64 x) { return gbswap_64(x); } + static uint64 FromHost64(uint64 x) { return gbswap_64(x); } + static uint64 ToHost64(uint64 x) { return gbswap_64(x); } - static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); } - static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); } + static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); } + static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); } - static bool IsLittleEndian() { return true; } + static bool IsLittleEndian() { return true; } #elif defined IS_BIG_ENDIAN - static uint16 FromHost16(uint16 x) { return x; } - static uint16 ToHost16(uint16 x) { return x; } + static uint16 FromHost16(uint16 x) { return x; } + static uint16 ToHost16(uint16 x) { return x; } static uint32 FromHost24(uint32 x) { return x; } static uint32 ToHost24(uint32 x) { return x; } - static uint32 FromHost32(uint32 x) { return x; } - static uint32 ToHost32(uint32 x) { return x; } + static uint32 FromHost32(uint32 x) { return x; } + static uint32 ToHost32(uint32 x) { return x; } - static uint64 FromHost64(uint64 x) { return x; } - static uint64 ToHost64(uint64 x) { return x; } + static uint64 FromHost64(uint64 x) { return x; } + static uint64 ToHost64(uint64 x) { return x; } - static uint128 FromHost128(uint128 x) { return x; } - static uint128 ToHost128(uint128 x) { return x; } + static uint128 FromHost128(uint128 x) { return x; } + static uint128 ToHost128(uint128 x) { return x; } - static bool IsLittleEndian() { return false; } + static bool IsLittleEndian() { return false; } #endif /* ENDIAN */ - // Functions to do unaligned loads and stores in little-endian order. - static uint16 Load16(const void *p) { - return ToHost16(UNALIGNED_LOAD16(p)); - } + // Functions to do unaligned loads and stores in little-endian order. 
+ static uint16 Load16(const void *p) { + return ToHost16(UNALIGNED_LOAD16(p)); + } - static void Store16(void *p, uint16 v) { - UNALIGNED_STORE16(p, FromHost16(v)); - } + static void Store16(void *p, uint16 v) { + UNALIGNED_STORE16(p, FromHost16(v)); + } - static uint32 Load32(const void *p) { - return ToHost32(UNALIGNED_LOAD32(p)); - } + static uint32 Load32(const void *p) { + return ToHost32(UNALIGNED_LOAD32(p)); + } - static void Store32(void *p, uint32 v) { - UNALIGNED_STORE32(p, FromHost32(v)); - } + static void Store32(void *p, uint32 v) { + UNALIGNED_STORE32(p, FromHost32(v)); + } - static uint64 Load64(const void *p) { - return ToHost64(UNALIGNED_LOAD64(p)); - } + static uint64 Load64(const void *p) { + return ToHost64(UNALIGNED_LOAD64(p)); + } - // Build a uint64 from 1-8 bytes. - // 8 * len least significant bits are loaded from the memory with - // BigEndian order. The 64 - 8 * len most significant bits are - // set all to 0. - // In latex-friendly words, this function returns: - // $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned. - // - // This function is equivalent with: - // uint64 val = 0; - // memcpy(&val, p, len); - // return ToHost64(val); - // TODO(user): write a small benchmark and benchmark the speed - // of a memcpy based approach. - // - // For speed reasons this function does not work for len == 0. - // The caller needs to guarantee that 1 <= len <= 8. - static uint64 Load64VariableLength(const void * const p, int len) { - assert(len >= 1 && len <= 8); - uint64 val = Load64(p); - uint64 mask = 0; - --len; - do { - mask = (mask << 8) | 0xff; - // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. - } while (--len >= 0); - return val & mask; - } + // Build a uint64 from 1-8 bytes. + // 8 * len least significant bits are loaded from the memory with + // BigEndian order. The 64 - 8 * len most significant bits are + // set all to 0. + // In latex-friendly words, this function returns: + // $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned. + // + // This function is equivalent with: + // uint64 val = 0; + // memcpy(&val, p, len); + // return ToHost64(val); + // TODO(user): write a small benchmark and benchmark the speed + // of a memcpy based approach. + // + // For speed reasons this function does not work for len == 0. + // The caller needs to guarantee that 1 <= len <= 8. + static uint64 Load64VariableLength(const void * const p, int len) { + assert(len >= 1 && len <= 8); + uint64 val = Load64(p); + uint64 mask = 0; + --len; + do { + mask = (mask << 8) | 0xff; + // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. 
+ } while (--len >= 0); + return val & mask; + } - static void Store64(void *p, uint64 v) { - UNALIGNED_STORE64(p, FromHost64(v)); - } + static void Store64(void *p, uint64 v) { + UNALIGNED_STORE64(p, FromHost64(v)); + } - static uint128 Load128(const void *p) { - return uint128( - ToHost64(UNALIGNED_LOAD64(p)), - ToHost64(UNALIGNED_LOAD64(reinterpret_cast(p) + 1))); - } + static uint128 Load128(const void *p) { + return uint128( + ToHost64(UNALIGNED_LOAD64(p)), + ToHost64(UNALIGNED_LOAD64(reinterpret_cast(p) + 1))); + } - static void Store128(void *p, const uint128 v) { - UNALIGNED_STORE64(p, FromHost64(Uint128High64(v))); - UNALIGNED_STORE64(reinterpret_cast(p) + 1, - FromHost64(Uint128Low64(v))); - } + static void Store128(void *p, const uint128 v) { + UNALIGNED_STORE64(p, FromHost64(Uint128High64(v))); + UNALIGNED_STORE64(reinterpret_cast(p) + 1, + FromHost64(Uint128Low64(v))); + } - // Build a uint128 from 1-16 bytes. - // 8 * len least significant bits are loaded from the memory with - // BigEndian order. The 128 - 8 * len most significant bits are - // set all to 0. - static uint128 Load128VariableLength(const void *p, int len) { - if (len <= 8) { - return uint128(Load64VariableLength(static_cast(p)+8, - len)); - } else { - return uint128( - Load64VariableLength(p, len-8), - Load64(static_cast(p)+8)); - } + // Build a uint128 from 1-16 bytes. + // 8 * len least significant bits are loaded from the memory with + // BigEndian order. The 128 - 8 * len most significant bits are + // set all to 0. + static uint128 Load128VariableLength(const void *p, int len) { + if (len <= 8) { + return uint128(Load64VariableLength(static_cast(p)+8, + len)); + } else { + return uint128( + Load64VariableLength(p, len-8), + Load64(static_cast(p)+8)); } + } - // Load & Store in machine's word size. - static uword_t LoadUnsignedWord(const void *p) { - if (sizeof(uword_t) == 8) - return Load64(p); - else - return Load32(p); - } + // Load & Store in machine's word size. + static uword_t LoadUnsignedWord(const void *p) { + if (sizeof(uword_t) == 8) + return Load64(p); + else + return Load32(p); + } - static void StoreUnsignedWord(void *p, uword_t v) { - if (sizeof(uword_t) == 8) - Store64(p, v); - else - Store32(p, v); - } + static void StoreUnsignedWord(void *p, uword_t v) { + if (sizeof(uword_t) == 8) + Store64(p, v); + else + Store32(p, v); + } }; // BigEndian +// Network byte order is big-endian +typedef BigEndian NetworkByteOrder; + #endif // UTIL_ENDIAN_ENDIAN_H_ diff --git a/be/src/gutil/map-util.h b/be/src/gutil/map-util.h new file mode 100644 index 0000000000..c42672fd5f --- /dev/null +++ b/be/src/gutil/map-util.h @@ -0,0 +1,944 @@ +// Copyright 2005 Google Inc. +// +// #status: RECOMMENDED +// #category: maps +// #summary: Utility functions for use with map-like containers. +// +// This file provides utility functions for use with STL map-like data +// structures, such as std::map and hash_map. Some functions will also work with +// sets, such as ContainsKey(). +// +// The main functions in this file fall into the following categories: +// +// - Find*() +// - Contains*() +// - Insert*() +// - Lookup*() +// +// These functions often have "...OrDie" or "...OrDieNoPrint" variants. These +// variants will crash the process with a CHECK() failure on error, including +// the offending key/data in the log message. The NoPrint variants will not +// include the key/data in the log output under the assumption that it's not a +// printable type. 
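// [Editor's illustration, not part of the original comment] The flavor of the
// main lookup helpers, assuming a std::map<std::string, int> named "scores":
//
//   const int& a = FindOrDie(scores, "alice");     // CHECK-fails if the key is absent
//   const int* b = FindOrNull(scores, "bob");      // returns NULL if absent
//   int c = FindWithDefault(scores, "carol", -1);  // falls back to -1 if absent
//   bool has = ContainsKey(scores, "alice");       // plain membership test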
+// +// Most functions are fairly self explanatory from their names, with the +// exception of Find*() vs Lookup*(). The Find functions typically use the map's +// .find() member function to locate and return the map's value type. The +// Lookup*() functions typically use the map's .insert() (yes, insert) member +// function to insert the given value if necessary and returns (usually a +// reference to) the map's value type for the found item. +// +// See the per-function comments for specifics. +// +// There are also a handful of functions for doing other miscellaneous things. +// +// A note on terminology: +// +// Map-like containers are collections of pairs. Like all STL containers they +// contain a few standard typedefs identifying the types of data they contain. +// Given the following map declaration: +// +// map my_map; +// +// the notable typedefs would be as follows: +// +// - key_type -- string +// - value_type -- pair +// - mapped_type -- int +// +// Note that the map above contains two types of "values": the key-value pairs +// themselves (value_type) and the values within the key-value pairs +// (mapped_type). A value_type consists of a key_type and a mapped_type. +// +// The documentation below is written for programmers thinking in terms of keys +// and the (mapped_type) values associated with a given key. For example, the +// statement +// +// my_map["foo"] = 3; +// +// has a key of "foo" (type: string) with a value of 3 (type: int). +// + +#ifndef UTIL_GTL_MAP_UTIL_H_ +#define UTIL_GTL_MAP_UTIL_H_ + +#include + +#include +#include +#include + +#include + +#include "gutil/logging-inl.h" + +// +// Find*() +// + +// Returns a const reference to the value associated with the given key if it +// exists. Crashes otherwise. +// +// This is intended as a replacement for operator[] as an rvalue (for reading) +// when the key is guaranteed to exist. +// +// operator[] for lookup is discouraged for several reasons: +// * It has a side-effect of inserting missing keys +// * It is not thread-safe (even when it is not inserting, it can still +// choose to resize the underlying storage) +// * It invalidates iterators (when it chooses to resize) +// * It default constructs a value object even if it doesn't need to +// +// This version assumes the key is printable, and includes it in the fatal log +// message. +template +const typename Collection::mapped_type& +FindOrDie(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.find(key); + CHECK(it != collection.end()) << "Map key not found: " << key; + return it->second; +} + +// Same as above, but returns a non-const reference. +template +typename Collection::mapped_type& +FindOrDie(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + auto it = collection.find(key); + CHECK(it != collection.end()) << "Map key not found: " << key; + return it->second; +} + +// Same as FindOrDie above, but doesn't log the key on failure. +template +const typename Collection::mapped_type& +FindOrDieNoPrint(const Collection& collection, + const typename Collection::key_type& key) { + typename Collection::const_iterator it = collection.find(key); + CHECK(it != collection.end()) << "Map key not found"; + return it->second; +} + +// Same as above, but returns a non-const reference. 
+template +typename Collection::mapped_type& +FindOrDieNoPrint(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + typename Collection::iterator it = collection.find(key); + CHECK(it != collection.end()) << "Map key not found"; + return it->second; +} + +// Returns a const reference to the value associated with the given key if it +// exists, otherwise a const reference to the provided default value is +// returned. +// +// WARNING: If a temporary object is passed as the default "value," this +// function will return a reference to that temporary object, which will be +// destroyed by the end of the statement. Specifically, if you have a map with +// string values, and you pass a char* as the default "value," either use the +// returned value immediately or store it in a string (not string&). Details: +template +const typename Collection::mapped_type& +FindWithDefault(const Collection& collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + auto it = collection.find(key); + if (it == collection.end()) { + return value; + } + return it->second; +} + +// Returns a pointer to the const value associated with the given key if it +// exists, or NULL otherwise. +template +const typename Collection::mapped_type* +FindOrNull(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return 0; + } + return &it->second; +} + +// Same as above but returns a pointer to the non-const value. +template +typename Collection::mapped_type* +FindOrNull(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return 0; + } + return &it->second; +} + +// Returns a pointer to the const value associated with the greatest key +// that's less than or equal to the given key, or NULL if no such key exists. +template +const typename Collection::mapped_type* +FindFloorOrNull(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.upper_bound(key); + if (it == collection.begin()) { + return 0; + } + return &(--it)->second; +} + +// Same as above but returns a pointer to the non-const value. +template +typename Collection::mapped_type* +FindFloorOrNull(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + auto it = collection.upper_bound(key); + if (it == collection.begin()) { + return 0; + } + return &(--it)->second; +} + +// Returns a const-reference to the value associated with the greatest key +// that's less than or equal to the given key, or crashes if it does not exist. +template +const typename Collection::mapped_type& +FindFloorOrDie(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.upper_bound(key); + CHECK(it != collection.begin()); + return (--it)->second; +} + +// Same as above, but returns a non-const reference. +template +typename Collection::mapped_type& +FindFloorOrDie(Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.upper_bound(key); + CHECK(it != collection.begin()); + return (--it)->second; +} + +// Returns the pointer value associated with the given key. If none is found, +// NULL is returned. The function is designed to be used with a map of keys to +// pointers. +// +// This function does not distinguish between a missing key and a key mapped +// to a NULL value. 
+template +typename Collection::mapped_type +FindPtrOrNull(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return typename Collection::mapped_type(0); + } + return it->second; +} + +// Same as above, except takes non-const reference to collection. +// +// This function is needed for containers that propagate constness to the +// pointee, such as boost::ptr_map. +template +typename Collection::mapped_type +FindPtrOrNull(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return typename Collection::mapped_type(0); + } + return it->second; +} + +// FindPtrOrNull like function for maps whose value is a smart pointer like shared_ptr or +// unique_ptr. +// Returns the raw pointer contained in the smart pointer for the first found key, if it exists, +// or null if it doesn't. +template +typename Collection::mapped_type::element_type* +FindPointeeOrNull(const Collection& collection, // NOLINT, + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return nullptr; + } + return it->second.get(); +} + +// Finds the value associated with the given key and copies it to *value (if not +// NULL). Returns false if the key was not found, true otherwise. +template +bool FindCopy(const Collection& collection, + const Key& key, + Value* const value) { + auto it = collection.find(key); + if (it == collection.end()) { + return false; + } + if (value) { + *value = it->second; + } + return true; +} + +// +// Contains*() +// + +// Returns true iff the given collection contains the given key. +template +bool ContainsKey(const Collection& collection, const Key& key) { + return collection.find(key) != collection.end(); +} + +// Returns true iff the given collection contains the given key-value pair. +template +bool ContainsKeyValuePair(const Collection& collection, + const Key& key, + const Value& value) { + typedef typename Collection::const_iterator const_iterator; + std::pair range = collection.equal_range(key); + for (const_iterator it = range.first; it != range.second; ++it) { + if (it->second == value) { + return true; + } + } + return false; +} + +// +// Insert*() +// + +// Inserts the given key-value pair into the collection. Returns true if the +// given key didn't previously exist. If the given key already existed in the +// map, its value is changed to the given "value" and false is returned. +template +bool InsertOrUpdate(Collection* const collection, + const typename Collection::value_type& vt) { + std::pair ret = collection->insert(vt); + if (!ret.second) { + // update + ret.first->second = vt.second; + return false; + } + return true; +} + +// Same as above, except that the key and value are passed separately. +template +bool InsertOrUpdate(Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + return InsertOrUpdate( + collection, typename Collection::value_type(key, value)); +} + +// Inserts/updates all the key-value pairs from the range defined by the +// iterators "first" and "last" into the given collection. 
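// [Editor's illustration, not part of the original comment] Typical use of the
// insert/update helpers defined next, assuming a std::map<std::string, int>
// destination and a vector of pairs:
//
//   std::vector<std::pair<std::string, int>> src = {{"a", 1}, {"b", 2}};
//   std::map<std::string, int> dst;
//   InsertOrUpdateMany(&dst, src.begin(), src.end());  // dst now holds both pairs
//   InsertOrUpdate(&dst, "a", 10);                      // overwrites the existing value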
+template +void InsertOrUpdateMany(Collection* const collection, + InputIterator first, InputIterator last) { + for (; first != last; ++first) { + InsertOrUpdate(collection, *first); + } +} + +// Change the value associated with a particular key in a map or hash_map +// of the form map which owns the objects pointed to by the +// value pointers. If there was an existing value for the key, it is deleted. +// True indicates an insert took place, false indicates an update + delete. +template +bool InsertAndDeleteExisting( + Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + std::pair ret = + collection->insert(typename Collection::value_type(key, value)); + if (!ret.second) { + delete ret.first->second; + ret.first->second = value; + return false; + } + return true; +} + +// Inserts the given key and value into the given collection iff the given key +// did NOT already exist in the collection. If the key previously existed in the +// collection, the value is not changed. Returns true if the key-value pair was +// inserted; returns false if the key was already present. +template +bool InsertIfNotPresent(Collection* const collection, + const typename Collection::value_type& vt) { + return collection->insert(vt).second; +} + +// Same as above except the key and value are passed separately. +template +bool InsertIfNotPresent( + Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + return InsertIfNotPresent( + collection, typename Collection::value_type(key, value)); +} + +// Same as above except dies if the key already exists in the collection. +template +void InsertOrDie(Collection* const collection, + const typename Collection::value_type& value) { + CHECK(InsertIfNotPresent(collection, value)) << "duplicate value: " << value; +} + +// Same as above except doesn't log the value on error. +template +void InsertOrDieNoPrint(Collection* const collection, + const typename Collection::value_type& value) { + CHECK(InsertIfNotPresent(collection, value)) << "duplicate value."; +} + +// Inserts the key-value pair into the collection. Dies if key was already +// present. +template +void InsertOrDie(Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& data) { + CHECK(InsertIfNotPresent(collection, key, data)) + << "duplicate key: " << key; +} + +// Same as above except doesn't log the key on error. +template +void InsertOrDieNoPrint( + Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& data) { + CHECK(InsertIfNotPresent(collection, key, data)) << "duplicate key."; +} + +// Inserts a new key and default-initialized value. Dies if the key was already +// present. Returns a reference to the value. Example usage: +// +// map m; +// SomeProto& proto = InsertKeyOrDie(&m, 3); +// proto.set_field("foo"); +template +typename Collection::mapped_type& InsertKeyOrDie( + Collection* const collection, + const typename Collection::key_type& key) { + typedef typename Collection::value_type value_type; + std::pair res = + collection->insert(value_type(key, typename Collection::mapped_type())); + CHECK(res.second) << "duplicate key: " << key; + return res.first->second; +} + +// +// Emplace*() +// +template +bool EmplaceIfNotPresent(Collection* const collection, + Args&&...
args) { + return collection->emplace(std::forward(args)...).second; +} + +// Emplaces the given key-value pair into the collection. Returns true if the +// given key didn't previously exist. If the given key already existed in the +// map, its value is changed to the given "value" and false is returned. +template +bool EmplaceOrUpdate(Collection* const collection, + const typename Collection::key_type& key, + typename Collection::mapped_type&& value) { + typedef typename Collection::mapped_type mapped_type; + auto it = collection->find(key); + if (it == collection->end()) { + collection->emplace(key, std::forward(value)); + return true; + } + it->second = std::forward(value); + return false; +} + +template +void EmplaceOrDie(Collection* const collection, + Args&&... args) { + CHECK(EmplaceIfNotPresent(collection, std::forward(args)...)) + << "duplicate value"; +} + +// +// Lookup*() +// + +// Looks up a given key and value pair in a collection and inserts the key-value +// pair if it's not already present. Returns a reference to the value associated +// with the key. +template +typename Collection::mapped_type& +LookupOrInsert(Collection* const collection, + const typename Collection::value_type& vt) { + return collection->insert(vt).first->second; +} + +// Same as above except the key-value are passed separately. +template +typename Collection::mapped_type& +LookupOrInsert(Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + return LookupOrInsert( + collection, typename Collection::value_type(key, value)); +} + +// It's similar to LookupOrInsert() but uses the emplace and r-value mechanics +// to achieve the desired results. The constructor of the new element is called +// with exactly the same arguments as supplied to emplace, forwarded via +// std::forward(args). The element may be constructed even if there +// already is an element with the same key in the container, in which case the +// newly constructed element will be destroyed immediately. +// For details, see +// https://en.cppreference.com/w/cpp/container/map/emplace +// https://en.cppreference.com/w/cpp/container/unordered_map/emplace +template +typename Collection::mapped_type& +LookupOrEmplace(Collection* const collection, Args&&... args) { + return collection->emplace(std::forward(args)...).first->second; +} + +// Counts the number of equivalent elements in the given "sequence", and stores +// the results in "count_map" with element as the key and count as the value. +// +// Example: +// vector v = {"a", "b", "c", "a", "b"}; +// map m; +// AddTokenCounts(v, 1, &m); +// assert(m["a"] == 2); +// assert(m["b"] == 2); +// assert(m["c"] == 1); +template +void AddTokenCounts( + const Sequence& sequence, + const typename Collection::mapped_type& increment, + Collection* const count_map) { + for (typename Sequence::const_iterator it = sequence.begin(); + it != sequence.end(); ++it) { + typename Collection::mapped_type& value = + LookupOrInsert(count_map, *it, + typename Collection::mapped_type()); + value += increment; + } +} + +// Helpers for LookupOrInsertNew(), needed to create a new value type when the +// type itself is a pointer, i.e., these extract the actual type from a pointer. +template +void MapUtilAssignNewDefaultInstance(T** location) { + *location = new T(); +} + +template +void MapUtilAssignNewInstance(T** location, const Arg &arg) { + *location = new T(arg); +} + +// Returns a reference to the value associated with key. 
If not found, a value +// is default constructed on the heap and added to the map. +// +// This function is useful for containers of the form map, where +// inserting a new key, value pair involves constructing a new heap-allocated +// Value, and storing a pointer to that in the collection. +template +typename Collection::mapped_type& +LookupOrInsertNew(Collection* const collection, + const typename Collection::key_type& key) { + std::pair ret = + collection->insert( + typename Collection::value_type(key, + static_cast(NULL))); + if (ret.second) { + // This helper is needed to 'extract' the Value type from the type of the + // container value, which is (Value*). + MapUtilAssignNewDefaultInstance(&(ret.first->second)); + } + return ret.first->second; +} + +// Same as above but constructs the value using the single-argument constructor +// and the given "arg". +template +typename Collection::mapped_type& +LookupOrInsertNew(Collection* const collection, + const typename Collection::key_type& key, + const Arg& arg) { + std::pair ret = + collection->insert( + typename Collection::value_type( + key, + static_cast(NULL))); + if (ret.second) { + // This helper is needed to 'extract' the Value type from the type of the + // container value, which is (Value*). + MapUtilAssignNewInstance(&(ret.first->second), arg); + } + return ret.first->second; +} + +// Lookup of linked/shared pointers is used in two scenarios: +// +// Use LookupOrInsertSharedPtr if the container does not own the elements +// for their whole lifetime. This is typically the case when a reader allows +// parallel updates to the container. In this case a Mutex only needs to lock +// container operations, but all element operations must be performed on the +// shared pointer. Finding an element must be performed using FindPtr*() and +// cannot be done with FindLinkedPtr*() even though it compiles. + +// Lookup a key in a map or hash_map whose values are shared_ptrs. If it is +// missing, set collection[key].reset(new Value::element_type). Unlike +// LookupOrInsertNewLinkedPtr, this function returns the shared_ptr instead of +// the raw pointer. Value::element_type must be default constructable. +template +typename Collection::mapped_type& +LookupOrInsertNewSharedPtr( + Collection* const collection, + const typename Collection::key_type& key) { + typedef typename Collection::mapped_type SharedPtr; + typedef typename Collection::mapped_type::element_type Element; + std::pair ret = + collection->insert(typename Collection::value_type(key, SharedPtr())); + if (ret.second) { + ret.first->second.reset(new Element()); + } + return ret.first->second; +} + +// A variant of LookupOrInsertNewSharedPtr where the value is constructed using +// a single-parameter constructor. Note: the constructor argument is computed +// even if it will not be used, so only values cheap to compute should be passed +// here. On the other hand it does not matter how expensive the construction of +// the actual stored value is, as that only occurs if necessary. 
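// [Editor's illustration, not part of the original comment] For example, with a
// hypothetical Widget type that has a Widget(int) constructor:
//
//   std::map<std::string, std::shared_ptr<Widget>> cache;
//   std::shared_ptr<Widget>& w = LookupOrInsertNewSharedPtr(&cache, "k", 42);
//
// Widget(42) is constructed only if "k" was not already present in the map.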
+template +typename Collection::mapped_type& +LookupOrInsertNewSharedPtr( + Collection* const collection, + const typename Collection::key_type& key, + const Arg& arg) { + typedef typename Collection::mapped_type SharedPtr; + typedef typename Collection::mapped_type::element_type Element; + std::pair ret = + collection->insert(typename Collection::value_type(key, SharedPtr())); + if (ret.second) { + ret.first->second.reset(new Element(arg)); + } + return ret.first->second; +} + +// +// Misc Utility Functions +// + +// Updates the value associated with the given key. If the key was not already +// present, then the key-value pair are inserted and "previous" is unchanged. If +// the key was already present, the value is updated and "*previous" will +// contain a copy of the old value. +// +// InsertOrReturnExisting has complementary behavior that returns the +// address of an already existing value, rather than updating it. +template +bool UpdateReturnCopy(Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value, + typename Collection::mapped_type* previous) { + std::pair ret = + collection->insert(typename Collection::value_type(key, value)); + if (!ret.second) { + // update + if (previous) { + *previous = ret.first->second; + } + ret.first->second = value; + return true; + } + return false; +} + +// Same as above except that the key and value are passed as a pair. +template +bool UpdateReturnCopy(Collection* const collection, + const typename Collection::value_type& vt, + typename Collection::mapped_type* previous) { + std::pair ret = + collection->insert(vt); + if (!ret.second) { + // update + if (previous) { + *previous = ret.first->second; + } + ret.first->second = vt.second; + return true; + } + return false; +} + +// Tries to insert the given key-value pair into the collection. Returns NULL if +// the insert succeeds. Otherwise, returns a pointer to the existing value. +// +// This complements UpdateReturnCopy in that it allows to update only after +// verifying the old value and still insert quickly without having to look up +// twice. Unlike UpdateReturnCopy this also does not come with the issue of an +// undefined previous* in case new data was inserted. +template +typename Collection::mapped_type* const +InsertOrReturnExisting(Collection* const collection, + const typename Collection::value_type& vt) { + std::pair ret = collection->insert(vt); + if (ret.second) { + return NULL; // Inserted, no existing previous value. + } else { + return &ret.first->second; // Return address of already existing value. + } +} + +// Same as above, except for explicit key and data. +template +typename Collection::mapped_type* const +InsertOrReturnExisting( + Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& data) { + return InsertOrReturnExisting(collection, + typename Collection::value_type(key, data)); +} + +// Saves the reverse mapping into reverse. Key/value pairs are inserted in the +// order the iterator returns them. +template +void ReverseMap(const Collection& collection, + ReverseCollection* const reverse) { + CHECK(reverse != NULL); + for (typename Collection::const_iterator it = collection.begin(); + it != collection.end(); + ++it) { + InsertOrUpdate(reverse, it->second, it->first); + } +} + +// Erases the collection item identified by the given key, and returns the value +// associated with that key. 
It is assumed that the value (i.e., the +// mapped_type) is a pointer. Returns NULL if the key was not found in the +// collection. +// +// Examples: +// map my_map; +// +// One line cleanup: +// delete EraseKeyReturnValuePtr(&my_map, "abc"); +// +// Use returned value: +// gscoped_ptr value_ptr(EraseKeyReturnValuePtr(&my_map, "abc")); +// if (value_ptr.get()) +// value_ptr->DoSomething(); +// +// Note: if 'collection' is a multimap, this will only erase and return the +// first value. +template +typename Collection::mapped_type EraseKeyReturnValuePtr( + Collection* const collection, + const typename Collection::key_type& key) { + auto it = collection->find(key); + if (it == collection->end()) { + return typename Collection::mapped_type(); + } + typename Collection::mapped_type v = std::move(it->second); + collection->erase(it); + return v; +} + +// Inserts all the keys from map_container into key_container, which must +// support insert(MapContainer::key_type). +// +// Note: any initial contents of the key_container are not cleared. +template +void InsertKeysFromMap(const MapContainer& map_container, + KeyContainer* key_container) { + CHECK(key_container != NULL); + for (typename MapContainer::const_iterator it = map_container.begin(); + it != map_container.end(); ++it) { + key_container->insert(it->first); + } +} + +// Appends all the keys from map_container into key_container, which must +// support push_back(MapContainer::key_type). +// +// Note: any initial contents of the key_container are not cleared. +template +void AppendKeysFromMap(const MapContainer& map_container, + KeyContainer* key_container) { + CHECK(key_container != NULL); + for (typename MapContainer::const_iterator it = map_container.begin(); + it != map_container.end(); ++it) { + key_container->push_back(it->first); + } +} + +// A more specialized overload of AppendKeysFromMap to optimize reallocations +// for the common case in which we're appending keys to a vector and hence can +// (and sometimes should) call reserve() first. +// +// (It would be possible to play SFINAE games to call reserve() for any +// container that supports it, but this seems to get us 99% of what we need +// without the complexity of a SFINAE-based solution.) +template +void AppendKeysFromMap(const MapContainer& map_container, + std::vector* key_container) { + CHECK(key_container != NULL); + // We now have the opportunity to call reserve(). Calling reserve() every + // time is a bad idea for some use cases: libstdc++'s implementation of + // vector<>::reserve() resizes the vector's backing store to exactly the + // given size (unless it's already at least that big). Because of this, + // the use case that involves appending a lot of small maps (total size + // N) one by one to a vector would be O(N^2). But never calling reserve() + // loses the opportunity to improve the use case of adding from a large + // map to an empty vector (this improves performance by up to 33%). A + // number of heuristics are possible; see the discussion in + // cl/34081696. Here we use the simplest one. + if (key_container->empty()) { + key_container->reserve(map_container.size()); + } + for (typename MapContainer::const_iterator it = map_container.begin(); + it != map_container.end(); ++it) { + key_container->push_back(it->first); + } +} + +// Inserts all the values from map_container into value_container, which must +// support push_back(MapContainer::mapped_type). +// +// Note: any initial contents of the value_container are not cleared. 
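// [Editor's illustration, not part of the original comment] For example:
//
//   std::map<std::string, int> m = {{"a", 1}, {"b", 2}};
//   std::vector<int> values;
//   AppendValuesFromMap(m, &values);   // values == {1, 2}
//   std::vector<std::string> keys;
//   AppendKeysFromMap(m, &keys);       // keys == {"a", "b"}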
+template +void AppendValuesFromMap(const MapContainer& map_container, + ValueContainer* value_container) { + CHECK(value_container != NULL); + for (typename MapContainer::const_iterator it = map_container.begin(); + it != map_container.end(); ++it) { + value_container->push_back(it->second); + } +} + +template +void EmplaceValuesFromMap(MapContainer&& map_container, + ValueContainer* value_container) { + CHECK(value_container != nullptr); + // See AppendKeysFromMap for why this is done. + if (value_container->empty()) { + value_container->reserve(map_container.size()); + } + for (auto&& entry : map_container) { + value_container->emplace_back(std::move(entry.second)); + } +} + +// A more specialized overload of AppendValuesFromMap to optimize reallocations +// for the common case in which we're appending values to a vector and hence +// can (and sometimes should) call reserve() first. +// +// (It would be possible to play SFINAE games to call reserve() for any +// container that supports it, but this seems to get us 99% of what we need +// without the complexity of a SFINAE-based solution.) +template +void AppendValuesFromMap(const MapContainer& map_container, + std::vector* value_container) { + EmplaceValuesFromMap(map_container, value_container); +} + +// Compute and insert new value if it's absent from the map. Return a pair with a reference to the +// value and a bool indicating whether it was absent at first. +// +// This inspired on a similar java construct (url split in two lines): +// https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/ConcurrentHashMap.html +// #computeIfAbsent-K-java.util.function.Function +// +// It takes a reference to the key and a lambda function. If the key exists in the map, returns +// a pair with a pointer to the current value and 'false'. If the key does not exist in the map, +// it uses the lambda function to create a value, inserts it into the map, and returns a pair with +// a pointer to the new value and 'true'. +// +// Example usage: +// +// auto result = ComputeIfAbsentReturnAbsense(&my_collection, +// my_key, +// [] { return new_value; }); +// MyValue* const value = result.first; +// if (result.second) .... +// +// The ComputePair* variants expect a lambda that creates a pair. This +// can be useful if the key is a StringPiece pointing to external state to +// avoid excess memory for the keys, while being safer in multi-threaded +// contexts, e.g. in case the key goes out of scope before the container does. 
+// +// Example usage: +// +// map> string_to_idx; +// vector> pbs; +// auto result = ComputePairIfAbsentReturnAbsense(&string_to_idx, my_key, +// [&]() { +// unique_ptr s = new StringPB(); +// s->set_string(my_key); +// int idx = pbs.size(); +// pbs.emplace_back(s.release()); +// return make_pair(StringPiece(pbs.back()->string()), idx); +// }); +template +std::pair +ComputePairIfAbsentReturnAbsense(MapContainer* container, + const typename MapContainer::key_type& key, + Function compute_pair_func) { + typename MapContainer::iterator iter = container->find(key); + bool new_value = iter == container->end(); + if (new_value) { + auto p = compute_pair_func(); + std::pair result = + container->emplace(std::move(p.first), std::move(p.second)); + DCHECK(result.second) << "duplicate key: " << key; + iter = result.first; + } + return std::make_pair(&iter->second, new_value); +} +template +std::pair +ComputeIfAbsentReturnAbsense(MapContainer* container, + const typename MapContainer::key_type& key, + Function compute_func) { + return ComputePairIfAbsentReturnAbsense(container, key, [&key, &compute_func] { + return std::make_pair(key, compute_func()); + }); +}; + +// Like the above but doesn't return a pair, just returns a pointer to the value. +// Example usage: +// +// MyValue* const value = ComputeIfAbsent(&my_collection, +// my_key, +// [] { return new_value; }); +// +template +typename MapContainer::mapped_type* const +ComputeIfAbsent(MapContainer* container, + const typename MapContainer::key_type& key, + Function compute_func) { + return ComputeIfAbsentReturnAbsense(container, key, compute_func).first; +}; + +template +typename MapContainer::mapped_type* const +ComputePairIfAbsent(MapContainer* container, + const typename MapContainer::key_type& key, + Function compute_pair_func) { + return ComputePairIfAbsentReturnAbsense(container, key, compute_pair_func).first; +}; + +#endif // UTIL_GTL_MAP_UTIL_H_ diff --git a/be/src/gutil/ref_counted.cc b/be/src/gutil/ref_counted.cc new file mode 100644 index 0000000000..280d9df25e --- /dev/null +++ b/be/src/gutil/ref_counted.cc @@ -0,0 +1,95 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gutil/ref_counted.h" + +#include +#include "gutil/atomic_refcount.h" + +namespace doris { + +namespace subtle { + +RefCountedBase::RefCountedBase() + : ref_count_(0) +#ifndef NDEBUG + , in_dtor_(false) +#endif + { +} + +RefCountedBase::~RefCountedBase() { +#ifndef NDEBUG + DCHECK(in_dtor_) << "RefCounted object deleted without calling Release()"; +#endif +} + +void RefCountedBase::AddRef() const { + // TODO(maruel): Add back once it doesn't assert 500 times/sec. + // Current thread books the critical section "AddRelease" without release it. + // DFAKE_SCOPED_LOCK_THREAD_LOCKED(add_release_); +#ifndef NDEBUG + DCHECK(!in_dtor_); +#endif + ++ref_count_; +} + +bool RefCountedBase::Release() const { + // TODO(maruel): Add back once it doesn't assert 500 times/sec. + // Current thread books the critical section "AddRelease" without release it. 
+ // DFAKE_SCOPED_LOCK_THREAD_LOCKED(add_release_); +#ifndef NDEBUG + DCHECK(!in_dtor_); +#endif + if (--ref_count_ == 0) { +#ifndef NDEBUG + in_dtor_ = true; +#endif + return true; + } + return false; +} + +bool RefCountedThreadSafeBase::HasOneRef() const { + return base::RefCountIsOne( + &const_cast(this)->ref_count_); +} + +RefCountedThreadSafeBase::RefCountedThreadSafeBase() : ref_count_(0) { +#ifndef NDEBUG + in_dtor_ = false; +#endif +} + +RefCountedThreadSafeBase::~RefCountedThreadSafeBase() { +#ifndef NDEBUG + DCHECK(in_dtor_) << "RefCountedThreadSafe object deleted without " + "calling Release()"; +#endif +} + +void RefCountedThreadSafeBase::AddRef() const { +#ifndef NDEBUG + DCHECK(!in_dtor_); +#endif + base::RefCountInc(&ref_count_); +} + +bool RefCountedThreadSafeBase::Release() const { +#ifndef NDEBUG + DCHECK(!in_dtor_); + DCHECK(!base::RefCountIsZero(&ref_count_)); +#endif + if (!base::RefCountDec(&ref_count_)) { +#ifndef NDEBUG + in_dtor_ = true; +#endif + return true; + } + return false; +} + +} // namespace subtle + +} // namespace doris diff --git a/be/src/gutil/ref_counted.h b/be/src/gutil/ref_counted.h new file mode 100644 index 0000000000..5cd73b262c --- /dev/null +++ b/be/src/gutil/ref_counted.h @@ -0,0 +1,365 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_MEMORY_REF_COUNTED_H_ +#define BASE_MEMORY_REF_COUNTED_H_ + +#include +#include +#include // IWYU pragma: keep + +#include "gutil/atomicops.h" +#include "gutil/macros.h" +#include "gutil/threading/thread_collision_warner.h" + +namespace doris { +namespace subtle { + +typedef Atomic32 AtomicRefCount; + +class RefCountedBase { + public: + bool HasOneRef() const { return ref_count_ == 1; } + + protected: + RefCountedBase(); + ~RefCountedBase(); + + void AddRef() const; + + // Returns true if the object should self-delete. + bool Release() const; + + private: + mutable int ref_count_; +#ifndef NDEBUG + mutable bool in_dtor_; +#endif + + DFAKE_MUTEX(add_release_); + + DISALLOW_COPY_AND_ASSIGN(RefCountedBase); +}; + +class RefCountedThreadSafeBase { + public: + bool HasOneRef() const; + + protected: + RefCountedThreadSafeBase(); + ~RefCountedThreadSafeBase(); + + void AddRef() const; + + // Returns true if the object should self-delete. + bool Release() const; + + private: + mutable AtomicRefCount ref_count_; +#ifndef NDEBUG + mutable bool in_dtor_; +#endif + + DISALLOW_COPY_AND_ASSIGN(RefCountedThreadSafeBase); +}; + +} // namespace subtle + +// +// A base class for reference counted classes. Otherwise, known as a cheap +// knock-off of WebKit's RefCounted class. To use this guy just extend your +// class from it like so: +// +// class MyFoo : public RefCounted { +// ... +// private: +// friend class RefCounted; +// ~MyFoo(); +// }; +// +// You should always make your destructor private, to avoid any code deleting +// the object accidently while there are references to it. +template +class RefCounted : public subtle::RefCountedBase { + public: + RefCounted() {} + + void AddRef() const { + subtle::RefCountedBase::AddRef(); + } + + void Release() const { + if (subtle::RefCountedBase::Release()) { + delete static_cast(this); + } + } + + protected: + ~RefCounted() {} + + private: + DISALLOW_COPY_AND_ASSIGN(RefCounted); +}; + +// Forward declaration. +template class RefCountedThreadSafe; + +// Default traits for RefCountedThreadSafe. 
Deletes the object when its ref +// count reaches 0. Overload to delete it on a different thread etc. +template +struct DefaultRefCountedThreadSafeTraits { + static void Destruct(const T* x) { + // Delete through RefCountedThreadSafe to make child classes only need to be + // friend with RefCountedThreadSafe instead of this struct, which is an + // implementation detail. + RefCountedThreadSafe::DeleteInternal(x); + } +}; + +// +// A thread-safe variant of RefCounted +// +// class MyFoo : public RefCountedThreadSafe { +// ... +// }; +// +// If you're using the default trait, then you should add compile time +// asserts that no one else is deleting your object. i.e. +// private: +// friend class RefCountedThreadSafe; +// ~MyFoo(); +template > +class RefCountedThreadSafe : public subtle::RefCountedThreadSafeBase { + public: + RefCountedThreadSafe() {} + + void AddRef() const { + subtle::RefCountedThreadSafeBase::AddRef(); + } + + void Release() const { + if (subtle::RefCountedThreadSafeBase::Release()) { + Traits::Destruct(static_cast(this)); + } + } + + protected: + ~RefCountedThreadSafe() {} + + private: + friend struct DefaultRefCountedThreadSafeTraits; + static void DeleteInternal(const T* x) { delete x; } + + DISALLOW_COPY_AND_ASSIGN(RefCountedThreadSafe); +}; + +// +// A thread-safe wrapper for some piece of data so we can place other +// things in scoped_refptrs<>. +// +template +class RefCountedData + : public doris::RefCountedThreadSafe< doris::RefCountedData > { + public: + RefCountedData() : data() {} + RefCountedData(const T& in_value) : data(in_value) {} + + T data; + + private: + friend class doris::RefCountedThreadSafe >; + ~RefCountedData() {} +}; + +} // namespace doris + +// +// A smart pointer class for reference counted objects. Use this class instead +// of calling AddRef and Release manually on a reference counted object to +// avoid common memory leaks caused by forgetting to Release an object +// reference. Sample usage: +// +// class MyFoo : public RefCounted { +// ... +// }; +// +// void some_function() { +// scoped_refptr foo = new MyFoo(); +// foo->Method(param); +// // |foo| is released when this function returns +// } +// +// void some_other_function() { +// scoped_refptr foo = new MyFoo(); +// ... +// foo = NULL; // explicitly releases |foo| +// ... +// if (foo) +// foo->Method(param); +// } +// +// The above examples show how scoped_refptr acts like a pointer to T. +// Given two scoped_refptr classes, it is also possible to exchange +// references between the two objects, like so: +// +// { +// scoped_refptr a = new MyFoo(); +// scoped_refptr b; +// +// b.swap(a); +// // now, |b| references the MyFoo object, and |a| references NULL. +// } +// +// To make both |a| and |b| in the above example reference the same MyFoo +// object, simply use the assignment operator: +// +// { +// scoped_refptr a = new MyFoo(); +// scoped_refptr b; +// +// b = a; +// // now, |a| and |b| each own a reference to the same MyFoo object. +// } +// +template +class scoped_refptr { + public: + typedef T element_type; + + scoped_refptr() : ptr_(NULL) { + } + + scoped_refptr(T* p) : ptr_(p) { + if (ptr_) + ptr_->AddRef(); + } + + // Copy constructor. + scoped_refptr(const scoped_refptr& r) : ptr_(r.ptr_) { + if (ptr_) + ptr_->AddRef(); + } + + // Copy conversion constructor. + template + scoped_refptr(const scoped_refptr& r) : ptr_(r.get()) { + if (ptr_) + ptr_->AddRef(); + } + + // Move constructor. 
This is required in addition to the conversion + // constructor below in order for clang to warn about pessimizing moves. + scoped_refptr(scoped_refptr&& r) noexcept : ptr_(r.get()) { // NOLINT + r.ptr_ = nullptr; + } + + // Move conversion constructor. + template + scoped_refptr(scoped_refptr&& r) noexcept : ptr_(r.get()) { // NOLINT + r.ptr_ = nullptr; + } + + ~scoped_refptr() { + if (ptr_) + ptr_->Release(); + } + + T* get() const { return ptr_; } + +// The following is disabled in Kudu's version of this file since it's +// relatively dangerous. Chromium is planning on doing the same in their +// tree, but hasn't done so yet. See http://code.google.com/p/chromium/issues/detail?id=110610 +#if SCOPED_REFPTR_ALLOW_IMPLICIT_CONVERSION_TO_PTR + // Allow scoped_refptr to be used in boolean expression + // and comparison operations. + operator T*() const { return ptr_; } +#else + typedef T* scoped_refptr::*Testable; + operator Testable() const { return ptr_ ? &scoped_refptr::ptr_ : NULL; } +#endif + + T* operator->() const { + assert(ptr_ != NULL); + return ptr_; + } + + scoped_refptr& operator=(T* p) { + // AddRef first so that self assignment should work + if (p) + p->AddRef(); + T* old_ptr = ptr_; + ptr_ = p; + if (old_ptr) + old_ptr->Release(); + return *this; + } + + scoped_refptr& operator=(const scoped_refptr& r) { + return *this = r.ptr_; + } + + template + scoped_refptr& operator=(const scoped_refptr& r) { + return *this = r.get(); + } + + scoped_refptr& operator=(scoped_refptr&& r) { + scoped_refptr(std::move(r)).swap(*this); + return *this; + } + + template + scoped_refptr& operator=(scoped_refptr&& r) { + scoped_refptr(std::move(r)).swap(*this); + return *this; + } + + void swap(T** pp) { + T* p = ptr_; + ptr_ = *pp; + *pp = p; + } + + void swap(scoped_refptr& r) { + swap(&r.ptr_); + } + + // Like gscoped_ptr::reset(), drops a reference on the currently held object + // (if any), and adds a reference to the passed-in object (if not NULL). + void reset(T* p = NULL) { + *this = p; + } + + protected: + T* ptr_; + + private: + template friend class scoped_refptr; +}; + +// Handy utility for creating a scoped_refptr out of a T* explicitly without +// having to retype all the template arguments +template +scoped_refptr make_scoped_refptr(T* t) { + return scoped_refptr(t); +} + +// equal_to and hash implementations for templated scoped_refptrs suitable for +// use with STL unordered_* containers. +template +struct ScopedRefPtrEqualToFunctor { + bool operator()(const scoped_refptr& x, const scoped_refptr& y) const { + return x.get() == y.get(); + } +}; + +template +struct ScopedRefPtrHashFunctor { + size_t operator()(const scoped_refptr& p) const { + return reinterpret_cast(p.get()); + } +}; + +#endif // BASE_MEMORY_REF_COUNTED_H_ diff --git a/be/src/gutil/sysinfo-test.cc b/be/src/gutil/sysinfo-test.cc new file mode 100644 index 0000000000..85ccd11448 --- /dev/null +++ b/be/src/gutil/sysinfo-test.cc @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gutil/sysinfo.h" + +#include + +namespace doris { + +TEST(SysInfoTest, ReadMaxCpuIndexTest) { + using base::ParseMaxCpuIndex; + EXPECT_EQ(0, ParseMaxCpuIndex("0\n")); + EXPECT_EQ(1, ParseMaxCpuIndex("1\n")); + EXPECT_EQ(7, ParseMaxCpuIndex("0-7\n")); + EXPECT_EQ(40, ParseMaxCpuIndex("0-7,30-40\n")); + EXPECT_EQ(143, ParseMaxCpuIndex("2,4-127,128-143\n")); + EXPECT_EQ(44, ParseMaxCpuIndex("44-44\n")); + + // Don't assume that ranges are in ascending order or non-overlapping, + // just in case. + EXPECT_EQ(8, ParseMaxCpuIndex("0-7,5-8\n")); + EXPECT_EQ(7, ParseMaxCpuIndex("0-7,5-6\n")); + EXPECT_EQ(3, ParseMaxCpuIndex("2-3,0-1\n")); + EXPECT_EQ(3, ParseMaxCpuIndex("2-3,0\n")); + EXPECT_EQ(3, ParseMaxCpuIndex("3,0-2\n")); + + // Invalid inputs. + EXPECT_EQ(-1, ParseMaxCpuIndex("")); + EXPECT_EQ(-1, ParseMaxCpuIndex("\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex(" ")); + EXPECT_EQ(-1, ParseMaxCpuIndex("a\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("0\n1\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("\n1\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("\n1")); + EXPECT_EQ(-1, ParseMaxCpuIndex("0-\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("-2\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1-9qwerty\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1-9,0-\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,3-\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,a-4\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,3@4\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("3-2\n")); + + // Overflows in various positions. + EXPECT_EQ(-1, ParseMaxCpuIndex("2147483648")); // 2^31 + EXPECT_EQ(-1, ParseMaxCpuIndex("18446744073709551617")); // 2^64 + 1 + EXPECT_EQ(-1, ParseMaxCpuIndex("999999999999999999999999999999999999999999999999")); + EXPECT_EQ(-1, ParseMaxCpuIndex("0-2147483648")); // 2^31 + EXPECT_EQ(-1, ParseMaxCpuIndex("0-18446744073709551617")); // 2^64 + 1 + EXPECT_EQ(-1, ParseMaxCpuIndex("0-999999999999999999999999999999999999999999999999")); + EXPECT_EQ(-1, ParseMaxCpuIndex("2147483648-1")); // 2^31 + EXPECT_EQ(-1, ParseMaxCpuIndex("18446744073709551617-1")); // 2^64 + 1 + EXPECT_EQ(-1, ParseMaxCpuIndex("999999999999999999999999999999999999999999999999-1")); +} + +} // namespace doris diff --git a/be/src/gutil/sysinfo.cc b/be/src/gutil/sysinfo.cc new file mode 100644 index 0000000000..4365b18ff8 --- /dev/null +++ b/be/src/gutil/sysinfo.cc @@ -0,0 +1,474 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32) +# define PLATFORM_WINDOWS 1 +#endif + +#include +#include // for open() +#include // for read() + +#if defined __MACH__ // Mac OS X, almost certainly +#include // how we figure out numcpu's on OS X +#include +#elif defined __FreeBSD__ +#include +#elif defined __sun__ // Solaris +#include // for, e.g., prmap_t +#elif defined(PLATFORM_WINDOWS) +#include // for getpid() (actually, _getpid()) +#include // for SHGetValueA() +#include // for Module32First() +#endif + +#include "gutil/sysinfo.h" + +#include +#include // for errno +#include // for snprintf(), sscanf() +#include // for getenv() +#include // for memmove(), memchr(), etc. +#include +#include +#include + +#include + +#include "gutil/dynamic_annotations.h" // for RunningOnValgrind +#include "gutil/integral_types.h" +#include "gutil/macros.h" +#include "gutil/port.h" +#include "gutil/walltime.h" + +using std::numeric_limits; + +// This isn't in the 'base' namespace in tcmallc. But, tcmalloc +// exports these functions, so we need to namespace them to avoid +// the conflict. +namespace base { + +// ---------------------------------------------------------------------- +// CyclesPerSecond() +// NumCPUs() +// It's important this not call malloc! -- they may be called at +// global-construct time, before we've set up all our proper malloc +// hooks and such. +// ---------------------------------------------------------------------- + +static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous +static int cpuinfo_num_cpus = 1; // Conservative guess +static int cpuinfo_max_cpu_index = -1; + +void SleepForNanoseconds(int64_t nanoseconds) { + // Sleep for nanosecond duration + struct timespec sleep_time; + sleep_time.tv_sec = nanoseconds / 1000 / 1000 / 1000; + sleep_time.tv_nsec = (nanoseconds % (1000 * 1000 * 1000)); + while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) + ; // Ignore signals and wait for the full interval to elapse. +} + +void SleepForMilliseconds(int64_t milliseconds) { + SleepForNanoseconds(milliseconds * 1000 * 1000); +} + +// Helper function estimates cycles/sec by observing cycles elapsed during +// sleep(). Using small sleep time decreases accuracy significantly. 
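+// For example (an editorial sketch of the arithmetic below): with
+// estimate_time_ms = 100 the multiplier is 1000 / 100 = 10; if the cycle
+// counter advances by roughly 2.0e8 ticks across that 100 ms sleep, the
+// estimate is 2.0e8 * 10 = 2.0e9 cycles/sec, i.e. about a 2 GHz clock.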
+static int64 EstimateCyclesPerSecond(const int estimate_time_ms) { + CHECK(estimate_time_ms > 0); + if (estimate_time_ms <= 0) + return 1; + double multiplier = 1000.0 / (double)estimate_time_ms; // scale by this much + + const int64 start_ticks = CycleClock::Now(); + SleepForMilliseconds(estimate_time_ms); + const int64 guess = int64(multiplier * (CycleClock::Now() - start_ticks)); + return guess; +} + +// ReadIntFromFile is only called on linux and cygwin platforms. +#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) + +// Slurp a file with a single read() call into 'buf'. This is only safe to use on small +// files in places like /proc where we are guaranteed not to get a partial read. +// Any remaining bytes in the buffer are zeroed. +// +// 'buflen' must be more than large enough to hold the whole file, or else this will +// issue a FATAL error. +static bool SlurpSmallTextFile(const char* file, char* buf, int buflen) { + bool ret = false; + int fd; + RETRY_ON_EINTR(fd, open(file, O_RDONLY)); + if (fd == -1) return ret; + + memset(buf, '\0', buflen); + int n; + RETRY_ON_EINTR(n, read(fd, buf, buflen - 1)); + CHECK_NE(n, buflen - 1) << "buffer of len " << buflen << " not large enough to store " + << "contents of " << file; + if (n > 0) { + ret = true; + } + + int close_ret; + RETRY_ON_EINTR(close_ret, close(fd)); + if (PREDICT_FALSE(close_ret != 0)) { + PLOG(WARNING) << "Failed to close fd " << fd; + } + + return ret; +} + +// Helper function for reading an int from a file. Returns true if successful +// and the memory location pointed to by value is set to the value read. +static bool ReadIntFromFile(const char *file, int *value) { + char line[1024]; + if (!SlurpSmallTextFile(file, line, arraysize(line))) { + return false; + } + char* err; + const int temp_value = strtol(line, &err, 10); + if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { + *value = temp_value; + return true; + } + return false; +} + +static int ReadMaxCPUIndex() { + char buf[1024]; + // TODO(tarmstrong): KUDU-2730: 'present' doesn't include CPUs that could be hotplugged + // in the future. 'possible' does, but using it instead could result in a blow-up in the + // number of per-CPU data structures. + CHECK(SlurpSmallTextFile("/sys/devices/system/cpu/present", buf, arraysize(buf))); + int max_idx = ParseMaxCpuIndex(buf); + CHECK_GE(max_idx, 0) << "unable to parse max CPU index from: " << buf; + return max_idx; +} + +int ParseMaxCpuIndex(const char* str) { + DCHECK(str != nullptr); + const char* pos = str; + // Initialize max_idx to invalid so we can just return if we find zero ranges. + int max_idx = -1; + + while (true) { + const char* range_start = pos; + const char* dash = nullptr; + // Scan forward until we find the separator indicating end of range, which is always a + // newline or comma if the input is valid. + for (; *pos != ',' && *pos != '\n'; pos++) { + // Check for early end of string - bail here to avoid advancing past end. + if (*pos == '\0') return -1; + if (*pos == '-') { + // Multiple dashes in range is invalid. + if (dash != nullptr) return -1; + dash = pos; + } else if (!isdigit(*pos)) { + return -1; + } + } + + // At this point we found a range [range_start, pos) comprised of digits and an + // optional dash. + const char* num_start = dash == nullptr ? range_start : dash + 1; + // Check for ranges with missing numbers, e.g. "", "3-", "-3". 
+ if (num_start == pos || dash == range_start) return -1; + // The numbers are comprised only of digits, so it can only fail if it is out of + // range of int (the return type of this function). + unsigned long start_idx = strtoul(range_start, nullptr, 10); + if (start_idx > numeric_limits::max()) return -1; + unsigned long end_idx = strtoul(num_start, nullptr, 10); + if (end_idx > numeric_limits::max() || start_idx > end_idx) { + return -1; + } + // Keep track of the max index we've seen so far. + max_idx = std::max(static_cast(end_idx), max_idx); + // End of line, expect no more input. + if (*pos == '\n') break; + ++pos; + } + // String must have a single newline at the very end. + if (*pos != '\n' || *(pos + 1) != '\0') return -1; + return max_idx; +} + +#endif + +// WARNING: logging calls back to InitializeSystemInfo() so it must +// not invoke any logging code. Also, InitializeSystemInfo() can be +// called before main() -- in fact it *must* be since already_called +// isn't protected -- before malloc hooks are properly set up, so +// we make an effort not to call any routines which might allocate +// memory. + +static void InitializeSystemInfo() { + static bool already_called = false; // safe if we run before threads + if (already_called) return; + already_called = true; + + bool saw_mhz = false; + + if (RunningOnValgrind()) { + // Valgrind may slow the progress of time artificially (--scale-time=N + // option). We thus can't rely on CPU Mhz info stored in /sys or /proc + // files. Thus, actually measure the cps. + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(100); + saw_mhz = true; + } + +#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) + char line[1024]; + char* err; + int freq; + + // If the kernel is exporting the tsc frequency use that. There are issues + // where cpuinfo_max_freq cannot be relied on because the BIOS may be + // exporintg an invalid p-state (on x86) or p-states may be used to put the + // processor in a new mode (turbo mode). Essentially, those frequencies + // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as + // well. + if (!saw_mhz && + ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) { + // The value is in kHz (as the file name suggests). For example, on a + // 2GHz warpstation, the file contains the value "2000000". + cpuinfo_cycles_per_second = freq * 1000.0; + saw_mhz = true; + } + + // If CPU scaling is in effect, we want to use the *maximum* frequency, + // not whatever CPU speed some random processor happens to be using now. + if (!saw_mhz && + ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &freq)) { + // The value is in kHz. For example, on a 2GHz machine, the file + // contains the value "2000000". + cpuinfo_cycles_per_second = freq * 1000.0; + saw_mhz = true; + } + + // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. + const char* pname = "/proc/cpuinfo"; + int fd; + RETRY_ON_EINTR(fd, open(pname, O_RDONLY)); + if (fd == -1) { + PLOG(FATAL) << "Unable to read CPU info from /proc. 
procfs must be mounted."; + } + + double bogo_clock = 1.0; + bool saw_bogo = false; + int num_cpus = 0; + line[0] = line[1] = '\0'; + int chars_read = 0; + do { // we'll exit when the last read didn't read anything + // Move the next line to the beginning of the buffer + const int oldlinelen = strlen(line); + if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line + line[0] = '\0'; + else // still other lines left to save + memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1)); + // Terminate the new line, reading more if we can't find the newline + char* newline = strchr(line, '\n'); + if (newline == NULL) { + const int linelen = strlen(line); + const int bytes_to_read = sizeof(line)-1 - linelen; + CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes + RETRY_ON_EINTR(chars_read, read(fd, line + linelen, bytes_to_read)); + line[linelen + chars_read] = '\0'; + newline = strchr(line, '\n'); + } + if (newline != NULL) + *newline = '\0'; + +#if defined(__powerpc__) || defined(__ppc__) + // PowerPC cpus report the frequency in "clock" line + if (strncasecmp(line, "clock", sizeof("clock")-1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + // PowerPC frequencies are only reported as MHz (check 'show_cpuinfo' + // function at arch/powerpc/kernel/setup-common.c) + char *endp = strstr(line, "MHz"); + if (endp) { + *endp = 0; + cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) + saw_mhz = true; + } + } +#else + // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only + // accept postive values. Some environments (virtual machines) report zero, + // which would cause infinite looping in WallTime_Init. + if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) + saw_mhz = true; + } + } else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + bogo_clock = strtod(freqstr+1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) + saw_bogo = true; + } +#endif + } else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) { + num_cpus++; // count up every time we see an "processor :" entry + } + } while (chars_read > 0); + int ret; + RETRY_ON_EINTR(ret, close(fd)); + if (PREDICT_FALSE(ret != 0)) { + PLOG(WARNING) << "Failed to close fd " << fd; + } + + if (!saw_mhz) { + if (saw_bogo) { + // If we didn't find anything better, we'll use bogomips, but + // we're not happy about it. + cpuinfo_cycles_per_second = bogo_clock; + } else { + // If we don't even have bogomips, we'll use the slow estimation. + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + } + } + if (cpuinfo_cycles_per_second == 0.0) { + cpuinfo_cycles_per_second = 1.0; // maybe unnecessary, but safe + } + if (num_cpus > 0) { + cpuinfo_num_cpus = num_cpus; + } + cpuinfo_max_cpu_index = ReadMaxCPUIndex(); + +#elif defined __FreeBSD__ + // For this sysctl to work, the machine must be configured without + // SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 + // and later. Before that, it's a 32-bit quantity (and gives the + // wrong answer on machines faster than 2^32 Hz). 
See + // http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html + // But also compare FreeBSD 7.0: + // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 + // 231 error = sysctl_handle_quad(oidp, &freq, 0, req); + // To FreeBSD 6.3 (it's the same in 6-STABLE): + // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 + // 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); +#if __FreeBSD__ >= 7 + uint64_t hz = 0; +#else + unsigned int hz = 0; +#endif + size_t sz = sizeof(hz); + const char *sysctl_path = "machdep.tsc_freq"; + if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) { + fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", + sysctl_path, strerror(errno)); + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + } else { + cpuinfo_cycles_per_second = hz; + } + // TODO(csilvers): also figure out cpuinfo_num_cpus + +#elif defined(PLATFORM_WINDOWS) +# pragma comment(lib, "shlwapi.lib") // for SHGetValue() + // In NT, read MHz from the registry. If we fail to do so or we're in win9x + // then make a crude estimate. + OSVERSIONINFO os; + os.dwOSVersionInfoSize = sizeof(os); + DWORD data, data_size = sizeof(data); + if (GetVersionEx(&os) && + os.dwPlatformId == VER_PLATFORM_WIN32_NT && + SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE, + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", + "~MHz", NULL, &data, &data_size))) + cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz + else + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500? + + // Get the number of processors. + SYSTEM_INFO info; + GetSystemInfo(&info); + cpuinfo_num_cpus = info.dwNumberOfProcessors; + +#elif defined(__MACH__) && defined(__APPLE__) + // returning "mach time units" per second. the current number of elapsed + // mach time units can be found by calling uint64 mach_absolute_time(); + // while not as precise as actual CPU cycles, it is accurate in the face + // of CPU frequency scaling and multi-cpu/core machines. + // Our mac users have these types of machines, and accuracy + // (i.e. correctness) trumps precision. + // See cycleclock.h: CycleClock::Now(), which returns number of mach time + // units on Mac OS X. + mach_timebase_info_data_t timebase_info; + mach_timebase_info(&timebase_info); + double mach_time_units_per_nanosecond = + static_cast(timebase_info.denom) / + static_cast(timebase_info.numer); + cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9; + + int num_cpus = 0; + size_t size = sizeof(num_cpus); + int numcpus_name[] = { CTL_HW, HW_NCPU }; + if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0) + == 0 + && (size == sizeof(num_cpus))) + cpuinfo_num_cpus = num_cpus; + +#else + // Generic cycles per second counter + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); +#endif + + // On platforms where we can't determine the max CPU index, just use the + // number of CPUs. This might break if CPUs are taken offline, but + // better than a wild guess. 
+ if (cpuinfo_max_cpu_index < 0) { + cpuinfo_max_cpu_index = cpuinfo_num_cpus - 1; + } +} + +double CyclesPerSecond(void) { + InitializeSystemInfo(); + return cpuinfo_cycles_per_second; +} + +int NumCPUs(void) { + InitializeSystemInfo(); + return cpuinfo_num_cpus; +} + +int MaxCPUIndex(void) { + InitializeSystemInfo(); + return cpuinfo_max_cpu_index; +} + +} // namespace base diff --git a/be/src/gutil/sysinfo.h b/be/src/gutil/sysinfo.h new file mode 100644 index 0000000000..d46cfe5504 --- /dev/null +++ b/be/src/gutil/sysinfo.h @@ -0,0 +1,69 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef _SYSINFO_H_ +#define _SYSINFO_H_ + +#include + +namespace base { + +// Return the number of online CPUs. This is computed and cached the first time this or +// NumCPUs() is called, so does not reflect any CPUs enabled or disabled at a later +// point in time. +// +// Note that, if not all CPUs are online, this may return a value lower than the maximum +// value of sched_getcpu(). +extern int NumCPUs(); + +// Return the maximum CPU index that may be returned by sched_getcpu(). For example, on +// an 8-core machine, this will return '7' even if some of the CPUs have been disabled. +extern int MaxCPUIndex(); + +void SleepForNanoseconds(int64_t nanoseconds); +void SleepForMilliseconds(int64_t milliseconds); + +// processor cycles per second of each processor. Thread-safe. +extern double CyclesPerSecond(void); + +// Parse the maximum CPU index from 'str'. The list is in the format of the CPU lists +// under /sys/devices/system/cpu/, e.g. /sys/devices/system/cpu/present. Returns the +// index of the max CPU or -1 if the string could not be parsed. +// Examples of the format and the expected output include: +// * "0\n" -> 0 +// * "0-8\n" -> 8 +// * "0-15,32-47\n" -> 47 +// * "2,4-127,128-143\n" -> 143 +// Ref: https://www.kernel.org/doc/Documentation/cputopology.txt +// Exposed for testing. 
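+// For example (an editorial sketch):
+//   int max_idx = base::ParseMaxCpuIndex("0-15,32-47\n");  // returns 47
+// Outside of tests, callers would normally rely on MaxCPUIndex() above
+// instead, e.g. to size a per-CPU array that sched_getcpu() can index:
+//   std::vector<int64_t> per_cpu_counters(base::MaxCPUIndex() + 1);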
+extern int ParseMaxCpuIndex(const char* str); + +} // namespace base +#endif /* #ifndef _SYSINFO_H_ */ diff --git a/be/src/gutil/threading/thread_collision_warner.cc b/be/src/gutil/threading/thread_collision_warner.cc new file mode 100644 index 0000000000..26329cfb9c --- /dev/null +++ b/be/src/gutil/threading/thread_collision_warner.cc @@ -0,0 +1,93 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gutil/threading/thread_collision_warner.h" + +#ifdef __linux__ +#include +#else +#include +#endif + +#include + +#include +#include + +#include + +namespace base { + +void DCheckAsserter::warn(int64_t previous_thread_id, int64_t current_thread_id) { + LOG(FATAL) << "Thread Collision! Previous thread id: " << previous_thread_id + << ", current thread id: " << current_thread_id; +} + +#if 0 +// Original source from Chromium -- we didn't import their threading library +// into Kudu source as of yet + +static subtle::Atomic32 CurrentThread() { + const PlatformThreadId current_thread_id = PlatformThread::CurrentId(); + // We need to get the thread id into an atomic data type. This might be a + // truncating conversion, but any loss-of-information just increases the + // chance of a fault negative, not a false positive. + const subtle::Atomic32 atomic_thread_id = + static_cast(current_thread_id); + + return atomic_thread_id; +} +#else + +static subtle::Atomic64 CurrentThread() { +#if defined(__APPLE__) + uint64_t tid; + CHECK_EQ(0, pthread_threadid_np(NULL, &tid)); + return tid; +#elif defined(__linux__) + return syscall(__NR_gettid); +#endif +} + +#endif + +void ThreadCollisionWarner::EnterSelf() { + // If the active thread is 0 then I'll write the current thread ID + // if two or more threads arrive here only one will succeed to + // write on valid_thread_id_ the current thread ID. + subtle::Atomic64 current_thread_id = CurrentThread(); + + int64_t previous_thread_id = subtle::NoBarrier_CompareAndSwap(&valid_thread_id_, + 0, + current_thread_id); + if (previous_thread_id != 0 && previous_thread_id != current_thread_id) { + // gotcha! a thread is trying to use the same class and that is + // not current thread. + asserter_->warn(previous_thread_id, current_thread_id); + } + + subtle::NoBarrier_AtomicIncrement(&counter_, 1); +} + +void ThreadCollisionWarner::Enter() { + subtle::Atomic64 current_thread_id = CurrentThread(); + + int64_t previous_thread_id = subtle::NoBarrier_CompareAndSwap(&valid_thread_id_, + 0, + current_thread_id); + if (previous_thread_id != 0) { + // gotcha! another thread is trying to use the same class. + asserter_->warn(previous_thread_id, current_thread_id); + } + + subtle::NoBarrier_AtomicIncrement(&counter_, 1); +} + +void ThreadCollisionWarner::Leave() { + if (subtle::Barrier_AtomicIncrement(&counter_, -1) == 0) { + subtle::NoBarrier_Store(&valid_thread_id_, 0); + } +} + +} // namespace base diff --git a/be/src/gutil/threading/thread_collision_warner.h b/be/src/gutil/threading/thread_collision_warner.h new file mode 100644 index 0000000000..5d9a9ac692 --- /dev/null +++ b/be/src/gutil/threading/thread_collision_warner.h @@ -0,0 +1,247 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef BASE_THREADING_THREAD_COLLISION_WARNER_H_ +#define BASE_THREADING_THREAD_COLLISION_WARNER_H_ + +#include + +#include "gutil/atomicops.h" +#include "gutil/macros.h" + +#ifndef BASE_EXPORT +#define BASE_EXPORT +#endif + +// A helper class alongside macros to be used to verify assumptions about thread +// safety of a class. +// +// Example: Queue implementation non thread-safe but still usable if clients +// are synchronized somehow. +// +// In this case the macro DFAKE_SCOPED_LOCK has to be +// used, it checks that if a thread is inside the push/pop then +// noone else is still inside the pop/push +// +// class NonThreadSafeQueue { +// public: +// ... +// void push(int) { DFAKE_SCOPED_LOCK(push_pop_); ... } +// int pop() { DFAKE_SCOPED_LOCK(push_pop_); ... } +// ... +// private: +// DFAKE_MUTEX(push_pop_); +// }; +// +// +// Example: Queue implementation non thread-safe but still usable if clients +// are synchronized somehow, it calls a method to "protect" from +// a "protected" method +// +// In this case the macro DFAKE_SCOPED_RECURSIVE_LOCK +// has to be used, it checks that if a thread is inside the push/pop +// then noone else is still inside the pop/push +// +// class NonThreadSafeQueue { +// public: +// void push(int) { +// DFAKE_SCOPED_LOCK(push_pop_); +// ... +// } +// int pop() { +// DFAKE_SCOPED_RECURSIVE_LOCK(push_pop_); +// bar(); +// ... +// } +// void bar() { DFAKE_SCOPED_RECURSIVE_LOCK(push_pop_); ... } +// ... +// private: +// DFAKE_MUTEX(push_pop_); +// }; +// +// +// Example: Queue implementation not usable even if clients are synchronized, +// so only one thread in the class life cycle can use the two members +// push/pop. +// +// In this case the macro DFAKE_SCOPED_LOCK_THREAD_LOCKED pins the +// specified +// critical section the first time a thread enters push or pop, from +// that time on only that thread is allowed to execute push or pop. +// +// class NonThreadSafeQueue { +// public: +// ... +// void push(int) { DFAKE_SCOPED_LOCK_THREAD_LOCKED(push_pop_); ... } +// int pop() { DFAKE_SCOPED_LOCK_THREAD_LOCKED(push_pop_); ... } +// ... +// private: +// DFAKE_MUTEX(push_pop_); +// }; +// +// +// Example: Class that has to be contructed/destroyed on same thread, it has +// a "shareable" method (with external synchronization) and a not +// shareable method (even with external synchronization). +// +// In this case 3 Critical sections have to be defined +// +// class ExoticClass { +// public: +// ExoticClass() { DFAKE_SCOPED_LOCK_THREAD_LOCKED(ctor_dtor_); ... } +// ~ExoticClass() { DFAKE_SCOPED_LOCK_THREAD_LOCKED(ctor_dtor_); ... } +// +// void Shareable() { DFAKE_SCOPED_LOCK(shareable_section_); ... } +// void NotShareable() { DFAKE_SCOPED_LOCK_THREAD_LOCKED(ctor_dtor_); ... } +// ... +// private: +// DFAKE_MUTEX(ctor_dtor_); +// DFAKE_MUTEX(shareable_section_); +// }; + + +#if !defined(NDEBUG) + +// Defines a class member that acts like a mutex. It is used only as a +// verification tool. +#define DFAKE_MUTEX(obj) \ + mutable base::ThreadCollisionWarner obj +// Asserts the call is never called simultaneously in two threads. Used at +// member function scope. +#define DFAKE_SCOPED_LOCK(obj) \ + base::ThreadCollisionWarner::ScopedCheck s_check_##obj(&obj) +// Asserts the call is never called simultaneously in two threads. Used at +// member function scope. Same as DFAKE_SCOPED_LOCK but allows recursive locks. 
+#define DFAKE_SCOPED_RECURSIVE_LOCK(obj) \ + base::ThreadCollisionWarner::ScopedRecursiveCheck sr_check_##obj(&obj) +// Asserts the code is always executed in the same thread. +#define DFAKE_SCOPED_LOCK_THREAD_LOCKED(obj) \ + base::ThreadCollisionWarner::Check check_##obj(&obj) + +#else + +#define DFAKE_MUTEX(obj) typedef void InternalFakeMutexType##obj +#define DFAKE_SCOPED_LOCK(obj) ((void)0) +#define DFAKE_SCOPED_RECURSIVE_LOCK(obj) ((void)0) +#define DFAKE_SCOPED_LOCK_THREAD_LOCKED(obj) ((void)0) + +#endif + +namespace base { + +// The class ThreadCollisionWarner uses an Asserter to notify the collision +// AsserterBase is the interfaces and DCheckAsserter is the default asserter +// used. During the unit tests is used another class that doesn't "DCHECK" +// in case of collision (check thread_collision_warner_unittests.cc) +struct BASE_EXPORT AsserterBase { + virtual ~AsserterBase() {} + virtual void warn(int64_t previous_thread_id, int64_t current_thread_id) = 0; +}; + +struct BASE_EXPORT DCheckAsserter : public AsserterBase { + virtual ~DCheckAsserter() {} + void warn(int64_t previous_thread_id, int64_t current_thread_id) override; +}; + +class BASE_EXPORT ThreadCollisionWarner { + public: + // The parameter asserter is there only for test purpose + explicit ThreadCollisionWarner(AsserterBase* asserter = new DCheckAsserter()) + : valid_thread_id_(0), + counter_(0), + asserter_(asserter) {} + + ~ThreadCollisionWarner() { + delete asserter_; + } + + // This class is meant to be used through the macro + // DFAKE_SCOPED_LOCK_THREAD_LOCKED + // it doesn't leave the critical section, as opposed to ScopedCheck, + // because the critical section being pinned is allowed to be used only + // from one thread + class BASE_EXPORT Check { + public: + explicit Check(ThreadCollisionWarner* warner) + : warner_(warner) { + warner_->EnterSelf(); + } + + ~Check() {} + + private: + ThreadCollisionWarner* warner_; + + DISALLOW_COPY_AND_ASSIGN(Check); + }; + + // This class is meant to be used through the macro + // DFAKE_SCOPED_LOCK + class BASE_EXPORT ScopedCheck { + public: + explicit ScopedCheck(ThreadCollisionWarner* warner) + : warner_(warner) { + warner_->Enter(); + } + + ~ScopedCheck() { + warner_->Leave(); + } + + private: + ThreadCollisionWarner* warner_; + + DISALLOW_COPY_AND_ASSIGN(ScopedCheck); + }; + + // This class is meant to be used through the macro + // DFAKE_SCOPED_RECURSIVE_LOCK + class BASE_EXPORT ScopedRecursiveCheck { + public: + explicit ScopedRecursiveCheck(ThreadCollisionWarner* warner) + : warner_(warner) { + warner_->EnterSelf(); + } + + ~ScopedRecursiveCheck() { + warner_->Leave(); + } + + private: + ThreadCollisionWarner* warner_; + + DISALLOW_COPY_AND_ASSIGN(ScopedRecursiveCheck); + }; + + private: + // This method stores the current thread identifier and does a DCHECK + // if a another thread has already done it, it is safe if same thread + // calls this multiple time (recursion allowed). + void EnterSelf(); + + // Same as EnterSelf but recursion is not allowed. + void Enter(); + + // Removes the thread_id stored in order to allow other threads to + // call EnterSelf or Enter. + void Leave(); + + // This stores the thread id that is inside the critical section, if the + // value is 0 then no thread is inside. + volatile subtle::Atomic64 valid_thread_id_; + + // Counter to trace how many time a critical section was "pinned" + // (when allowed) in order to unpin it when counter_ reaches 0. 
+ volatile subtle::Atomic64 counter_; + + // Here only for class unit tests purpose, during the test I need to not + // DCHECK but notify the collision with something else. + AsserterBase* asserter_; + + DISALLOW_COPY_AND_ASSIGN(ThreadCollisionWarner); +}; + +} // namespace base + +#endif // BASE_THREADING_THREAD_COLLISION_WARNER_H_ diff --git a/be/src/gutil/walltime.h b/be/src/gutil/walltime.h new file mode 100644 index 0000000000..337535f449 --- /dev/null +++ b/be/src/gutil/walltime.h @@ -0,0 +1,216 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef GUTIL_WALLTIME_H_ +#define GUTIL_WALLTIME_H_ + +#include + +#include +#include + +#if defined(__APPLE__) +#include +#include +#include + +#include + +#include "gutil/once.h" +#endif // #if defined(__APPLE__) + +#include "gutil/integral_types.h" + +typedef double WallTime; + +// Append result to a supplied string. +// If an error occurs during conversion 'dst' is not modified. +void StringAppendStrftime(std::string* dst, + const char* format, + time_t when, + bool local); + +// Return the given timestamp (in seconds since the epoch) as a string suitable +// for user display in the current timezone. +std::string TimestampAsString(time_t timestamp_secs); + +// Return the local time as a string suitable for user display. +std::string LocalTimeAsString(); + +// Similar to the WallTime_Parse, but it takes a boolean flag local as +// argument specifying if the time_spec is in local time or UTC +// time. If local is set to true, the same exact result as +// WallTime_Parse is returned. +bool WallTime_Parse_Timezone(const char* time_spec, + const char* format, + const struct tm* default_time, + bool local, + WallTime* result); + +// Return current time in seconds as a WallTime. +WallTime WallTime_Now(); + +typedef int64 MicrosecondsInt64; + +namespace walltime_internal { + +#if defined(__APPLE__) + +extern GoogleOnceType timebase_info_once; +extern mach_timebase_info_data_t timebase_info; +extern void InitializeTimebaseInfo(); + +inline void GetCurrentTime(mach_timespec_t* ts) { + clock_serv_t cclock; + host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); + CHECK_EQ(KERN_SUCCESS, clock_get_time(cclock, ts)); + mach_port_deallocate(mach_task_self(), cclock); +} + +inline MicrosecondsInt64 GetCurrentTimeMicros() { + mach_timespec_t ts; + GetCurrentTime(&ts); + // 'tv_sec' is just 4 bytes on macOS, need to be careful not + // to convert to nanos until we've moved to a larger int. 
+ MicrosecondsInt64 micros_from_secs = ts.tv_sec; + micros_from_secs *= 1000 * 1000; + micros_from_secs += ts.tv_nsec / 1000; + return micros_from_secs; +} + +inline int64_t GetMonoTimeNanos() { + // See Apple Technical Q&A QA1398 for further detail on mono time in OS X. + GoogleOnceInit(&timebase_info_once, &InitializeTimebaseInfo); + + uint64_t time = mach_absolute_time(); + + // mach_absolute_time returns ticks, which need to be scaled by the timebase + // info to get nanoseconds. + return time * timebase_info.numer / timebase_info.denom; +} + +inline MicrosecondsInt64 GetMonoTimeMicros() { + return GetMonoTimeNanos() / 1000; +} + +inline MicrosecondsInt64 GetThreadCpuTimeMicros() { + // See https://www.gnu.org/software/hurd/gnumach-doc/Thread-Information.html + // and Chromium base/time/time_mac.cc. + task_t thread = mach_thread_self(); + if (thread == MACH_PORT_NULL) { + LOG(WARNING) << "Failed to get mach_thread_self()"; + return 0; + } + + mach_msg_type_number_t thread_info_count = THREAD_BASIC_INFO_COUNT; + thread_basic_info_data_t thread_info_data; + + kern_return_t result = thread_info( + thread, + THREAD_BASIC_INFO, + reinterpret_cast(&thread_info_data), + &thread_info_count); + + if (result != KERN_SUCCESS) { + LOG(WARNING) << "Failed to get thread_info()"; + return 0; + } + + return thread_info_data.user_time.seconds * 1000000 + thread_info_data.user_time.microseconds; +} + +#else + +inline MicrosecondsInt64 GetClockTimeMicros(clockid_t clock) { + timespec ts; + clock_gettime(clock, &ts); + // 'tv_sec' is usually 8 bytes, but the spec says it only + // needs to be 'a signed int'. Moved to a 64 bit var before + // converting to micros to be safe. + MicrosecondsInt64 micros_from_secs = ts.tv_sec; + micros_from_secs *= 1000 * 1000; + micros_from_secs += ts.tv_nsec / 1000; + return micros_from_secs; +} + +#endif // defined(__APPLE__) + +} // namespace walltime_internal + +// Returns the time since the Epoch measured in microseconds. +inline MicrosecondsInt64 GetCurrentTimeMicros() { +#if defined(__APPLE__) + return walltime_internal::GetCurrentTimeMicros(); +#else + return walltime_internal::GetClockTimeMicros(CLOCK_REALTIME); +#endif // defined(__APPLE__) +} + +// Returns the time since some arbitrary reference point, measured in microseconds. +// Guaranteed to be monotonic (and therefore useful for measuring intervals), +// but the underlying clock is subject for adjustment by adjtime() and +// the kernel's NTP discipline. For example, the underlying clock might +// be slewed a bit to reach some reference point, time to time adjusted to be +// of the desired result frequency, etc. +inline MicrosecondsInt64 GetMonoTimeMicros() { +#if defined(__APPLE__) + // In fact, walltime_internal::GetMonoTimeMicros() is implemented via + // mach_absolute_time() which is not actually affected by adjtime() + // or the NTP discipline. On Darwin 16.0 and newer (macOS 10.12 and newer), + // it's the same as clock_gettime(CLOCK_UPTIME_RAW); see 'man clock_gettime' + // on macOS 10.12 and newer. + return walltime_internal::GetMonoTimeMicros(); +#else + return walltime_internal::GetClockTimeMicros(CLOCK_MONOTONIC); +#endif // defined(__APPLE__) +} + +// Returns the time since some arbitrary reference point, measured in microseconds. +// Guaranteed to be monotonic and not affected at all by frequency and time +// adjustments such as adjtime() or the kernel's NTP discipline. 
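+// Either monotonic variant is the right tool for timing an interval
+// (an editorial sketch; DoWork() is a placeholder):
+//   MicrosecondsInt64 start = GetMonoTimeMicros();
+//   DoWork();
+//   MicrosecondsInt64 elapsed_us = GetMonoTimeMicros() - start;
+// Prefer the *Raw flavor below when even small NTP-driven rate adjustments
+// over the measured interval would matter.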
+inline MicrosecondsInt64 GetMonoTimeMicrosRaw() { +#if defined(__APPLE__) + return walltime_internal::GetMonoTimeMicros(); +#else + return walltime_internal::GetClockTimeMicros(CLOCK_MONOTONIC_RAW); +#endif // defined(__APPLE__) +} + +// Returns the time spent in user CPU on the current thread, measured in microseconds. +inline MicrosecondsInt64 GetThreadCpuTimeMicros() { +#if defined(__APPLE__) + return walltime_internal::GetThreadCpuTimeMicros(); +#else + return walltime_internal::GetClockTimeMicros(CLOCK_THREAD_CPUTIME_ID); +#endif // defined(__APPLE__) +} + +// A CycleClock yields the value of a cycle counter that increments at a rate +// that is approximately constant. +class CycleClock { + public: + // Return the value of the counter. + static inline int64 Now(); + + private: + CycleClock(); +}; + +// inline method bodies +#include "gutil/cycleclock-inl.h" // IWYU pragma: export +#endif // GUTIL_WALLTIME_H_
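As a closing illustration, here is a minimal sketch of how the reference-counting pieces introduced by this patch are meant to be used together. The 'Session' class and 'Demo()' function are hypothetical names for this example only; the pattern (private destructor plus a friend declaration for the base) follows the guidance in the ref_counted.h comments above.

    #include "gutil/ref_counted.h"

    class Session : public doris::RefCountedThreadSafe<Session> {
     public:
      void Touch() { /* ... */ }

     private:
      // Only Release() (via the default traits) may delete a Session.
      friend class doris::RefCountedThreadSafe<Session>;
      ~Session() {}
    };

    void Demo() {
      scoped_refptr<Session> a(new Session());  // ref count becomes 1
      scoped_refptr<Session> b = a;             // ref count becomes 2
      b->Touch();
      a.reset();                                // drops a's reference, count back to 1
    }                                           // 'b' destroyed here, count hits 0, Session deleted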