Add faststring and cpu util (#1281)
This commit is contained in:
@ -45,7 +45,8 @@ add_library(Gutil STATIC
|
||||
strings/substitute.cc
|
||||
strings/util.cc
|
||||
strtoint.cc
|
||||
utf/rune.c)
|
||||
utf/rune.c
|
||||
cpu.cc)
|
||||
|
||||
set_target_properties(Gutil PROPERTIES COMPILE_FLAGS "-funsigned-char -Wno-deprecated -Wno-char-subscripts")
|
||||
# target_link_libraries(Gutil glog protobuf rt)
|
||||
|
||||
286
be/src/gutil/cpu.cc
Normal file
286
be/src/gutil/cpu.cc
Normal file
@ -0,0 +1,286 @@
|
||||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "gutil/cpu.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <utility>
|
||||
|
||||
#include "gutil/integral_types.h"
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#include <immintrin.h> // For _xgetbv()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace base {
|
||||
|
||||
CPU::CPU()
|
||||
: signature_(0),
|
||||
type_(0),
|
||||
family_(0),
|
||||
model_(0),
|
||||
stepping_(0),
|
||||
ext_model_(0),
|
||||
ext_family_(0),
|
||||
has_mmx_(false),
|
||||
has_sse_(false),
|
||||
has_sse2_(false),
|
||||
has_sse3_(false),
|
||||
has_ssse3_(false),
|
||||
has_sse41_(false),
|
||||
has_sse42_(false),
|
||||
has_avx_(false),
|
||||
has_avx2_(false),
|
||||
has_aesni_(false),
|
||||
has_non_stop_time_stamp_counter_(false),
|
||||
has_broken_neon_(false),
|
||||
cpu_vendor_("unknown") {
|
||||
Initialize();
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#ifndef _MSC_VER
|
||||
|
||||
#if defined(__pic__) && defined(__i386__)
|
||||
|
||||
// Executes the CPUID instruction for leaf |info_type|, sub-leaf 0, and stores
// the resulting EAX, EBX, ECX and EDX values in cpu_info[0..3], in that order.
//
// This variant preserves EBX across the instruction: EBX is copied into EDI
// before CPUID runs and the two registers are exchanged afterwards, so the
// EBX result reaches the caller through EDI ("=D") while EBX itself leaves
// the asm block with the value it had on entry.
void __cpuid(int cpu_info[4], int info_type) {
    __asm__ volatile (
        "mov %%ebx, %%edi\n"
        "cpuid\n"
        "xchg %%edi, %%ebx\n"
        : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
        // ECX selects the sub-leaf for leaves such as 7. Pin it to zero, as
        // the non-PIC variant of this function does, so the result never
        // depends on whatever happened to be left in ECX.
        : "a"(info_type), "c"(0)
    );
}
|
||||
|
||||
#else
|
||||
|
||||
// Runs CPUID for leaf |info_type| with ECX (the sub-leaf selector) forced to
// zero, then copies the resulting EAX, EBX, ECX and EDX values into
// cpu_info[0] through cpu_info[3].
void __cpuid(int cpu_info[4], int info_type) {
    int reg_a;
    int reg_b;
    int reg_c;
    int reg_d;

    __asm__ volatile("cpuid\n"
                     : "=a"(reg_a), "=b"(reg_b), "=c"(reg_c), "=d"(reg_d)
                     : "a"(info_type), "c"(0));

    cpu_info[0] = reg_a;
    cpu_info[1] = reg_b;
    cpu_info[2] = reg_c;
    cpu_info[3] = reg_d;
}
|
||||
|
||||
#endif
|
||||
|
||||
// _xgetbv returns the value of an Intel Extended Control Register (XCR).
|
||||
// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
|
||||
uint64 _xgetbv(uint32 xcr) {
    // XGETBV takes the register index in ECX and hands the 64-bit value back
    // split across EDX (upper 32 bits) and EAX (lower 32 bits).
    uint32 low_half;
    uint32 high_half;

    __asm__ volatile("xgetbv" : "=a"(low_half), "=d"(high_half) : "c"(xcr));

    const uint64 upper_bits = static_cast<uint64>(high_half) << 32;
    return upper_bits | low_half;
}
|
||||
|
||||
#endif // !_MSC_VER
|
||||
#endif // __x86_64__
|
||||
|
||||
#if defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX))
|
||||
class LazyCpuInfoValue {
|
||||
public:
|
||||
LazyCpuInfoValue() : has_broken_neon_(false) {
|
||||
// This function finds the value from /proc/cpuinfo under the key "model
|
||||
// name" or "Processor". "model name" is used in Linux 3.8 and later (3.7
|
||||
// and later for arm64) and is shown once per CPU. "Processor" is used in
|
||||
// earler versions and is shown only once at the top of /proc/cpuinfo
|
||||
// regardless of the number CPUs.
|
||||
const char kModelNamePrefix[] = "model name\t: ";
|
||||
const char kProcessorPrefix[] = "Processor\t: ";
|
||||
|
||||
// This function also calculates whether we believe that this CPU has a
|
||||
// broken NEON unit based on these fields from cpuinfo:
|
||||
unsigned implementer = 0, architecture = 0, variant = 0, part = 0,
|
||||
revision = 0;
|
||||
const struct {
|
||||
const char key[17];
|
||||
unsigned int* result;
|
||||
} kUnsignedValues[] = {
|
||||
{"CPU implementer", &implementer},
|
||||
{"CPU architecture", &architecture},
|
||||
{"CPU variant", &variant},
|
||||
{"CPU part", &part},
|
||||
{"CPU revision", &revision},
|
||||
};
|
||||
|
||||
std::string contents;
|
||||
ReadFileToString(FilePath("/proc/cpuinfo"), &contents);
|
||||
DCHECK(!contents.empty());
|
||||
if (contents.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::istringstream iss(contents);
|
||||
std::string line;
|
||||
while (std::getline(iss, line)) {
|
||||
if (brand_.empty() &&
|
||||
(line.compare(0, strlen(kModelNamePrefix), kModelNamePrefix) == 0 ||
|
||||
line.compare(0, strlen(kProcessorPrefix), kProcessorPrefix) == 0)) {
|
||||
brand_.assign(line.substr(strlen(kModelNamePrefix)));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < arraysize(kUnsignedValues); i++) {
|
||||
const char *key = kUnsignedValues[i].key;
|
||||
const size_t len = strlen(key);
|
||||
|
||||
if (line.compare(0, len, key) == 0 &&
|
||||
line.size() >= len + 1 &&
|
||||
(line[len] == '\t' || line[len] == ' ' || line[len] == ':')) {
|
||||
size_t colon_pos = line.find(':', len);
|
||||
if (colon_pos == std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const StringPiece line_sp(line);
|
||||
StringPiece value_sp = line_sp.substr(colon_pos + 1);
|
||||
while (!value_sp.empty() &&
|
||||
(value_sp[0] == ' ' || value_sp[0] == '\t')) {
|
||||
value_sp = value_sp.substr(1);
|
||||
}
|
||||
|
||||
// The string may have leading "0x" or not, so we use strtoul to
|
||||
// handle that.
|
||||
char* endptr;
|
||||
std::string value(value_sp.as_string());
|
||||
unsigned long int result = strtoul(value.c_str(), &endptr, 0);
|
||||
if (*endptr == 0 && result <= UINT_MAX) {
|
||||
*kUnsignedValues[i].result = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
has_broken_neon_ =
|
||||
implementer == 0x51 &&
|
||||
architecture == 7 &&
|
||||
variant == 1 &&
|
||||
part == 0x4d &&
|
||||
revision == 0;
|
||||
}
|
||||
|
||||
const std::string& brand() const { return brand_; }
|
||||
bool has_broken_neon() const { return has_broken_neon_; }
|
||||
|
||||
private:
|
||||
std::string brand_;
|
||||
bool has_broken_neon_;
|
||||
DISALLOW_COPY_AND_ASSIGN(LazyCpuInfoValue);
|
||||
};
|
||||
|
||||
base::LazyInstance<LazyCpuInfoValue>::Leaky g_lazy_cpuinfo =
|
||||
LAZY_INSTANCE_INITIALIZER;
|
||||
|
||||
#endif // defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) ||
|
||||
// defined(OS_LINUX))
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
void CPU::Initialize() {
|
||||
#if defined(__x86_64__)
|
||||
int cpu_info[4] = {-1};
|
||||
char cpu_string[48];
|
||||
|
||||
// __cpuid with an InfoType argument of 0 returns the number of
|
||||
// valid Ids in CPUInfo[0] and the CPU identification string in
|
||||
// the other three array elements. The CPU identification string is
|
||||
// not in linear order. The code below arranges the information
|
||||
// in a human readable form. The human readable order is CPUInfo[1] |
|
||||
// CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped
|
||||
// before using memcpy to copy these three array elements to cpu_string.
|
||||
__cpuid(cpu_info, 0);
|
||||
int num_ids = cpu_info[0];
|
||||
std::swap(cpu_info[2], cpu_info[3]);
|
||||
memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1]));
|
||||
cpu_vendor_.assign(cpu_string, 3 * sizeof(cpu_info[1]));
|
||||
|
||||
// Interpret CPU feature information.
|
||||
if (num_ids > 0) {
|
||||
int cpu_info7[4] = {0};
|
||||
__cpuid(cpu_info, 1);
|
||||
if (num_ids >= 7) {
|
||||
__cpuid(cpu_info7, 7);
|
||||
}
|
||||
signature_ = cpu_info[0];
|
||||
stepping_ = cpu_info[0] & 0xf;
|
||||
model_ = ((cpu_info[0] >> 4) & 0xf) + ((cpu_info[0] >> 12) & 0xf0);
|
||||
family_ = (cpu_info[0] >> 8) & 0xf;
|
||||
type_ = (cpu_info[0] >> 12) & 0x3;
|
||||
ext_model_ = (cpu_info[0] >> 16) & 0xf;
|
||||
ext_family_ = (cpu_info[0] >> 20) & 0xff;
|
||||
has_mmx_ = (cpu_info[3] & 0x00800000) != 0;
|
||||
has_sse_ = (cpu_info[3] & 0x02000000) != 0;
|
||||
has_sse2_ = (cpu_info[3] & 0x04000000) != 0;
|
||||
has_sse3_ = (cpu_info[2] & 0x00000001) != 0;
|
||||
has_ssse3_ = (cpu_info[2] & 0x00000200) != 0;
|
||||
has_sse41_ = (cpu_info[2] & 0x00080000) != 0;
|
||||
has_sse42_ = (cpu_info[2] & 0x00100000) != 0;
|
||||
// AVX instructions will generate an illegal instruction exception unless
|
||||
// a) they are supported by the CPU,
|
||||
// b) XSAVE is supported by the CPU and
|
||||
// c) XSAVE is enabled by the kernel.
|
||||
// See http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
|
||||
//
|
||||
// In addition, we have observed some crashes with the xgetbv instruction
|
||||
// even after following Intel's example code. (See crbug.com/375968.)
|
||||
// Because of that, we also test the XSAVE bit because its description in
|
||||
// the CPUID documentation suggests that it signals xgetbv support.
|
||||
has_avx_ =
|
||||
(cpu_info[2] & 0x10000000) != 0 &&
|
||||
(cpu_info[2] & 0x04000000) != 0 /* XSAVE */ &&
|
||||
(cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ &&
|
||||
(_xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */;
|
||||
has_aesni_ = (cpu_info[2] & 0x02000000) != 0;
|
||||
has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0;
|
||||
}
|
||||
|
||||
// Get the brand string of the cpu.
|
||||
__cpuid(cpu_info, 0x80000000);
|
||||
const int parameter_end = 0x80000004;
|
||||
int max_parameter = cpu_info[0];
|
||||
|
||||
if (cpu_info[0] >= parameter_end) {
|
||||
char* cpu_string_ptr = cpu_string;
|
||||
|
||||
for (int parameter = 0x80000002; parameter <= parameter_end &&
|
||||
cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) {
|
||||
__cpuid(cpu_info, parameter);
|
||||
memcpy(cpu_string_ptr, cpu_info, sizeof(cpu_info));
|
||||
cpu_string_ptr += sizeof(cpu_info);
|
||||
}
|
||||
cpu_brand_.assign(cpu_string, cpu_string_ptr - cpu_string);
|
||||
}
|
||||
|
||||
const int parameter_containing_non_stop_time_stamp_counter = 0x80000007;
|
||||
if (max_parameter >= parameter_containing_non_stop_time_stamp_counter) {
|
||||
__cpuid(cpu_info, parameter_containing_non_stop_time_stamp_counter);
|
||||
has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0;
|
||||
}
|
||||
#elif defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX))
|
||||
cpu_brand_.assign(g_lazy_cpuinfo.Get().brand());
|
||||
has_broken_neon_ = g_lazy_cpuinfo.Get().has_broken_neon();
|
||||
#else
|
||||
#error unknown architecture
|
||||
#endif
|
||||
}
|
||||
|
||||
// Maps the detected feature flags to a single IntelMicroArchitecture value:
// the most capable level whose flag is set, or PENTIUM when none is.
CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const {
    // Walk the levels from least to most capable; the last flag that is set
    // wins, which is the same as picking the highest one available.
    IntelMicroArchitecture level = PENTIUM;
    if (has_sse()) {
        level = SSE;
    }
    if (has_sse2()) {
        level = SSE2;
    }
    if (has_sse3()) {
        level = SSE3;
    }
    if (has_ssse3()) {
        level = SSSE3;
    }
    if (has_sse41()) {
        level = SSE41;
    }
    if (has_sse42()) {
        level = SSE42;
    }
    if (has_avx()) {
        level = AVX;
    }
    if (has_avx2()) {
        level = AVX2;
    }
    return level;
}
|
||||
|
||||
} // namespace base
|
||||
90
be/src/gutil/cpu.h
Normal file
90
be/src/gutil/cpu.h
Normal file
@ -0,0 +1,90 @@
|
||||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_CPU_H_
|
||||
#define BASE_CPU_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace base {
|
||||
|
||||
// Query information about the processor.
|
||||
class CPU {
public:
    // Instruction-set levels, ordered from least to most capable.
    // MAX_INTEL_MICRO_ARCHITECTURE is a count/sentinel, not a real level.
    enum IntelMicroArchitecture {
        PENTIUM,
        SSE,
        SSE2,
        SSE3,
        SSSE3,
        SSE41,
        SSE42,
        AVX,
        AVX2,
        MAX_INTEL_MICRO_ARCHITECTURE
    };

    // Detects the current processor; all accessors below report what was
    // found at construction time.
    CPU();

    // --- Identification -------------------------------------------------
    const std::string& vendor_name() const { return cpu_vendor_; }
    const std::string& cpu_brand() const { return cpu_brand_; }
    int signature() const { return signature_; }
    int type() const { return type_; }
    int family() const { return family_; }
    int model() const { return model_; }
    int stepping() const { return stepping_; }
    int extended_model() const { return ext_model_; }
    int extended_family() const { return ext_family_; }

    // --- Feature flags --------------------------------------------------
    bool has_mmx() const { return has_mmx_; }
    bool has_sse() const { return has_sse_; }
    bool has_sse2() const { return has_sse2_; }
    bool has_sse3() const { return has_sse3_; }
    bool has_ssse3() const { return has_ssse3_; }
    bool has_sse41() const { return has_sse41_; }
    bool has_sse42() const { return has_sse42_; }
    bool has_avx() const { return has_avx_; }
    bool has_avx2() const { return has_avx2_; }
    bool has_aesni() const { return has_aesni_; }
    bool has_non_stop_time_stamp_counter() const {
        return has_non_stop_time_stamp_counter_;
    }

    // Only meaningful on ARM chips. When true, the NEON unit of the current
    // CPU is believed to be flawed and unable to execute some code.
    // See https://code.google.com/p/chromium/issues/detail?id=341598
    bool has_broken_neon() const { return has_broken_neon_; }

    // Summarizes the feature flags as a single instruction-set level.
    IntelMicroArchitecture GetIntelMicroArchitecture() const;

private:
    // Queries the processor and fills in the fields below.
    void Initialize();

    int signature_;   // raw form of type, family, model, and stepping
    int type_;        // processor type
    int family_;      // family of the processor
    int model_;       // model of the processor
    int stepping_;    // processor revision number
    int ext_model_;   // extended model field of the signature
    int ext_family_;  // extended family field of the signature
    bool has_mmx_;
    bool has_sse_;
    bool has_sse2_;
    bool has_sse3_;
    bool has_ssse3_;
    bool has_sse41_;
    bool has_sse42_;
    bool has_avx_;
    bool has_avx2_;
    bool has_aesni_;
    bool has_non_stop_time_stamp_counter_;
    bool has_broken_neon_;
    std::string cpu_vendor_;
    std::string cpu_brand_;
};
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_CPU_H_
|
||||
@ -75,6 +75,7 @@ set(UTIL_FILES
|
||||
string_util.cpp
|
||||
md5.cpp
|
||||
frontend_helper.cpp
|
||||
faststring.cc
|
||||
)
|
||||
|
||||
if (WITH_MYSQL)
|
||||
|
||||
72
be/src/util/faststring.cc
Normal file
72
be/src/util/faststring.cc
Normal file
@ -0,0 +1,72 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "util/faststring.h"
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <memory>
|
||||
|
||||
namespace kudu {
|
||||
|
||||
void faststring::GrowByAtLeast(size_t count) {
|
||||
// Not enough space, need to reserve more.
|
||||
// Don't reserve exactly enough space for the new string -- that makes it
|
||||
// too easy to write perf bugs where you get O(n^2) append.
|
||||
// Instead, alwayhs expand by at least 50%.
|
||||
|
||||
size_t to_reserve = len_ + count;
|
||||
if (len_ + count < len_ * 3 / 2) {
|
||||
to_reserve = len_ * 3 / 2;
|
||||
}
|
||||
GrowArray(to_reserve);
|
||||
}
|
||||
|
||||
void faststring::GrowArray(size_t newcapacity) {
|
||||
DCHECK_GE(newcapacity, capacity_);
|
||||
std::unique_ptr<uint8_t[]> newdata(new uint8_t[newcapacity]);
|
||||
if (len_ > 0) {
|
||||
memcpy(&newdata[0], &data_[0], len_);
|
||||
}
|
||||
capacity_ = newcapacity;
|
||||
if (data_ != initial_data_) {
|
||||
delete[] data_;
|
||||
} else {
|
||||
ASAN_POISON_MEMORY_REGION(initial_data_, arraysize(initial_data_));
|
||||
}
|
||||
|
||||
data_ = newdata.release();
|
||||
ASAN_POISON_MEMORY_REGION(data_ + len_, capacity_ - len_);
|
||||
}
|
||||
|
||||
void faststring::ShrinkToFitInternal() {
|
||||
DCHECK_NE(data_, initial_data_);
|
||||
if (len_ <= kInitialCapacity) {
|
||||
ASAN_UNPOISON_MEMORY_REGION(initial_data_, len_);
|
||||
memcpy(initial_data_, &data_[0], len_);
|
||||
delete[] data_;
|
||||
data_ = initial_data_;
|
||||
capacity_ = kInitialCapacity;
|
||||
} else {
|
||||
std::unique_ptr<uint8_t[]> newdata(new uint8_t[len_]);
|
||||
memcpy(&newdata[0], &data_[0], len_);
|
||||
delete[] data_;
|
||||
data_ = newdata.release();
|
||||
capacity_ = len_;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace kudu
|
||||
257
be/src/util/faststring.h
Normal file
257
be/src/util/faststring.h
Normal file
@ -0,0 +1,257 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
#include "gutil/dynamic_annotations.h"
|
||||
#include "gutil/macros.h"
|
||||
#include "gutil/port.h"
|
||||
#include "gutil/strings/fastmem.h"
|
||||
|
||||
namespace kudu {
|
||||
|
||||
// A faststring is similar to a std::string, except that it is faster for many
|
||||
// common use cases (in particular, resize() will fill with uninitialized data
|
||||
// instead of memsetting to \0)
|
||||
class faststring {
|
||||
public:
|
||||
enum {
|
||||
kInitialCapacity = 32
|
||||
};
|
||||
|
||||
faststring() :
|
||||
data_(initial_data_),
|
||||
len_(0),
|
||||
capacity_(kInitialCapacity) {
|
||||
}
|
||||
|
||||
// Construct a string with the given capacity, in bytes.
|
||||
explicit faststring(size_t capacity)
|
||||
: data_(initial_data_),
|
||||
len_(0),
|
||||
capacity_(kInitialCapacity) {
|
||||
if (capacity > capacity_) {
|
||||
data_ = new uint8_t[capacity];
|
||||
capacity_ = capacity;
|
||||
}
|
||||
ASAN_POISON_MEMORY_REGION(data_, capacity_);
|
||||
}
|
||||
|
||||
~faststring() {
|
||||
ASAN_UNPOISON_MEMORY_REGION(initial_data_, arraysize(initial_data_));
|
||||
if (data_ != initial_data_) {
|
||||
delete[] data_;
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the valid length of the string to 0.
|
||||
//
|
||||
// This does not free up any memory. The capacity of the string remains unchanged.
|
||||
void clear() {
|
||||
resize(0);
|
||||
ASAN_POISON_MEMORY_REGION(data_, capacity_);
|
||||
}
|
||||
|
||||
// Resize the string to the given length.
|
||||
// If the new length is larger than the old length, the capacity is expanded as necessary.
|
||||
//
|
||||
// NOTE: in contrast to std::string's implementation, Any newly "exposed" bytes of data are
|
||||
// not cleared.
|
||||
void resize(size_t newsize) {
|
||||
if (newsize > capacity_) {
|
||||
reserve(newsize);
|
||||
}
|
||||
len_ = newsize;
|
||||
ASAN_POISON_MEMORY_REGION(data_ + len_, capacity_ - len_);
|
||||
ASAN_UNPOISON_MEMORY_REGION(data_, len_);
|
||||
}
|
||||
|
||||
// Releases the underlying array; after this, the buffer is left empty.
|
||||
//
|
||||
// NOTE: the data pointer returned by release() is not necessarily the pointer
|
||||
uint8_t *release() WARN_UNUSED_RESULT {
|
||||
uint8_t *ret = data_;
|
||||
if (ret == initial_data_) {
|
||||
ret = new uint8_t[len_];
|
||||
memcpy(ret, data_, len_);
|
||||
}
|
||||
len_ = 0;
|
||||
capacity_ = kInitialCapacity;
|
||||
data_ = initial_data_;
|
||||
ASAN_POISON_MEMORY_REGION(data_, capacity_);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Reserve space for the given total amount of data. If the current capacity is already
|
||||
// larger than the newly requested capacity, this is a no-op (i.e. it does not ever free memory).
|
||||
//
|
||||
// NOTE: even though the new capacity is reserved, it is illegal to begin writing into that memory
|
||||
// directly using pointers. If ASAN is enabled, this is ensured using manual memory poisoning.
|
||||
void reserve(size_t newcapacity) {
|
||||
if (PREDICT_TRUE(newcapacity <= capacity_)) return;
|
||||
GrowArray(newcapacity);
|
||||
}
|
||||
|
||||
// Append the given data to the string, resizing capacity as necessary.
|
||||
void append(const void *src_v, size_t count) {
|
||||
const uint8_t *src = reinterpret_cast<const uint8_t *>(src_v);
|
||||
EnsureRoomForAppend(count);
|
||||
ASAN_UNPOISON_MEMORY_REGION(data_ + len_, count);
|
||||
|
||||
// appending short values is common enough that this
|
||||
// actually helps, according to benchmarks. In theory
|
||||
// memcpy_inlined should already be just as good, but this
|
||||
// was ~20% faster for reading a large prefix-coded string file
|
||||
// where each string was only a few chars different
|
||||
if (count <= 4) {
|
||||
uint8_t *p = &data_[len_];
|
||||
for (int i = 0; i < count; i++) {
|
||||
*p++ = *src++;
|
||||
}
|
||||
} else {
|
||||
strings::memcpy_inlined(&data_[len_], src, count);
|
||||
}
|
||||
len_ += count;
|
||||
}
|
||||
|
||||
// Append the given string to this string.
|
||||
void append(const std::string &str) {
|
||||
append(str.data(), str.size());
|
||||
}
|
||||
|
||||
// Append the given character to this string.
|
||||
void push_back(const char byte) {
|
||||
EnsureRoomForAppend(1);
|
||||
ASAN_UNPOISON_MEMORY_REGION(data_ + len_, 1);
|
||||
data_[len_] = byte;
|
||||
len_++;
|
||||
}
|
||||
|
||||
// Return the valid length of this string.
|
||||
size_t length() const {
|
||||
return len_;
|
||||
}
|
||||
|
||||
// Return the valid length of this string (identical to length())
|
||||
size_t size() const {
|
||||
return len_;
|
||||
}
|
||||
|
||||
// Return the allocated capacity of this string.
|
||||
size_t capacity() const {
|
||||
return capacity_;
|
||||
}
|
||||
|
||||
// Return a pointer to the data in this string. Note that this pointer
|
||||
// may be invalidated by any later non-const operation.
|
||||
const uint8_t *data() const {
|
||||
return &data_[0];
|
||||
}
|
||||
|
||||
// Return a pointer to the data in this string. Note that this pointer
|
||||
// may be invalidated by any later non-const operation.
|
||||
uint8_t *data() {
|
||||
return &data_[0];
|
||||
}
|
||||
|
||||
// Return the given element of this string. Note that this does not perform
|
||||
// any bounds checking.
|
||||
const uint8_t &at(size_t i) const {
|
||||
return data_[i];
|
||||
}
|
||||
|
||||
// Return the given element of this string. Note that this does not perform
|
||||
// any bounds checking.
|
||||
const uint8_t &operator[](size_t i) const {
|
||||
return data_[i];
|
||||
}
|
||||
|
||||
// Return the given element of this string. Note that this does not perform
|
||||
// any bounds checking.
|
||||
uint8_t &operator[](size_t i) {
|
||||
return data_[i];
|
||||
}
|
||||
|
||||
// Reset the contents of this string by copying 'len' bytes from 'src'.
|
||||
void assign_copy(const uint8_t *src, size_t len) {
|
||||
// Reset length so that the first resize doesn't need to copy the current
|
||||
// contents of the array.
|
||||
len_ = 0;
|
||||
resize(len);
|
||||
memcpy(data(), src, len);
|
||||
}
|
||||
|
||||
// Reset the contents of this string by copying from the given std::string.
|
||||
void assign_copy(const std::string &str) {
|
||||
assign_copy(reinterpret_cast<const uint8_t *>(str.c_str()),
|
||||
str.size());
|
||||
}
|
||||
|
||||
// Reallocates the internal storage to fit only the current data.
|
||||
//
|
||||
// This may revert to using internal storage if the current length is shorter than
|
||||
// kInitialCapacity. Note that, in that case, after this call, capacity() will return
|
||||
// a capacity larger than the data length.
|
||||
//
|
||||
// Any pointers within this instance are invalidated.
|
||||
void shrink_to_fit() {
|
||||
if (data_ == initial_data_ || capacity_ == len_) return;
|
||||
ShrinkToFitInternal();
|
||||
}
|
||||
|
||||
// Return a copy of this string as a std::string.
|
||||
std::string ToString() const {
|
||||
return std::string(reinterpret_cast<const char *>(data()),
|
||||
len_);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(faststring);
|
||||
|
||||
// If necessary, expand the buffer to fit at least 'count' more bytes.
|
||||
// If the array has to be grown, it is grown by at least 50%.
|
||||
void EnsureRoomForAppend(size_t count) {
|
||||
if (PREDICT_TRUE(len_ + count <= capacity_)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Call the non-inline slow path - this reduces the number of instructions
|
||||
// on the hot path.
|
||||
GrowByAtLeast(count);
|
||||
}
|
||||
|
||||
// The slow path of MakeRoomFor. Grows the buffer by either
|
||||
// 'count' bytes, or 50%, whichever is more.
|
||||
void GrowByAtLeast(size_t count);
|
||||
|
||||
// Grow the array to the given capacity, which must be more than
|
||||
// the current capacity.
|
||||
void GrowArray(size_t newcapacity);
|
||||
|
||||
void ShrinkToFitInternal();
|
||||
|
||||
uint8_t* data_;
|
||||
uint8_t initial_data_[kInitialCapacity];
|
||||
size_t len_;
|
||||
size_t capacity_;
|
||||
};
|
||||
|
||||
} // namespace kudu
|
||||
@ -40,3 +40,4 @@ ADD_BE_TEST(arena_test)
|
||||
ADD_BE_TEST(aes_util_test)
|
||||
ADD_BE_TEST(md5_test)
|
||||
ADD_BE_TEST(bitmap_test)
|
||||
ADD_BE_TEST(faststring_test)
|
||||
83
be/test/util/faststring_test.cpp
Normal file
83
be/test/util/faststring_test.cpp
Normal file
@ -0,0 +1,83 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "util/faststring.h"
|
||||
#include "util/random.h"
|
||||
|
||||
namespace kudu {
|
||||
class FaststringTest : public ::testing::Test {};
|
||||
|
||||
void RandomString(void* dest, size_t n, doris::Random* rng) {
|
||||
size_t i = 0;
|
||||
uint32_t random = rng->Next();
|
||||
char* cdest = static_cast<char*>(dest);
|
||||
static const size_t sz = sizeof(random);
|
||||
if (n >= sz) {
|
||||
for (i = 0; i <= n - sz; i += sz) {
|
||||
memcpy(&cdest[i], &random, sizeof(random));
|
||||
random = rng->Next();
|
||||
}
|
||||
}
|
||||
memcpy(cdest + i, &random, n - i);
|
||||
}
|
||||
|
||||
// shrink_to_fit() on a default-constructed string must leave the capacity at
// kInitialCapacity.
TEST_F(FaststringTest, TestShrinkToFit_Empty) {
    faststring untouched;
    untouched.shrink_to_fit();
    ASSERT_EQ(faststring::kInitialCapacity, untouched.capacity());
}
|
||||
|
||||
// Test that, if the string contents is shorter than the initial capacity
|
||||
// of the faststring, shrink_to_fit() leaves the string in the built-in
|
||||
// array.
|
||||
TEST_F(FaststringTest, TestShrinkToFit_SmallerThanInitialCapacity) {
    // "hello" is well below kInitialCapacity, so the reported capacity must
    // not change.
    faststring short_text;
    short_text.append("hello");
    short_text.shrink_to_fit();
    ASSERT_EQ(faststring::kInitialCapacity, short_text.capacity());
}
|
||||
|
||||
// Repeatedly resizes one string to random lengths below twice the initial
// capacity and checks, after each shrink_to_fit(), that the contents survive
// and that the capacity is the larger of kInitialCapacity and the length.
TEST_F(FaststringTest, TestShrinkToFit_Random) {
    doris::Random rng(time(nullptr));
    int kMaxSize = faststring::kInitialCapacity * 2;
    std::unique_ptr<char[]> reference(new char[kMaxSize]);
    RandomString(reference.get(), kMaxSize, &rng);

    faststring subject;
    for (int round = 0; round < 100; ++round) {
        int target_len = rng.Uniform(kMaxSize);
        subject.resize(target_len);
        memcpy(subject.data(), reference.get(), target_len);
        subject.shrink_to_fit();

        ASSERT_EQ(0, memcmp(subject.data(), reference.get(), target_len));
        ASSERT_EQ(std::max<int>(faststring::kInitialCapacity, target_len), subject.capacity());
    }
}
|
||||
|
||||
} // namespace kudu
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@ -152,6 +152,7 @@ ${DORIS_TEST_BINARY_DIR}/util/uid_util_test
|
||||
${DORIS_TEST_BINARY_DIR}/util/aes_util_test
|
||||
${DORIS_TEST_BINARY_DIR}/util/string_util_test
|
||||
${DORIS_TEST_BINARY_DIR}/util/coding_test
|
||||
${DORIS_TEST_BINARY_DIR}/util/faststring_test
|
||||
|
||||
## Running common Unittest
|
||||
${DORIS_TEST_BINARY_DIR}/common/resource_tls_test
|
||||
|
||||
Reference in New Issue
Block a user