// File: doris/be/src/vec/common/allocator.h (347 lines, 14 KiB, C++)
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Allocator.h
// and modified by Doris
#pragma once
// TODO: Readable
#include <fmt/format.h>
#include <string.h>
#include <exception>
#include "common/config.h"
#include "common/status.h"
#include "runtime/memory/chunk.h"
#include "runtime/memory/chunk_allocator.h"
#include "runtime/thread_context.h"
#ifdef NDEBUG
#define ALLOCATOR_ASLR 0
#else
#define ALLOCATOR_ASLR 1
#endif
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif
#include <sys/mman.h>
#include <algorithm>
#include <cstdlib>
#include "common/compiler_util.h"
#ifdef THREAD_SANITIZER
/// Thread sanitizer does not intercept mremap. The usage of mremap will lead to false positives.
#define DISABLE_MREMAP 1
#endif
#include "vec/common/allocator_fwd.h"
#include "vec/common/exception.h"
#include "vec/common/mremap.h"
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#ifdef NDEBUG
/**
 * Many modern allocators (for example, tcmalloc) do not do a mremap for
 * realloc, even in case of large enough chunks of memory. Although this allows
 * you to increase performance and reduce memory consumption during realloc.
 * To fix this, we do mremap manually if the chunk of memory is large enough.
 * The threshold (64 MB) is chosen quite large, since changing the address
 * space is very slow, especially in the case of a large number of threads. We
 * expect that the set of operations mmap/something to do/mremap can only be
 * performed about 1000 times per second.
 *
 * P.S. This is also required, because tcmalloc can not allocate a chunk of
 * memory greater than 16 GB.
 */
static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
/**
 * Memory allocation between 4KB and 64MB will be through ChunkAllocator,
 * those less than 4KB will be through malloc (for example, tcmalloc),
 * and those greater than 64MB will be through MMAP.
 * In the actual test, chunkallocator allocates less than 4KB of memory slower than malloc,
 * and chunkallocator allocates more than 64MB of memory slower than MMAP,
 * but the 4KB threshold is an empirical value, which needs to be determined
 * by more detailed test later.
 */
static constexpr size_t CHUNK_THRESHOLD = 4096;
#else
/**
 * In debug build, use small mmap threshold to reproduce more memory
 * stomping bugs. Along with ASLR it will hopefully detect more issues than
 * ASan. The program may fail due to the limit on number of memory mappings.
 */
static constexpr size_t MMAP_THRESHOLD = 4096;
static constexpr size_t CHUNK_THRESHOLD = 1024;
#endif
// mmap returns page-aligned memory; requests with stricter alignment are rejected.
static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
// Alignment malloc/calloc are assumed to provide; anything stricter goes
// through posix_memalign in Allocator::alloc below.
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
* Also used in hash tables.
* The interface is different from std::allocator
* - the presence of the method realloc, which for large chunks of memory uses mremap;
* - passing the size into the `free` method;
* - by the presence of the `alignment` argument;
* - the possibility of zeroing memory (used in hash tables);
* - random hint address for mmap
* - mmap_threshold for using mmap less or more
*/
template <bool clear_memory_, bool mmap_populate>
class Allocator {
public:
    /// Allocate memory range.
    ///
    /// Strategy is chosen by `size`:
    ///  - size >= MMAP_THRESHOLD: anonymous mmap (kernel zero-fills the pages);
    ///  - CHUNK_THRESHOLD <= size < MMAP_THRESHOLD, chunk allocator enabled:
    ///    doris::ChunkAllocator;
    ///  - otherwise: calloc/malloc, or posix_memalign when
    ///    alignment > MALLOC_MIN_ALIGNMENT.
    /// Throws doris::vectorized::Exception if the allocation fails.
    void* alloc(size_t size, size_t alignment = 0) {
        void* buf;
        if (size >= MMAP_THRESHOLD) {
            // mmap can only provide page alignment; a stricter request cannot be honored.
            if (alignment > MMAP_MIN_ALIGNMENT)
                throw doris::vectorized::Exception(
                        fmt::format(
                                "Too large alignment {}: more than page size when allocating {}.",
                                alignment, size),
                        doris::TStatusCode::VEC_BAD_ARGUMENTS);
            // Charge the thread-local memory tracker up front; rolled back below on failure.
            CONSUME_THREAD_MEM_TRACKER(size);
            buf = mmap(get_mmap_hint(), size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
            if (MAP_FAILED == buf) {
                RELEASE_THREAD_MEM_TRACKER(size);
                auto err = fmt::format("Allocator: Cannot mmap {}.", size);
                doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
                doris::vectorized::throwFromErrno(err,
                                                  doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY);
            }
            /// No need for zero-fill, because mmap guarantees it.
        } else if (!doris::config::disable_chunk_allocator_in_vec && size >= CHUNK_THRESHOLD) {
            doris::Chunk chunk;
            if (!doris::ChunkAllocator::instance()->allocate_align(size, &chunk)) {
                auto err = fmt::format("Allocator: Cannot allocate chunk {}.", size);
                doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
                doris::vectorized::throwFromErrno(err,
                                                  doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY);
            }
            buf = chunk.data;
            // Chunks are recycled, so unlike mmap they must be cleared explicitly.
            // NOTE: clears chunk.size bytes, which may exceed the requested `size`.
            if constexpr (clear_memory) memset(buf, 0, chunk.size);
        } else {
            if (alignment <= MALLOC_MIN_ALIGNMENT) {
                // calloc provides the zero-fill for free when clearing is requested.
                if constexpr (clear_memory)
                    buf = ::calloc(size, 1);
                else
                    buf = ::malloc(size);
                if (nullptr == buf) {
                    auto err = fmt::format("Allocator: Cannot malloc {}.", size);
                    doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
                    doris::vectorized::throwFromErrno(
                            err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY);
                }
            } else {
                buf = nullptr;
                int res = posix_memalign(&buf, alignment, size);
                if (0 != res) {
                    auto err = fmt::format("Cannot allocate memory (posix_memalign) {}.", size);
                    doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
                    doris::vectorized::throwFromErrno(
                            err, doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY, res);
                }
                // posix_memalign does not zero memory, so clear manually if required.
                if constexpr (clear_memory) memset(buf, 0, size);
            }
        }
        return buf;
    }

    /// Free memory range. `size` must match the size passed to alloc()/realloc(),
    /// since it selects which of the three release paths is taken.
    void free(void* buf, size_t size) {
        if (size >= MMAP_THRESHOLD) {
            if (0 != munmap(buf, size)) {
                auto err = fmt::format("Allocator: Cannot munmap {}.", size);
                doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
                doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_MUNMAP);
            } else {
                // Release the tracker only after the unmap actually succeeded.
                RELEASE_THREAD_MEM_TRACKER(size);
            }
        } else if (!doris::config::disable_chunk_allocator_in_vec && size >= CHUNK_THRESHOLD &&
                   ((size & (size - 1)) == 0)) {
            // Only power-of-two length are added to ChunkAllocator
            doris::ChunkAllocator::instance()->free((uint8_t*)buf, size);
        } else {
            // NOTE(review): mid-range allocations with a non-power-of-two size were
            // obtained from ChunkAllocator in alloc() but fall through to ::free here —
            // verify ChunkAllocator hands out malloc-compatible memory in that case.
            ::free(buf);
        }
    }

    /** Enlarge memory range.
      * Data from old range is moved to the beginning of new range.
      * Address of memory range could change.
      */
    void* realloc(void* buf, size_t old_size, size_t new_size, size_t alignment = 0) {
        if (old_size == new_size) {
            /// nothing to do.
            /// BTW, it's not possible to change alignment while doing realloc.
        } else if (old_size < CHUNK_THRESHOLD && new_size < CHUNK_THRESHOLD &&
                   alignment <= MALLOC_MIN_ALIGNMENT) {
            /// Resize malloc'd memory region with no special alignment requirement.
            void* new_buf = ::realloc(buf, new_size);
            if (nullptr == new_buf) {
                auto err =
                        fmt::format("Allocator: Cannot realloc from {} to {}.", old_size, new_size);
                doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
                doris::vectorized::throwFromErrno(err,
                                                  doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY);
            }
            buf = new_buf;
            // ::realloc leaves the tail uninitialized; zero it when the policy demands.
            if constexpr (clear_memory)
                if (new_size > old_size)
                    memset(reinterpret_cast<char*>(buf) + old_size, 0, new_size - old_size);
        } else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD) {
            /// Resize mmap'd memory region.
            // NOTE(review): `new_size - old_size` assumes growth; for a shrink this
            // size_t expression wraps — confirm callers only enlarge (per the doc above).
            CONSUME_THREAD_MEM_TRACKER(new_size - old_size);
            // On apple and freebsd self-implemented mremap used (common/mremap.h)
            buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE,
                                    mmap_flags, -1, 0);
            if (MAP_FAILED == buf) {
                RELEASE_THREAD_MEM_TRACKER(new_size - old_size);
                auto err = fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
                                       old_size, new_size);
                doris::ExecEnv::GetInstance()->process_mem_tracker()->print_log_usage(err);
                doris::vectorized::throwFromErrno(err, doris::TStatusCode::VEC_CANNOT_MREMAP);
            }
            /// No need for zero-fill, because mmap guarantees it.
            if constexpr (mmap_populate) {
                // MAP_POPULATE seems have no effect for mremap as for mmap,
                // Clear enlarged memory range explicitly to pre-fault the pages
                if (new_size > old_size)
                    memset(reinterpret_cast<char*>(buf) + old_size, 0, new_size - old_size);
            }
        } else {
            // CHUNK_THRESHOLD <= old_size <= MMAP_THRESHOLD use system realloc is slow, use ChunkAllocator.
            // Big allocs that requires a copy.
            void* new_buf = alloc(new_size, alignment);
            memcpy(new_buf, buf, std::min(old_size, new_size));
            free(buf, old_size);
            buf = new_buf;
        }
        return buf;
    }

protected:
    // No stack storage in the base allocator; see AllocatorWithStackMemory.
    static constexpr size_t get_stack_threshold() { return 0; }

    static constexpr bool clear_memory = clear_memory_;

    // Freshly mmapped pages are copy-on-write references to a global zero page.
    // On the first write, a page fault occurs, and an actual writable page is
    // allocated. If we are going to use this memory soon, such as when resizing
    // hash tables, it makes sense to pre-fault the pages by passing
    // MAP_POPULATE to mmap(). This takes some time, but should be faster
    // overall than having a hot loop interrupted by page faults.
    // It is only supported on Linux.
    static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
#if defined(OS_LINUX)
                                      | (mmap_populate ? MAP_POPULATE : 0)
#endif
            ;

private:
#ifndef NDEBUG
    /// In debug builds, request mmap() at random addresses (a kind of ASLR), to
    /// reproduce more memory stomping bugs. Note that Linux doesn't do it by
    /// default. This may lead to worse TLB performance.
    void* get_mmap_hint() {
        // return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
        return nullptr;
    }
#else
    void* get_mmap_hint() { return nullptr; }
#endif
};
/** When using AllocatorWithStackMemory, located on the stack,
* GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
* In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
*/
#if !__clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
#endif
/** Allocator that serves requests of at most N bytes from an inline
  * (automatic-storage) buffer, delegating anything larger to Base.
  */
template <typename Base, size_t N, size_t Alignment>
class AllocatorWithStackMemory : private Base {
private:
    // Inline storage used for every allocation that fits in N bytes.
    alignas(Alignment) char stack_buf[N];

public:
    /// Do not use boost::noncopyable to avoid the warning about direct base
    /// being inaccessible due to ambiguity, when derived classes are also
    /// noncopiable (-Winaccessible-base).
    AllocatorWithStackMemory(const AllocatorWithStackMemory&) = delete;
    AllocatorWithStackMemory& operator=(const AllocatorWithStackMemory&) = delete;
    AllocatorWithStackMemory() = default;
    ~AllocatorWithStackMemory() = default;

    /// Return the inline buffer when the request fits, otherwise a heap range
    /// from Base. The inline buffer is zeroed when Base::clear_memory is set.
    void* alloc(size_t size) {
        if (size > N) return Base::alloc(size, Alignment);
        if constexpr (Base::clear_memory) memset(stack_buf, 0, N);
        return stack_buf;
    }

    /// Release a range previously returned by alloc(); the inline buffer needs
    /// no action, only heap ranges are handed back to Base.
    void free(void* buf, size_t size) {
        if (size <= N) return;
        Base::free(buf, size);
    }

    /// Resize a range. Data is preserved up to old_size; the address may change
    /// (in particular when the range migrates from the inline buffer to the heap).
    void* realloc(void* buf, size_t old_size, size_t new_size) {
        /// Still fits inline: the buffer is reused as-is.
        if (new_size <= N) return buf;
        /// Migrating from the inline buffer to the heap: copy the old contents.
        if (old_size <= N) {
            void* heap_buf = Base::alloc(new_size, Alignment);
            memcpy(heap_buf, buf, old_size);
            return heap_buf;
        }
        /// Already on the heap: let Base resize in place or move it.
        return Base::realloc(buf, old_size, new_size, Alignment);
    }

protected:
    // Sizes at or below this threshold never touch Base.
    static constexpr size_t get_stack_threshold() { return N; }
};
#if !__clang__
#pragma GCC diagnostic pop
#endif