[fix](memory) Remove ChunkAllocator & fix Allocator no use mmap (#21259)
This commit is contained in:
@ -506,25 +506,6 @@ DEFINE_Int32(min_buffer_size, "1024"); // 1024, The minimum read buffer size (in
|
||||
// With 1024B through 8MB buffers, this is up to ~2GB of buffers.
|
||||
DEFINE_Int32(max_free_io_buffers, "128");
|
||||
|
||||
// Whether to disable the memory cache pool,
|
||||
// including MemPool, ChunkAllocator, DiskIO free buffer.
|
||||
DEFINE_Bool(disable_mem_pools, "false");
|
||||
|
||||
// The reserved bytes limit of Chunk Allocator, usually set as a percentage of mem_limit.
|
||||
// defaults to bytes if no unit is given, the number of bytes must be a multiple of 2.
|
||||
// Must be larger than 0. If it is larger than the physical memory size, it will be set to the physical memory size.
|
||||
// Increasing this value can improve performance,
|
||||
// but will acquire more free memory which can not be used by other modules.
|
||||
DEFINE_mString(chunk_reserved_bytes_limit, "0");
|
||||
// 1024, The minimum chunk allocator size (in bytes)
|
||||
DEFINE_Int32(min_chunk_reserved_bytes, "1024");
|
||||
// Disable Chunk Allocator in Vectorized Allocator, this will reduce memory cache.
|
||||
// For high concurrent queries, using Chunk Allocator with vectorized Allocator can reduce the impact
|
||||
// of gperftools tcmalloc central lock.
|
||||
// Jemalloc or google tcmalloc have core cache, Chunk Allocator may no longer be needed after replacing
|
||||
// gperftools tcmalloc.
|
||||
DEFINE_mBool(disable_chunk_allocator_in_vec, "true");
|
||||
|
||||
// The probing algorithm of partitioned hash table.
|
||||
// Enable quadratic probing hash table
|
||||
DEFINE_Bool(enable_quadratic_probing, "false");
|
||||
|
||||
@ -543,25 +543,6 @@ DECLARE_Int32(min_buffer_size); // 1024, The minimum read buffer size (in bytes)
|
||||
// With 1024B through 8MB buffers, this is up to ~2GB of buffers.
|
||||
DECLARE_Int32(max_free_io_buffers);
|
||||
|
||||
// Whether to disable the memory cache pool,
|
||||
// including MemPool, ChunkAllocator, DiskIO free buffer.
|
||||
DECLARE_Bool(disable_mem_pools);
|
||||
|
||||
// The reserved bytes limit of Chunk Allocator, usually set as a percentage of mem_limit.
|
||||
// defaults to bytes if no unit is given, the number of bytes must be a multiple of 2.
|
||||
// Must be larger than 0. If it is larger than the physical memory size, it will be set to the physical memory size.
|
||||
// Increasing this value can improve performance,
|
||||
// but will acquire more free memory which can not be used by other modules.
|
||||
DECLARE_mString(chunk_reserved_bytes_limit);
|
||||
// 1024, The minimum chunk allocator size (in bytes)
|
||||
DECLARE_Int32(min_chunk_reserved_bytes);
|
||||
// Disable Chunk Allocator in Vectorized Allocator, this will reduce memory cache.
|
||||
// For high concurrent queries, using Chunk Allocator with vectorized Allocator can reduce the impact
|
||||
// of gperftools tcmalloc central lock.
|
||||
// Jemalloc or google tcmalloc have core cache, Chunk Allocator may no longer be needed after replacing
|
||||
// gperftools tcmalloc.
|
||||
DECLARE_mBool(disable_chunk_allocator_in_vec);
|
||||
|
||||
// The probing algorithm of partitioned hash table.
|
||||
// Enable quadratic probing hash table
|
||||
DECLARE_Bool(enable_quadratic_probing);
|
||||
|
||||
@ -55,7 +55,6 @@
|
||||
#include "runtime/heartbeat_flags.h"
|
||||
#include "runtime/load_channel_mgr.h"
|
||||
#include "runtime/load_path_mgr.h"
|
||||
#include "runtime/memory/chunk_allocator.h"
|
||||
#include "runtime/memory/mem_tracker.h"
|
||||
#include "runtime/memory/mem_tracker_limiter.h"
|
||||
#include "runtime/memory/thread_mem_tracker_mgr.h"
|
||||
@ -314,23 +313,6 @@ Status ExecEnv::_init_mem_env() {
|
||||
|
||||
// 4. init other managers
|
||||
RETURN_IF_ERROR(_block_spill_mgr->init());
|
||||
|
||||
// 5. init chunk allocator
|
||||
if (!BitUtil::IsPowerOf2(config::min_chunk_reserved_bytes)) {
|
||||
ss << "Config min_chunk_reserved_bytes must be a power-of-two: "
|
||||
<< config::min_chunk_reserved_bytes;
|
||||
return Status::InternalError(ss.str());
|
||||
}
|
||||
|
||||
int64_t chunk_reserved_bytes_limit =
|
||||
ParseUtil::parse_mem_spec(config::chunk_reserved_bytes_limit, MemInfo::mem_limit(),
|
||||
MemInfo::physical_mem(), &is_percent);
|
||||
chunk_reserved_bytes_limit =
|
||||
BitUtil::RoundDown(chunk_reserved_bytes_limit, config::min_chunk_reserved_bytes);
|
||||
ChunkAllocator::init_instance(chunk_reserved_bytes_limit);
|
||||
LOG(INFO) << "Chunk allocator memory limit: "
|
||||
<< PrettyPrinter::print(chunk_reserved_bytes_limit, TUnit::BYTES)
|
||||
<< ", origin config value: " << config::chunk_reserved_bytes_limit;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@ -1,35 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace doris {
|
||||
|
||||
// A chunk of continuous memory.
|
||||
// Almost all files depend on this struct, and each modification
|
||||
// will result in recompilation of all files. So, we put it in a
|
||||
// file to keep this file simple and infrequently changed.
|
||||
// Plain descriptor of one block of memory: where it starts, how long it is,
// and which per-core arena it is associated with.
struct Chunk {
|
||||
// Start address of the memory; nullptr by default until an allocation stores a pointer here.
uint8_t* data = nullptr;
|
||||
// Length of the memory in bytes. ChunkAllocator::allocate_align stores the
// rounded-up power-of-two size here, and ChunkAllocator::free CHECKs that it is a power of two.
size_t size = 0;
|
||||
// Index into ChunkAllocator's arena vector. -1 is the "unset" default;
// ChunkAllocator::free DCHECKs that it has been set.
int core_id = -1;
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
@ -1,281 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "runtime/memory/chunk_allocator.h"
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <sanitizer/asan_interface.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include "common/config.h"
|
||||
#include "common/status.h"
|
||||
#include "runtime/memory/chunk.h"
|
||||
#include "runtime/memory/system_allocator.h"
|
||||
#include "runtime/thread_context.h"
|
||||
#include "util/bit_util.h"
|
||||
#include "util/cpu_info.h"
|
||||
#include "util/doris_metrics.h"
|
||||
#include "util/metrics.h"
|
||||
#include "util/runtime_profile.h"
|
||||
#include "util/spinlock.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Chunks of size <= MIN_CHUNK_SIZE are not cached: a large number of small chunks would waste extra storage and increase lock time.
|
||||
static constexpr size_t MIN_CHUNK_SIZE = 4096; // 4K
|
||||
// Chunks of size >= MAX_CHUNK_SIZE are not cached: large chunks may stay unused for a long time, wasting memory.
|
||||
static constexpr size_t MAX_CHUNK_SIZE = 64 * (1ULL << 20); // 64M
|
||||
|
||||
ChunkAllocator* ChunkAllocator::_s_instance = nullptr;
|
||||
|
||||
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(chunk_pool_local_core_alloc_count, MetricUnit::NOUNIT);
|
||||
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(chunk_pool_other_core_alloc_count, MetricUnit::NOUNIT);
|
||||
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(chunk_pool_system_alloc_count, MetricUnit::NOUNIT);
|
||||
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(chunk_pool_system_free_count, MetricUnit::NOUNIT);
|
||||
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(chunk_pool_system_alloc_cost_ns, MetricUnit::NANOSECONDS);
|
||||
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(chunk_pool_system_free_cost_ns, MetricUnit::NANOSECONDS);
|
||||
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(chunk_pool_reserved_bytes, MetricUnit::NOUNIT);
|
||||
|
||||
static IntCounter* chunk_pool_local_core_alloc_count;
|
||||
static IntCounter* chunk_pool_other_core_alloc_count;
|
||||
static IntCounter* chunk_pool_system_alloc_count;
|
||||
static IntCounter* chunk_pool_system_free_count;
|
||||
static IntCounter* chunk_pool_system_alloc_cost_ns;
|
||||
static IntCounter* chunk_pool_system_free_cost_ns;
|
||||
static IntGauge* chunk_pool_reserved_bytes;
|
||||
|
||||
#ifdef BE_TEST
// Serializes the lazy creation of the test-only singleton.
static std::mutex s_mutex;

// Test build only: hands out the singleton, creating it on first use with a
// 4096-byte reserve limit after initializing CpuInfo.
ChunkAllocator* ChunkAllocator::instance() {
    std::lock_guard<std::mutex> guard(s_mutex);
    if (_s_instance != nullptr) {
        return _s_instance;
    }
    CpuInfo::init();
    ChunkAllocator::init_instance(4096);
    return _s_instance;
}
#endif
|
||||
|
||||
// Keep free chunk's ptr in size separated free list.
|
||||
// This class is thread-safe.
|
||||
class ChunkArena {
|
||||
int TRY_LOCK_TIMES = 3;
|
||||
|
||||
public:
|
||||
ChunkArena() : _chunk_lists(64) {}
|
||||
|
||||
~ChunkArena() {
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
if (_chunk_lists[i].empty()) continue;
|
||||
for (auto ptr : _chunk_lists[i]) {
|
||||
SystemAllocator::free(ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to pop a free chunk from corresponding free list.
|
||||
// Return true if success
|
||||
bool pop_free_chunk(size_t size, uint8_t** ptr) {
|
||||
int idx = BitUtil::Log2Ceiling64(size);
|
||||
auto& free_list = _chunk_lists[idx];
|
||||
|
||||
if (free_list.empty()) return false;
|
||||
|
||||
for (int i = 0; i < TRY_LOCK_TIMES; ++i) {
|
||||
if (_lock.try_lock()) {
|
||||
if (free_list.empty()) {
|
||||
_lock.unlock();
|
||||
return false;
|
||||
} else {
|
||||
*ptr = free_list.back();
|
||||
free_list.pop_back();
|
||||
ASAN_UNPOISON_MEMORY_REGION(*ptr, size);
|
||||
_lock.unlock();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void push_free_chunk(uint8_t* ptr, size_t size) {
|
||||
int idx = BitUtil::Log2Ceiling64(size);
|
||||
// Poison this chunk to make asan can detect invalid access
|
||||
ASAN_POISON_MEMORY_REGION(ptr, size);
|
||||
std::lock_guard<SpinLock> l(_lock);
|
||||
_chunk_lists[idx].push_back(ptr);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
std::lock_guard<SpinLock> l(_lock);
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
if (_chunk_lists[i].empty()) {
|
||||
continue;
|
||||
}
|
||||
for (auto ptr : _chunk_lists[i]) {
|
||||
::free(ptr);
|
||||
}
|
||||
std::vector<uint8_t*>().swap(_chunk_lists[i]);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
SpinLock _lock;
|
||||
std::vector<std::vector<uint8_t*>> _chunk_lists;
|
||||
};
|
||||
|
||||
// Creates the singleton with the given reserve limit on the first call;
// calls made after the instance exists do nothing.
void ChunkAllocator::init_instance(size_t reserve_limit) {
    if (_s_instance == nullptr) {
        _s_instance = new ChunkAllocator(reserve_limit);
    }
}
|
||||
|
||||
// Builds the allocator:
//  - `reserve_limit` is the maximum number of bytes kept cached in the arenas,
//  - stealing from other arenas is allowed once more than 10% of that limit is cached,
//  - one ChunkArena is created per core reported by CpuInfo::get_max_num_cores(),
//  - the "ChunkAllocator" memory tracker and the chunk_pool_* metrics are registered.
ChunkAllocator::ChunkAllocator(size_t reserve_limit)
        : _reserve_bytes_limit(reserve_limit),
          // Explicit cast: the product is computed in double and truncated.
          _steal_arena_limit(static_cast<size_t>(reserve_limit * 0.1)),
          _reserved_bytes(0),
          _arenas(CpuInfo::get_max_num_cores()) {
    _mem_tracker =
            std::make_unique<MemTrackerLimiter>(MemTrackerLimiter::Type::GLOBAL, "ChunkAllocator");
    for (auto& arena : _arenas) {
        arena = std::make_unique<ChunkArena>();
    }

    _chunk_allocator_metric_entity =
            DorisMetrics::instance()->metric_registry()->register_entity("chunk_allocator");
    INT_COUNTER_METRIC_REGISTER(_chunk_allocator_metric_entity, chunk_pool_local_core_alloc_count);
    INT_COUNTER_METRIC_REGISTER(_chunk_allocator_metric_entity, chunk_pool_other_core_alloc_count);
    INT_COUNTER_METRIC_REGISTER(_chunk_allocator_metric_entity, chunk_pool_system_alloc_count);
    INT_COUNTER_METRIC_REGISTER(_chunk_allocator_metric_entity, chunk_pool_system_free_count);
    INT_COUNTER_METRIC_REGISTER(_chunk_allocator_metric_entity, chunk_pool_system_alloc_cost_ns);
    INT_COUNTER_METRIC_REGISTER(_chunk_allocator_metric_entity, chunk_pool_system_free_cost_ns);
    INT_GAUGE_METRIC_REGISTER(_chunk_allocator_metric_entity, chunk_pool_reserved_bytes);
}
|
||||
|
||||
// Rounds `size` up to the next power of two and fills `chunk` with a block of
// that size.
//
// Lookup order when caching is enabled (`_reserve_bytes_limit >= 1`):
//   1. the arena of the current core,
//   2. the other arenas, but only once more than `_steal_arena_limit` bytes are cached,
//   3. the system allocator.
//
// Returns Status::OK() on success and Status::MemoryAllocFailed when the
// system allocator returns nullptr. `size` must be > 0 (CHECKed).
Status ChunkAllocator::allocate_align(size_t size, Chunk* chunk) {
    CHECK(size > 0);
    size = BitUtil::RoundUpToPowerOfTwo(size);
    const int core_id = CpuInfo::get_current_core();
    chunk->size = size;
    chunk->core_id = core_id;

    if (_reserve_bytes_limit < 1) {
        // Caching is disabled: allocate straight from the system allocator.
        chunk->data = SystemAllocator::allocate(size);
        // Report the failure instead of returning OK with a null chunk.
        if (chunk->data == nullptr) {
            return Status::MemoryAllocFailed("ChunkAllocator failed to allocate chunk {} bytes",
                                             size);
        }
        return Status::OK();
    }

    // Fast path: allocate from the current core's arena.
    if (_arenas[core_id]->pop_free_chunk(size, &chunk->data)) {
        DCHECK_GE(_reserved_bytes, 0);
        _reserved_bytes.fetch_sub(size);
        chunk_pool_local_core_alloc_count->increment(1);
        // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker.
        THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get());
        return Status::OK();
    }

    // Second path: try the other cores' arenas.
    // Chunks are stolen only when the cached bytes exceed the steal limit;
    // below it, memory is taken from the system first so that the cache fills
    // up quickly and later requests can be served by the current core's arena.
    if (_reserved_bytes > _steal_arena_limit) {
        const size_t arena_count = _arenas.size();
        for (size_t step = 1; step < arena_count; ++step) {
            const size_t victim = (static_cast<size_t>(core_id) + step) % arena_count;
            if (_arenas[victim]->pop_free_chunk(size, &chunk->data)) {
                DCHECK_GE(_reserved_bytes, 0);
                _reserved_bytes.fetch_sub(size);
                chunk_pool_other_core_alloc_count->increment(1);
                // Record the arena the chunk actually came from.
                chunk->core_id = victim;
                // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker.
                THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get());
                return Status::OK();
            }
        }
    }

    // Last resort: allocate from the system allocator and record how long it took.
    int64_t cost_ns = 0;
    {
        SCOPED_RAW_TIMER(&cost_ns);
        chunk->data = SystemAllocator::allocate(size);
    }
    chunk_pool_system_alloc_count->increment(1);
    chunk_pool_system_alloc_cost_ns->increment(cost_ns);
    if (chunk->data == nullptr) {
        return Status::MemoryAllocFailed("ChunkAllocator failed to allocate chunk {} bytes", size);
    }
    return Status::OK();
}
|
||||
|
||||
void ChunkAllocator::free(const Chunk& chunk) {
|
||||
DCHECK(chunk.core_id != -1);
|
||||
CHECK((chunk.size & (chunk.size - 1)) == 0);
|
||||
if (config::disable_mem_pools || _reserve_bytes_limit < 1) {
|
||||
SystemAllocator::free(chunk.data);
|
||||
return;
|
||||
}
|
||||
|
||||
int64_t old_reserved_bytes = _reserved_bytes;
|
||||
int64_t new_reserved_bytes = 0;
|
||||
do {
|
||||
new_reserved_bytes = old_reserved_bytes + chunk.size;
|
||||
if (chunk.size <= MIN_CHUNK_SIZE || chunk.size >= MAX_CHUNK_SIZE ||
|
||||
new_reserved_bytes > _reserve_bytes_limit) {
|
||||
int64_t cost_ns = 0;
|
||||
{
|
||||
SCOPED_RAW_TIMER(&cost_ns);
|
||||
SystemAllocator::free(chunk.data);
|
||||
}
|
||||
chunk_pool_system_free_count->increment(1);
|
||||
chunk_pool_system_free_cost_ns->increment(cost_ns);
|
||||
|
||||
return;
|
||||
}
|
||||
} while (!_reserved_bytes.compare_exchange_weak(old_reserved_bytes, new_reserved_bytes));
|
||||
|
||||
// The memory size of allocate/free is a multiple of 2, so `_reserved_bytes% 100 == 32`
|
||||
// will definitely happen, and the latest `_reserved_bytes` value will be set every time.
|
||||
// The real-time and accurate `_reserved_bytes` value is not required. Usually,
|
||||
// the value of `_reserved_bytes` is equal to ChunkAllocator MemTracker.
|
||||
// The `_reserved_bytes` metric is only concerned when verifying the accuracy of MemTracker.
|
||||
// Therefore, reduce the number of sets and reduce the performance impact.
|
||||
if (_reserved_bytes % 100 == 32) {
|
||||
chunk_pool_reserved_bytes->set_value(_reserved_bytes);
|
||||
}
|
||||
// The chunk's memory ownership is transferred from tls tracker to ChunkAllocator.
|
||||
THREAD_MEM_TRACKER_TRANSFER_TO(chunk.size, _mem_tracker.get());
|
||||
_arenas[chunk.core_id]->push_free_chunk(chunk.data, chunk.size);
|
||||
}
|
||||
|
||||
void ChunkAllocator::free(uint8_t* data, size_t size) {
|
||||
Chunk chunk;
|
||||
chunk.data = data;
|
||||
chunk.size = size;
|
||||
chunk.core_id = CpuInfo::get_current_core();
|
||||
free(chunk);
|
||||
}
|
||||
|
||||
void ChunkAllocator::clear() {
|
||||
for (int i = 0; i < _arenas.size(); ++i) {
|
||||
_arenas[i]->clear();
|
||||
}
|
||||
THREAD_MEM_TRACKER_TRANSFER_FROM(_mem_tracker->consumption(), _mem_tracker.get());
|
||||
}
|
||||
|
||||
} // namespace doris
|
||||
@ -1,99 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "runtime/memory/mem_tracker_limiter.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
struct Chunk;
|
||||
class ChunkArena;
|
||||
class MetricEntity;
|
||||
class Status;
|
||||
|
||||
// Used to allocate memory with power-of-two length.
|
||||
// This Allocator allocate memory from system and cache free chunks for
|
||||
// later use.
|
||||
//
|
||||
// ChunkAllocator has one ChunkArena for each CPU core, it will try to allocate
|
||||
// memory from current core arena firstly. In this way, there will be no lock contention
|
||||
// between concurrently-running threads. If this fails, ChunkAllocator will try to allocate
|
||||
// memory from other core's arena.
|
||||
//
|
||||
// Memory Reservation
|
||||
// ChunkAllocator has a limit about how much free chunk bytes it can reserve, above which
|
||||
// chunk will released to system memory. For the worst case, when the limits is 0, it will
|
||||
// act as allocating directly from system.
|
||||
//
|
||||
// ChunkArena will keep a separate free list for each chunk size. In common case, chunk will
|
||||
// be allocated from current core arena. In this case, there is no lock contention.
|
||||
//
|
||||
// Must call CpuInfo::init() and DorisMetrics::instance()->initialize() to achieve good performance
|
||||
// before first object is created. And call init_instance() before use instance is called.
|
||||
class ChunkAllocator {
|
||||
public:
|
||||
static void init_instance(size_t reserve_limit);
|
||||
|
||||
#ifdef BE_TEST
|
||||
static ChunkAllocator* instance();
|
||||
#else
|
||||
static ChunkAllocator* instance() { return _s_instance; }
|
||||
#endif
|
||||
|
||||
// Up size to 2^n length, allocate a chunk.
|
||||
Status allocate_align(size_t size, Chunk* chunk);
|
||||
|
||||
// Free chunk allocated from this allocator
|
||||
void free(const Chunk& chunk);
|
||||
|
||||
// Transfer the memory ownership to the chunk allocator.
|
||||
// If the chunk allocator is full, then free to the system.
|
||||
// Note: make sure that the length of 'data' is equal to size,
|
||||
// otherwise the capacity of chunk allocator will be wrong.
|
||||
void free(uint8_t* data, size_t size);
|
||||
|
||||
void clear();
|
||||
|
||||
int64_t mem_consumption() { return _reserved_bytes; }
|
||||
|
||||
private:
|
||||
ChunkAllocator(size_t reserve_limit);
|
||||
|
||||
private:
|
||||
static ChunkAllocator* _s_instance;
|
||||
|
||||
size_t _reserve_bytes_limit;
|
||||
// When the reserved chunk memory size is greater than the limit,
|
||||
// it is allowed to steal the chunks of other arenas.
|
||||
size_t _steal_arena_limit;
|
||||
std::atomic<int64_t> _reserved_bytes;
|
||||
// each core has a ChunkArena
|
||||
std::vector<std::unique_ptr<ChunkArena>> _arenas;
|
||||
|
||||
std::shared_ptr<MetricEntity> _chunk_allocator_metric_entity;
|
||||
|
||||
std::unique_ptr<MemTrackerLimiter> _mem_tracker;
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
@ -1,76 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "runtime/memory/system_allocator.h"
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <new>
|
||||
#include <string>
|
||||
|
||||
#if !defined(__APPLE__) || !defined(_POSIX_C_SOURCE)
|
||||
#include <unistd.h>
|
||||
#else
|
||||
#include <mach/vm_page_size.h>
|
||||
#endif
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "runtime/memory/mem_tracker_limiter.h"
|
||||
#include "runtime/thread_context.h"
|
||||
#include "util/sse_util.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns the page size used as the alignment for allocations in this file.
// getpagesize() is used unless building for Apple with _POSIX_C_SOURCE
// defined, in which case the Mach `vm_page_size` value is returned instead
// (matching the conditional includes at the top of the file).
int get_page_size() {
|
||||
#if !defined(__APPLE__) || !defined(_POSIX_C_SOURCE)
|
||||
return getpagesize();
|
||||
#else
|
||||
return vm_page_size;
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Allocates `length` bytes by delegating to allocate_via_malloc() and returns
// whatever that call returns.
uint8_t* SystemAllocator::allocate(size_t length) {
|
||||
return allocate_via_malloc(length);
|
||||
}
|
||||
|
||||
// Releases `ptr` with the global ::free().
void SystemAllocator::free(uint8_t* ptr) {
|
||||
::free(ptr);
|
||||
}
|
||||
|
||||
uint8_t* SystemAllocator::allocate_via_malloc(size_t length) {
|
||||
void* ptr = nullptr;
|
||||
// try to use a whole page instead of parts of one page
|
||||
int res = posix_memalign(&ptr, get_page_size(), length);
|
||||
if (res != 0) {
|
||||
char buf[64];
|
||||
auto err = fmt::format("fail to allocate mem via posix_memalign, res={}, errmsg={}.", res,
|
||||
strerror_r(res, buf, 64));
|
||||
LOG(ERROR) << err;
|
||||
if (enable_thread_catch_bad_alloc) throw std::bad_alloc {};
|
||||
MemTrackerLimiter::print_log_process_usage(err);
|
||||
return nullptr;
|
||||
}
|
||||
return (uint8_t*)ptr;
|
||||
}
|
||||
|
||||
} // namespace doris
|
||||
@ -1,38 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Thin static wrapper for allocating memory from the system and freeing it again.
|
||||
// NOTE(review): allocate_via_mmap is declared below, but the implementation visible in system_allocator.cpp only defines and calls allocate_via_malloc — confirm whether an mmap path still exists.
|
||||
class SystemAllocator {
|
||||
public:
|
||||
// Allocates `length` bytes; the returned pointer is released with free().
static uint8_t* allocate(size_t length);
|
||||
|
||||
// Releases memory previously returned by allocate().
static void free(uint8_t* ptr);
|
||||
|
||||
private:
|
||||
// Declared only; no definition is visible in the accompanying implementation file.
static uint8_t* allocate_via_mmap(size_t length);
|
||||
// Page-aligned allocation through posix_memalign().
static uint8_t* allocate_via_malloc(size_t length);
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
@ -43,7 +43,7 @@ void faststring::GrowArray(size_t newcapacity) {
|
||||
}
|
||||
capacity_ = newcapacity;
|
||||
if (data_ != initial_data_) {
|
||||
Allocator::free_no_munmap(data_);
|
||||
Allocator::free(data_);
|
||||
} else {
|
||||
ASAN_POISON_MEMORY_REGION(initial_data_, arraysize(initial_data_));
|
||||
}
|
||||
@ -57,13 +57,13 @@ void faststring::ShrinkToFitInternal() {
|
||||
if (len_ <= kInitialCapacity) {
|
||||
ASAN_UNPOISON_MEMORY_REGION(initial_data_, len_);
|
||||
memcpy(initial_data_, &data_[0], len_);
|
||||
Allocator::free_no_munmap(data_);
|
||||
Allocator::free(data_);
|
||||
data_ = initial_data_;
|
||||
capacity_ = kInitialCapacity;
|
||||
} else {
|
||||
std::unique_ptr<uint8_t[]> newdata(reinterpret_cast<uint8_t*>(Allocator::alloc(len_)));
|
||||
memcpy(&newdata[0], &data_[0], len_);
|
||||
Allocator::free_no_munmap(data_);
|
||||
Allocator::free(data_);
|
||||
data_ = newdata.release();
|
||||
capacity_ = len_;
|
||||
}
|
||||
|
||||
@ -54,7 +54,7 @@ public:
|
||||
~faststring() {
|
||||
ASAN_UNPOISON_MEMORY_REGION(initial_data_, arraysize(initial_data_));
|
||||
if (data_ != initial_data_) {
|
||||
Allocator::free_no_munmap(data_);
|
||||
Allocator::free(data_);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -41,7 +41,6 @@
|
||||
#include "olap/page_cache.h"
|
||||
#include "olap/rowset/segment_v2/inverted_index_cache.h"
|
||||
#include "olap/segment_loader.h"
|
||||
#include "runtime/memory/chunk_allocator.h"
|
||||
#include "runtime/memory/mem_tracker_limiter.h"
|
||||
#include "runtime/task_group/task_group.h"
|
||||
#include "runtime/task_group/task_group_manager.h"
|
||||
@ -104,10 +103,6 @@ void MemInfo::refresh_allocator_mem() {
|
||||
void MemInfo::process_cache_gc(int64_t& freed_mem) {
|
||||
// TODO, free more cache, and should free a certain percentage of capacity, not all.
|
||||
int32_t min_free_size = 33554432; // 32M
|
||||
if (ChunkAllocator::instance()->mem_consumption() > min_free_size) {
|
||||
freed_mem += ChunkAllocator::instance()->mem_consumption();
|
||||
ChunkAllocator::instance()->clear();
|
||||
}
|
||||
|
||||
if (StoragePageCache::instance()->get_page_cache_mem_consumption(segment_v2::DATA_PAGE) >
|
||||
min_free_size) {
|
||||
|
||||
@ -279,7 +279,7 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
~OwnedSlice() { Allocator::free_no_munmap(_slice.data); }
|
||||
~OwnedSlice() { Allocator::free(_slice.data); }
|
||||
|
||||
const Slice& slice() const { return _slice; }
|
||||
|
||||
|
||||
@ -28,8 +28,6 @@
|
||||
|
||||
#include "common/config.h"
|
||||
#include "common/status.h"
|
||||
#include "runtime/memory/chunk.h"
|
||||
#include "runtime/memory/chunk_allocator.h"
|
||||
#include "util/sse_util.hpp"
|
||||
|
||||
#ifdef NDEBUG
|
||||
@ -63,17 +61,6 @@
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Memory allocation between 4KB and 64MB will be through ChunkAllocator,
|
||||
* those less than 4KB will be through malloc (for example, tcmalloc),
|
||||
* and those greater than 64MB will be through MMAP.
|
||||
* In the actual test, chunkallocator allocates less than 4KB of memory slower than malloc,
|
||||
* and chunkallocator allocates more than 64MB of memory slower than MMAP,
|
||||
* but the 4KB threshold is an empirical value, which needs to be determined
|
||||
* by more detailed test later.
|
||||
*/
|
||||
static constexpr size_t CHUNK_THRESHOLD = 4096;
|
||||
|
||||
static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
|
||||
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
|
||||
|
||||
@ -110,7 +97,7 @@ public:
|
||||
memory_check(size);
|
||||
void* buf;
|
||||
|
||||
if (size >= doris::config::mmap_threshold && use_mmap) {
|
||||
if (use_mmap && size >= doris::config::mmap_threshold) {
|
||||
if (alignment > MMAP_MIN_ALIGNMENT)
|
||||
throw doris::Exception(
|
||||
doris::ErrorCode::INVALID_ARGUMENT,
|
||||
@ -125,13 +112,6 @@ public:
|
||||
}
|
||||
|
||||
/// No need for zero-fill, because mmap guarantees it.
|
||||
} else if (!doris::config::disable_chunk_allocator_in_vec && size >= CHUNK_THRESHOLD) {
|
||||
doris::Chunk chunk;
|
||||
if (!doris::ChunkAllocator::instance()->allocate_align(size, &chunk)) {
|
||||
throw_bad_alloc(fmt::format("Allocator: Cannot allocate chunk {}.", size));
|
||||
}
|
||||
buf = chunk.data;
|
||||
if constexpr (clear_memory) memset(buf, 0, chunk.size);
|
||||
} else {
|
||||
if (alignment <= MALLOC_MIN_ALIGNMENT) {
|
||||
if constexpr (clear_memory)
|
||||
@ -158,28 +138,19 @@ public:
|
||||
}
|
||||
|
||||
/// Free memory range.
|
||||
void free(void* buf, size_t size) {
|
||||
if (size >= doris::config::mmap_threshold && use_mmap) {
|
||||
void free(void* buf, size_t size = -1) {
|
||||
if (use_mmap && size >= doris::config::mmap_threshold) {
|
||||
DCHECK(size != -1);
|
||||
if (0 != munmap(buf, size)) {
|
||||
throw_bad_alloc(fmt::format("Allocator: Cannot munmap {}.", size));
|
||||
} else {
|
||||
release_memory(size);
|
||||
}
|
||||
} else if (!doris::config::disable_chunk_allocator_in_vec && size >= CHUNK_THRESHOLD &&
|
||||
((size & (size - 1)) == 0)) {
|
||||
// Only power-of-two length are added to ChunkAllocator
|
||||
doris::ChunkAllocator::instance()->free((uint8_t*)buf, size);
|
||||
} else {
|
||||
::free(buf);
|
||||
}
|
||||
}
|
||||
|
||||
// Free memory range by ::free.
|
||||
void free_no_munmap(void* buf) {
|
||||
CHECK(!use_mmap);
|
||||
::free(buf);
|
||||
}
|
||||
|
||||
/** Enlarge memory range.
|
||||
* Data from old range is moved to the beginning of new range.
|
||||
* Address of memory range could change.
|
||||
@ -188,8 +159,9 @@ public:
|
||||
if (old_size == new_size) {
|
||||
/// nothing to do.
|
||||
/// BTW, it's not possible to change alignment while doing realloc.
|
||||
} else if (old_size < CHUNK_THRESHOLD && new_size < CHUNK_THRESHOLD &&
|
||||
alignment <= MALLOC_MIN_ALIGNMENT) {
|
||||
} else if (!use_mmap || (old_size < doris::config::mmap_threshold &&
|
||||
new_size < doris::config::mmap_threshold &&
|
||||
alignment <= MALLOC_MIN_ALIGNMENT)) {
|
||||
memory_check(new_size);
|
||||
/// Resize malloc'd memory region with no special alignment requirement.
|
||||
void* new_buf = ::realloc(buf, new_size);
|
||||
@ -203,7 +175,7 @@ public:
|
||||
if (new_size > old_size)
|
||||
memset(reinterpret_cast<char*>(buf) + old_size, 0, new_size - old_size);
|
||||
} else if (old_size >= doris::config::mmap_threshold &&
|
||||
new_size >= doris::config::mmap_threshold && use_mmap) {
|
||||
new_size >= doris::config::mmap_threshold) {
|
||||
memory_check(new_size);
|
||||
/// Resize mmap'd memory region.
|
||||
consume_memory(new_size - old_size);
|
||||
@ -226,7 +198,6 @@ public:
|
||||
}
|
||||
} else {
|
||||
memory_check(new_size);
|
||||
// CHUNK_THRESHOLD <= old_size <= MMAP_THRESHOLD use system realloc is slow, use ChunkAllocator.
|
||||
// Big allocs that requires a copy.
|
||||
void* new_buf = alloc(new_size, alignment);
|
||||
memcpy(new_buf, buf, std::min(old_size, new_size));
|
||||
|
||||
@ -39,6 +39,7 @@
|
||||
#include "olap/olap_common.h"
|
||||
#include "runtime/client_cache.h"
|
||||
#include "runtime/exec_env.h"
|
||||
#include "util/string_util.h"
|
||||
#include "util/thrift_rpc_helper.h"
|
||||
#include "vec/columns/column.h"
|
||||
#include "vec/columns/column_array.h"
|
||||
|
||||
@ -33,12 +33,13 @@
|
||||
#include "runtime/define_primitive_type.h"
|
||||
#include "runtime/primitive_type.h"
|
||||
#include "runtime/types.h"
|
||||
#include "util/runtime_profile.h"
|
||||
#include "util/string_util.h"
|
||||
#include "vec/aggregate_functions/aggregate_function.h"
|
||||
#include "vec/common/string_ref.h"
|
||||
#include "vec/data_types/data_type.h"
|
||||
|
||||
namespace doris {
|
||||
class RuntimeProfile;
|
||||
class RuntimeState;
|
||||
|
||||
namespace vectorized {
|
||||
|
||||
@ -15,7 +15,7 @@
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "runtime/memory/system_allocator.h"
|
||||
#include "vec/common/allocator.h"
|
||||
|
||||
#include <gtest/gtest-message.h>
|
||||
#include <gtest/gtest-test-part.h>
|
||||
@ -23,25 +23,39 @@
|
||||
#include <memory>
|
||||
|
||||
#include "gtest/gtest_pred_impl.h"
|
||||
#include "vec/common/allocator_fwd.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Round-trip smoke test for an allocator object: alloc -> realloc -> free,
// run for two starting sizes (4096 and 100), each reallocated to 1024 times
// the starting size. T must provide alloc(size), realloc(ptr, a, b) and
// free(ptr, size) as they are called below.
template <typename T>
|
||||
void test_allocator(T allocator) {
|
||||
// Case 1: start from a 4096-sized allocation.
auto ptr = allocator.alloc(4096);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
// Arguments are presumably (ptr, old_size, new_size) — confirm against the allocator's realloc.
ptr = allocator.realloc(ptr, 4096, 4096 * 1024);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
// free receives the same size that was passed as the last realloc argument.
allocator.free(ptr, 4096 * 1024);
|
||||
|
||||
// Case 2: same sequence starting from a 100-sized allocation.
ptr = allocator.alloc(100);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
ptr = allocator.realloc(ptr, 100, 100 * 1024);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
allocator.free(ptr, 100 * 1024);
|
||||
}
|
||||
|
||||
void test_normal() {
|
||||
{
|
||||
auto ptr = SystemAllocator::allocate(4096);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
EXPECT_EQ(0, (uint64_t)ptr % 4096);
|
||||
SystemAllocator::free(ptr);
|
||||
}
|
||||
{
|
||||
auto ptr = SystemAllocator::allocate(100);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
EXPECT_EQ(0, (uint64_t)ptr % 4096);
|
||||
SystemAllocator::free(ptr);
|
||||
test_allocator(Allocator<false, false, false>());
|
||||
test_allocator(Allocator<false, false, true>());
|
||||
test_allocator(Allocator<false, true, false>());
|
||||
test_allocator(Allocator<false, true, true>());
|
||||
test_allocator(Allocator<true, false, false>());
|
||||
test_allocator(Allocator<true, false, true>());
|
||||
test_allocator(Allocator<true, true, false>());
|
||||
test_allocator(Allocator<true, true, true>());
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SystemAllocatorTest, TestNormal) {
|
||||
TEST(AllocatorTest, TestNormal) {
|
||||
test_normal();
|
||||
}
|
||||
|
||||
@ -1,39 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "runtime/memory/chunk_allocator.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "common/config.h"
|
||||
#include "common/status.h"
|
||||
#include "runtime/memory/chunk.h"
|
||||
#include "util/cpu_info.h"
|
||||
#include "util/doris_metrics.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Allocates and frees one chunk per size through ChunkAllocator::instance(),
// checking the returned status, data pointer and recorded chunk size each time.
TEST(ChunkAllocatorTest, Normal) {
|
||||
// Sizes double on every iteration, from 4096 up to and including 1024 * 1024.
for (size_t size = 4096; size <= 1024 * 1024; size <<= 1) {
|
||||
Chunk chunk;
|
||||
// allocate_align fills `chunk`; its result must report ok().
EXPECT_TRUE(ChunkAllocator::instance()->allocate_align(size, &chunk).ok());
|
||||
EXPECT_NE(nullptr, chunk.data);
|
||||
// The chunk must record exactly the requested size.
EXPECT_EQ(size, chunk.size);
|
||||
// Hand the chunk back to the allocator before the next iteration.
ChunkAllocator::instance()->free(chunk);
|
||||
}
|
||||
}
|
||||
} // namespace doris
|
||||
@ -38,7 +38,6 @@
|
||||
#include "runtime/descriptors.h"
|
||||
#include "runtime/jsonb_value.h"
|
||||
#include "runtime/large_int_value.h"
|
||||
#include "runtime/memory/chunk_allocator.h"
|
||||
#include "runtime/runtime_state.h"
|
||||
#include "testutil/desc_tbl_builder.h"
|
||||
#include "vec/core/field.h"
|
||||
@ -49,7 +48,6 @@
|
||||
#include "vec/utils/util.hpp"
|
||||
|
||||
TEST(TEST_VEXPR, ABSTEST) {
|
||||
doris::ChunkAllocator::init_instance(4096);
|
||||
doris::ObjectPool object_pool;
|
||||
doris::DescriptorTblBuilder builder(&object_pool);
|
||||
builder.declare_tuple() << doris::TYPE_INT << doris::TYPE_DOUBLE;
|
||||
|
||||
Reference in New Issue
Block a user