// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #ifndef IMPALA_RUNTIME_BUFFERPOOL_SUBALLOCATOR_H #define IMPALA_RUNTIME_BUFFERPOOL_SUBALLOCATOR_H #include #include #include "runtime/bufferpool/buffer_pool.h" namespace doris { class Suballocation; /// Helper class to subdivide buffers from the buffer pool. Implements a buddy /// allocation algorithm optimised for power-of-two allocations. At or above the /// 'min_buffer_len' value, each allocation is backed by a power-of-two buffer from /// a BufferPool. Below that threshold, each allocation is backed by a /// 'min_buffer_len' buffer split recursively into equal-sized buddies until the /// desired allocation size is reached. Every time an allocation is freed, /// free buddies are coalesced eagerly and whole buffers are freed eagerly. /// /// The algorithms used are asymptotically efficient: O(log(max allocation size)), but /// the implementation's constant-factor overhead is not optimised. Thus, the allocator /// is best suited for relatively large allocations where the constant CPU/memory /// overhead per allocation is not paramount, e.g. bucket directories of hash tables. /// All allocations less than MIN_ALLOCATION_BYTES are rounded up to that amount. /// /// Methods of Suballocator are not thread safe. /// /// Implementation: /// --------------- /// The allocator uses two key data structures: a number of binary trees representing /// the buddy relationships between allocations and a set of free lists, one for each /// power-of-two size. /// /// Each buffer allocated from the buffer pool has a tree of Suballocations associated /// with it that use the memory from that buffer. The root of the tree is the /// Suballocation corresponding to the entire buffer. Each node has either zero children /// (if it hasn't been split) or two children (if it has been split into two buddy /// allocations). Each non-root Suballocation has pointers to its buddy and its parent /// to enable coalescing the buddies into the parent when both are free. /// /// Suballocations are eagerly coalesced when freed, so a Suballocation only has children /// if one of its descendants is allocated. /// /// The free lists are doubly-linked lists of free Suballocation objects that support /// O(1) add and remove. The next and previous pointers are stored in the /// Suballocation object so no auxiliary memory is required. class Suballocator { public: /// Constructs a suballocator that allocates memory from 'pool' with 'client'. /// Suballocations smaller than 'min_buffer_len' are handled by allocating a /// buffer of 'min_buffer_len' and recursively splitting it. Suballocator( BufferPool* pool, BufferPool::ClientHandle* client, int64_t min_buffer_len); ~Suballocator(); /// Allocate bytes from BufferPool. The allocation is nullptr if unsuccessful because /// the client's reservation was insufficient. If an unexpected error is encountered, /// returns that status. The allocation size is rounded up to the next power-of-two. /// The caller must always free the allocation by calling Free() (otherwise destructing /// the returned 'result' will DCHECK on debug builds or otherwise misbehave on release /// builds). /// /// Allocate() will try to increase the client's buffer pool reservation to fulfill /// the requested allocation if needed. /// /// The memory returned is at least 8-byte aligned. Status Allocate(int64_t bytes, std::unique_ptr* result); /// Free the allocation. Does nothing if allocation is nullptr (e.g. was the result of a /// failed Allocate() call). void Free(std::unique_ptr allocation); /// Upper bounds on the max allocation size and the number of different /// power-of-two allocation sizes. Used to bound the number of free lists. static constexpr int LOG_MAX_ALLOCATION_BYTES = BufferPool::LOG_MAX_BUFFER_BYTES; static constexpr int64_t MAX_ALLOCATION_BYTES = BufferPool::MAX_BUFFER_BYTES; /// Don't support allocations less than 4kb to avoid high overhead. static constexpr int LOG_MIN_ALLOCATION_BYTES = 12; static constexpr int64_t MIN_ALLOCATION_BYTES = 1L << LOG_MIN_ALLOCATION_BYTES; private: DISALLOW_COPY_AND_ASSIGN(Suballocator); /// Compute the index for allocations of size 'bytes' in 'free_lists_'. 'bytes' is /// rounded up to the next power-of-two if it is not already a power-of-two. int ComputeListIndex(int64_t bytes) const; /// Allocate a buffer of size 'bytes' < MAX_ALLOCATION_BYTES from the buffer pool and /// initialize 'result' with it. If the reservation is insufficient, try to increase /// the reservation to fit. Status AllocateBuffer(int64_t bytes, std::unique_ptr* result); /// Split the free allocation until we get an allocation of 'target_bytes' rounded up /// to a power-of-two. This allocation is returned. The other allocations resulting /// from the splits are added to free lists. node->in_use must be false and 'node' /// must not be in any free list. Can fail if allocating memory for data structures /// fails. Status SplitToSize(std::unique_ptr node, int64_t target_bytes, std::unique_ptr* result); // Add allocation to the free list with given index. void AddToFreeList(std::unique_ptr node); // Remove allocation from its free list. std::unique_ptr RemoveFromFreeList(Suballocation* node); // Get the allocation at the head of the free list at index 'list_idx'. Return nullptr // if list is empty. std::unique_ptr PopFreeListHead(int list_idx); /// Coalesce two free buddies, 'b1' and 'b2'. Frees 'b1' and 'b2' and marks the parent /// not in use. std::unique_ptr CoalesceBuddies( std::unique_ptr b1, std::unique_ptr b2); /// The pool and corresponding client to allocate buffers from. BufferPool* pool_; BufferPool::ClientHandle* client_; /// The minimum length of buffer to allocate. To serve allocations below this threshold, /// a larger buffer is allocated and split into multiple allocations. const int64_t min_buffer_len_; /// Track how much memory has been returned in allocations but not freed. int64_t allocated_; /// Free lists for each supported power-of-two size. Statically allocate the maximum /// possible number of lists for simplicity. Indexed by log2 of the allocation size /// minus log2 of the minimum allocation size, e.g. 16k allocations are at index 2. /// Each free list should only include one buddy of each pair: if both buddies are /// free, they should have been coalesced. /// /// Each free list is implemented as a doubly-linked list. static constexpr int NUM_FREE_LISTS = LOG_MAX_ALLOCATION_BYTES - LOG_MIN_ALLOCATION_BYTES + 1; std::unique_ptr free_lists_[NUM_FREE_LISTS]; }; /// An allocation made by a Suballocator. Each allocation returned by Suballocator must /// be freed with Suballocator::Free(). /// /// Unique_ptr is used to manage ownership of these Suballocations as a guard against /// memory leaks. The owner of the unique_ptr is either: /// - client code, if the suballocation is in use /// - the free list array, if the suballocation is the head of a free list /// - the previous free list entry, if the suballocation is a subsequent free list entry /// - the suballocation's left child, if the suballocation is split class Suballocation { public: // Checks that the allocation is not in use (therefore not leaked). ~Suballocation() { DCHECK(!in_use_); } uint8_t* data() const { return data_; } int64_t len() const { return len_; } private: friend class Suballocator; DISALLOW_COPY_AND_ASSIGN(Suballocation); /// Static constructor for Suballocation. Can fail if new fails to allocate memory. static Status Create(std::unique_ptr* new_suballocation); // The actual constructor - Create() is used for its better error handling. Suballocation() : data_(nullptr), len_(-1), buddy_(nullptr), prev_free_(nullptr), in_use_(false) {} /// The allocation's data and its length. uint8_t* data_; int64_t len_; /// The buffer backing the Suballocation, if the Suballocation is backed by an entire /// buffer. Otherwise uninitialized. 'buffer_' is open iff 'buddy_' is nullptr. BufferPool::BufferHandle buffer_; /// If this is a left child, the parent of this and its buddy. The parent's allocation /// is the contiguous memory buffer comprised of the two allocations. We store the /// parent in only the left child so that it is uniquely owned. std::unique_ptr parent_; /// The buddy allocation of this allocation. The buddy's memory buffer is the same /// size and adjacent in memory. Two buddy Suballocation objects have the same /// lifetime: they are created in SplitToSize() and destroyed in CoalesceBuddies(). Suballocation* buddy_; /// If this is in a free list, the next element in the list. nullptr if this is the last /// element in the free list. This pointer owns the next element in the linked list, /// which itself stores a raw back-pointer. std::unique_ptr next_free_; /// If this is in a free list, the previous element in the list. nullptr if this is the /// first element. If non-nullptr, this Suballocation is owned by 'prev_free_'. Suballocation* prev_free_; /// True if was returned from Allocate() and hasn't been freed yet, or if it has been /// split into two child Suballocations. bool in_use_; }; } #endif