From e5793249cd6479beae073b9b5cb61b25bf19cd7e Mon Sep 17 00:00:00 2001
From: Xinyi Zou
Date: Fri, 31 Mar 2023 09:28:11 +0800
Subject: [PATCH] [opt](hashtable) Modify default filled strategy to 75% (#18242)

---
 be/CMakeLists.txt                            |   8 -
 be/src/common/config.h                       |   9 +
 be/src/vec/common/hash_table/hash_table.h    |  35 +-
 be/src/vec/common/hash_table/join_hash_map.h | 133 ----
 .../vec/common/hash_table/join_hash_table.h  | 733 ------------------
 be/src/vec/exec/join/vhash_join_node.cpp     |   9 +-
 build.sh                                     |   5 -
 run-be-ut.sh                                 |   1 -
 8 files changed, 34 insertions(+), 899 deletions(-)
 delete mode 100644 be/src/vec/common/hash_table/join_hash_map.h
 delete mode 100644 be/src/vec/common/hash_table/join_hash_table.h

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 18b058badd..8bb1100c51 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -564,14 +564,6 @@ if (USE_BTHREAD_SCANNER)
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DUSE_BTHREAD_SCANNER")
 endif()
 
-# STRICT_MEMORY_USE=ON expects BE to use less memory, and gives priority to ensuring stability
-# when the cluster memory is limited.
-# TODO In the future, expect a dynamic soft memory limit, combined with real-time memory usage of the cluster,
-# to control the main memory consumers, including HashTable, LRU Cache elimination strategy,
-# ChunkAllocator cache strategy, Disk IO buffer cache strategy, etc.
-if (STRICT_MEMORY_USE)
-    add_compile_options(-DSTRICT_MEMORY_USE)
-endif()
 if (ENABLE_STACKTRACE)
     add_compile_options(-DENABLE_STACKTRACE)
 endif()
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 1d899eccd8..dbc9887d08 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -67,6 +67,15 @@ CONF_String(mem_limit, "auto");
 // Soft memory limit as a fraction of hard memory limit.
 CONF_Double(soft_mem_limit_frac, "0.9");
 
+// When the hash table capacity is greater than 2^double_grow_degree (default 2G cells), grow only once 75% of the capacity is filled.
+// Increasing this value reduces the number of hash table resizes, but may waste more memory.
+CONF_mInt32(hash_table_double_grow_degree, "31");
+
+// Maximum number of rows by which the hash table is pre-expanded before inserting data.
+// When there are few duplicate keys, pre-expansion reduces the number of hash table resizes;
+// when there are many duplicate keys, the filled buckets are far fewer than the buckets built, so a large pre-expansion would waste memory.
+CONF_mInt64(hash_table_pre_expanse_max_rows, "65535");
+
 // The maximum low water mark of the system `/proc/meminfo/MemAvailable`, Unit byte, default 1.6G,
 // actual low water mark=min(1.6G, MemTotal * 10%), avoid wasting too much memory on machines
 // with large memory larger than 16G.
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index ea4a6dda5a..1a1eafcc13 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -244,21 +244,17 @@ template <size_t initial_size_degree>
 struct HashTableGrower {
     /// The state of this structure is enough to get the buffer size of the hash table.
     doris::vectorized::UInt8 size_degree = initial_size_degree;
-    doris::vectorized::Int64 double_grow_degree = 31; // 2GB
+    doris::vectorized::Int64 double_grow_degree = doris::config::hash_table_double_grow_degree;
 
     /// The size of the hash table in the cells.
     size_t buf_size() const { return 1ULL << size_degree; }
 
-#ifndef STRICT_MEMORY_USE
-    size_t max_fill() const { return 1ULL << (size_degree - 1); }
-#else
-    // When capacity is greater than 2G, grow when 75% of the capacity is satisfied.
+    // When capacity is greater than 2^double_grow_degree, grow once 75% of the capacity is filled.
     size_t max_fill() const {
         return size_degree < double_grow_degree
                        ? 1ULL << (size_degree - 1)
                        : (1ULL << size_degree) - (1ULL << (size_degree - 2));
     }
-#endif
 
     size_t mask() const { return buf_size() - 1; }
 
@@ -279,9 +275,6 @@ struct HashTableGrower {
     /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
     void set(size_t num_elems) {
-#ifndef STRICT_MEMORY_USE
-        size_t fill_capacity = static_cast<size_t>(log2(num_elems - 1)) + 2;
-#else
         size_t fill_capacity = static_cast<size_t>(log2(num_elems - 1)) + 1;
         fill_capacity =
                 fill_capacity < double_grow_degree
                         ? fill_capacity + 1
                         : (num_elems < (1ULL << fill_capacity) - (1ULL << (fill_capacity - 2))
                                    ? fill_capacity
                                    : fill_capacity + 1);
-#endif
+
         size_degree = num_elems <= 1 ? initial_size_degree
                                      : (initial_size_degree > fill_capacity ? initial_size_degree
                                                                             : fill_capacity);
@@ -311,6 +304,7 @@ class alignas(64) HashTableGrowerWithPrecalculation {
     doris::vectorized::UInt8 size_degree_ = initial_size_degree;
     size_t precalculated_mask = (1ULL << initial_size_degree) - 1;
     size_t precalculated_max_fill = 1ULL << (initial_size_degree - 1);
+    doris::vectorized::Int64 double_grow_degree = doris::config::hash_table_double_grow_degree;
 
 public:
     doris::vectorized::UInt8 size_degree() const { return size_degree_; }
@@ -319,7 +313,9 @@ public:
         size_degree_ += delta;
         DCHECK(size_degree_ <= 64);
         precalculated_mask = (1ULL << size_degree_) - 1;
-        precalculated_max_fill = 1ULL << (size_degree_ - 1);
+        precalculated_max_fill = size_degree_ < double_grow_degree
+                                         ? 1ULL << (size_degree_ - 1)
+                                         : (1ULL << size_degree_) - (1ULL << (size_degree_ - 2));
     }
 
     static constexpr auto initial_count = 1ULL << initial_size_degree;
@@ -344,12 +340,17 @@ public:
 
     /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
     void set(size_t num_elems) {
-        size_degree_ =
-                num_elems <= 1
-                        ? initial_size_degree
-                        : ((initial_size_degree > static_cast<size_t>(log2(num_elems - 1)) + 2)
-                                   ? initial_size_degree
-                                   : (static_cast<size_t>(log2(num_elems - 1)) + 2));
+        size_t fill_capacity = static_cast<size_t>(log2(num_elems - 1)) + 1;
+        fill_capacity =
+                fill_capacity < double_grow_degree
+                        ? fill_capacity + 1
+                        : (num_elems < (1ULL << fill_capacity) - (1ULL << (fill_capacity - 2))
+                                   ? fill_capacity
+                                   : fill_capacity + 1);
+
+        size_degree_ = num_elems <= 1 ? initial_size_degree
+                                      : (initial_size_degree > fill_capacity ? initial_size_degree
+                                                                             : fill_capacity);
         increase_size_degree(0);
     }
 
diff --git a/be/src/vec/common/hash_table/join_hash_map.h b/be/src/vec/common/hash_table/join_hash_map.h
deleted file mode 100644
index 089e968766..0000000000
--- a/be/src/vec/common/hash_table/join_hash_map.h
+++ /dev/null
@@ -1,133 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/HashMap.h
-// and modified by Doris
-
-#pragma once
-
-#include "vec/common/hash_table/hash_map.h"
-#include "vec/common/hash_table/join_hash_table.h"
-
-/** NOTE JoinHashMap could only be used for memmoveable (position independent) types.
- * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++.
- * Also, key in hash table must be of type, that zero bytes is compared equals to zero key.
- */
-
-template <typename Key, typename Cell, typename Hash = DefaultHash<Key>,
-          typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator>
-class JoinHashMapTable : public JoinHashTable<Key, Cell, Hash, Grower, Allocator> {
-public:
-    using Self = JoinHashMapTable;
-    using Base = JoinHashTable<Key, Cell, Hash, Grower, Allocator>;
-
-    using key_type = Key;
-    using value_type = typename Cell::value_type;
-    using mapped_type = typename Cell::Mapped;
-
-    using LookupResult = typename Base::LookupResult;
-
-    using JoinHashTable<Key, Cell, Hash, Grower, Allocator>::JoinHashTable;
-
-    /// Merge every cell's value of current map into the destination map via emplace.
-    /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
-    /// Each filled cell in current map will invoke func once. If that map doesn't
-    /// have a key equals to the given cell, a new cell gets emplaced into that map,
-    /// and func is invoked with the third argument emplaced set to true. Otherwise
-    /// emplaced is set to false.
-    template <typename Func>
-    void ALWAYS_INLINE merge_to_via_emplace(Self& that, Func&& func) {
-        for (auto it = this->begin(), end = this->end(); it != end; ++it) {
-            typename Self::LookupResult res_it;
-            bool inserted;
-            that.emplace(it->get_first(), res_it, inserted, it.get_hash());
-            func(*lookup_result_get_mapped(res_it), it->get_second(), inserted);
-        }
-    }
-
-    /// Merge every cell's value of current map into the destination map via find.
-    /// Func should have signature void(Mapped & dst, Mapped & src, bool exist).
-    /// Each filled cell in current map will invoke func once. If that map doesn't
-    /// have a key equals to the given cell, func is invoked with the third argument
-    /// exist set to false. Otherwise exist is set to true.
-    template <typename Func>
-    void ALWAYS_INLINE merge_to_via_find(Self& that, Func&& func) {
-        for (auto it = this->begin(), end = this->end(); it != end; ++it) {
-            auto res_it = that.find(it->get_first(), it.get_hash());
-            if (!res_it)
-                func(it->get_second(), it->get_second(), false);
-            else
-                func(*lookup_result_get_mapped(res_it), it->get_second(), true);
-        }
-    }
-
-    /// Call func(const Key &, Mapped &) for each hash map element.
-    template <typename Func>
-    void for_each_value(Func&& func) {
-        for (auto& v : *this) func(v.get_first(), v.get_second());
-    }
-
-    /// Call func(Mapped &) for each hash map element.
-    template <typename Func>
-    void for_each_mapped(Func&& func) {
-        for (auto& v : *this) func(v.get_second());
-    }
-
-    size_t get_size() {
-        size_t count = 0;
-        for (auto& v : *this) {
-            count += v.get_second().get_row_count();
-        }
-        return count;
-    }
-
-    mapped_type& ALWAYS_INLINE operator[](Key x) {
-        typename JoinHashMapTable::LookupResult it;
-        bool inserted;
-        this->emplace(x, it, inserted);
-
-        /** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor),
-          * since the hash table memory is initially initialized with zeros.
-          * But, in fact, an empty cell may not be initialized with zeros in the following cases:
-          * - ZeroValueStorage (it only zeros the key);
-          * - after resizing and moving a part of the cells to the new half of the hash table, the old cells also have only the key to zero.
-          *
-          * On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately
-          * after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization.
-          *
-          * Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`.
-          * When we do the initialization, for new cells, it's enough to make `store 1` right away.
-          * And if we did not initialize, then even though there was zero in the cell,
-          * the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
-          */
-        if (inserted) new (lookup_result_get_mapped(it)) mapped_type();
-
-        return *lookup_result_get_mapped(it);
-    }
-
-    char* get_null_key_data() { return nullptr; }
-    bool has_null_key_data() const { return false; }
-};
-
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
-          typename Grower = JoinHashTableGrower<>, typename Allocator = HashTableAllocator>
-using JoinHashMap = JoinHashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>;
-
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
-          typename Grower = JoinHashTableGrower<>, typename Allocator = HashTableAllocator>
-using JoinHashMapWithSavedHash =
-        JoinHashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator>;
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/join_hash_table.h b/be/src/vec/common/hash_table/join_hash_table.h
deleted file mode 100644
index 5e51eeb62e..0000000000
--- a/be/src/vec/common/hash_table/join_hash_table.h
+++ /dev/null
@@ -1,733 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "vec/common/allocator.h"
-#include "vec/common/hash_table/hash_table.h"
-
-/** NOTE JoinHashTable could only be used for memmoveable (position independent) types.
- * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++.
- * Also, key in hash table must be of type, that zero bytes is compared equals to zero key.
- */
-
-/** Determines the size of the join hash table, and when and how much it should be resized.
- */
-template <size_t initial_size_degree>
-struct JoinHashTableGrower {
-    /// The state of this structure is enough to get the buffer size of the join hash table.
-    doris::vectorized::UInt8 size_degree = initial_size_degree;
-    doris::vectorized::Int64 double_grow_degree = 31; // 2GB
-
-    size_t bucket_size() const { return 1ULL << (size_degree - 1); }
-
-    /// The size of the join hash table in the cells.
-    size_t buf_size() const { return 1ULL << size_degree; }
-
-    size_t max_fill() const { return buf_size(); }
-
-    size_t mask() const { return bucket_size() - 1; }
-
-    /// From the hash value, get the bucket id (first index) in the join hash table.
-    size_t place(size_t x) const { return x & mask(); }
-
-    /// Whether the join hash table is full. You need to increase the size of the hash table, or remove something unnecessary from it.
-    bool overflow(size_t elems) const { return elems >= max_fill(); }
-
-    /// Increase the size of the join hash table.
-    void increase_size() { size_degree += size_degree >= 23 ? 1 : 2; }
-
-    /// Set the buffer size by the number of elements in the join hash table. Used when deserializing a join hash table.
-    void set(size_t num_elems) {
-#ifndef STRICT_MEMORY_USE
-        size_t fill_capacity = static_cast<size_t>(log2(num_elems - 1)) + 2;
-#else
-        size_t fill_capacity = static_cast<size_t>(log2(num_elems - 1)) + 1;
-        fill_capacity =
-                fill_capacity < double_grow_degree
-                        ? fill_capacity + 1
-                        : (num_elems < (1ULL << fill_capacity) - (1ULL << (fill_capacity - 2))
-                                   ? fill_capacity
-                                   : fill_capacity + 1);
-#endif
-        size_degree = num_elems <= 1 ? initial_size_degree
-                                     : (initial_size_degree > fill_capacity ? initial_size_degree
-                                                                            : fill_capacity);
-    }
-
-    void set_buf_size(size_t buf_size_) {
-        size_degree = static_cast<size_t>(log2(buf_size_ - 1) + 1);
-    }
-};
-
-/** Determines the size of the join hash table, and when and how much it should be resized.
- * This structure is aligned to cache line boundary and also occupies it all.
- * Precalculates some values to speed up lookups and insertion into the JoinHashTable (and thus has bigger memory footprint than JoinHashTableGrower).
- */
-template <size_t initial_size_degree>
-class alignas(64) JoinHashTableGrowerWithPrecalculation {
-    /// The state of this structure is enough to get the buffer size of the join hash table.
-
-    doris::vectorized::UInt8 size_degree_ = initial_size_degree;
-    size_t precalculated_mask = (1ULL << (initial_size_degree - 1)) - 1;
-    size_t precalculated_max_fill = 1ULL << initial_size_degree;
-
-public:
-    doris::vectorized::UInt8 size_degree() const { return size_degree_; }
-
-    void increase_size_degree(doris::vectorized::UInt8 delta) {
-        size_degree_ += delta;
-        precalculated_mask = (1ULL << (size_degree_ - 1)) - 1;
-        precalculated_max_fill = 1ULL << size_degree_;
-    }
-
-    static constexpr auto initial_count = 1ULL << initial_size_degree;
-
-    /// If collision resolution chains are contiguous, we can implement erase operation by moving the elements.
-    static constexpr auto performs_linear_probing_with_single_step = true;
-
-    size_t bucket_size() const { return 1ULL << (size_degree_ - 1); }
-
-    /// The size of the join hash table in the cells.
-    size_t buf_size() const { return 1ULL << size_degree_; }
-
-    /// From the hash value, get the cell number in the join hash table.
-    size_t place(size_t x) const { return x & precalculated_mask; }
-
-    /// Whether the join hash table is full. You need to increase the size of the hash table, or remove something unnecessary from it.
-    bool overflow(size_t elems) const { return elems >= precalculated_max_fill; }
-
-    /// Increase the size of the join hash table.
-    void increase_size() { increase_size_degree(size_degree_ >= 23 ? 1 : 2); }
-
-    /// Set the buffer size by the number of elements in the join hash table. Used when deserializing a join hash table.
-    void set(size_t num_elems) {
-        size_degree_ =
-                num_elems <= 1
-                        ? initial_size_degree
-                        : ((initial_size_degree > static_cast<size_t>(log2(num_elems - 1)) + 2)
-                                   ? initial_size_degree
-                                   : (static_cast<size_t>(log2(num_elems - 1)) + 2));
-        increase_size_degree(0);
-    }
-
-    void set_buf_size(size_t buf_size_) {
-        size_degree_ = static_cast<size_t>(log2(buf_size_ - 1) + 1);
-        increase_size_degree(0);
-    }
-};
-
-static_assert(sizeof(JoinHashTableGrowerWithPrecalculation<>) == 64);
-
-/** When used as a Grower, it turns a hash table into something like a lookup table.
- * It remains non-optimal - the cells store the keys.
- * Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed.
- * TODO Make a proper lookup table.
- */
-template <size_t key_bits>
-struct JoinHashTableFixedGrower {
-    size_t bucket_size() const { return 1ULL << (key_bits - 1); }
-    size_t buf_size() const { return 1ULL << key_bits; }
-    size_t place(size_t x) const { return x & (bucket_size() - 1); }
-    bool overflow(size_t /*elems*/) const { return false; }
-
-    void increase_size() { __builtin_unreachable(); }
-    void set(size_t /*num_elems*/) {}
-    void set_buf_size(size_t /*buf_size_*/) {}
-};
-
-template <typename Key, typename Cell, typename Hash, typename Grower, typename Allocator>
-class JoinHashTable : private boost::noncopyable,
-                      protected Hash,
-                      protected Allocator,
-                      protected Cell::State,
-                      protected ZeroValueStorage<Cell::need_zero_value_storage, Cell> /// empty base optimization
-{
-protected:
-    friend class const_iterator;
-    friend class iterator;
-    friend class Reader;
-
-    template <typename, typename, typename, typename, typename, typename, size_t>
-    friend class TwoLevelHashTable;
-
-    template <typename SubMaps>
-    friend class StringHashTable;
-
-    using HashValue = size_t;
-    using Self = JoinHashTable;
-    using cell_type = Cell;
-
-    size_t m_size = 0;         /// Amount of elements
-    size_t m_no_zero_size = 0; /// Amount of elements except the element with zero key.
-    Cell* buf;                 /// A piece of memory for all elements except the element with zero key.
-
-    // bucket-chained hash table
-    // "first" is the buckets of the hash map, and it holds the index of the first key value saved in each bucket,
-    // while other keys can be found by following the indices saved in
-    // "next". "next[0]" represents the end of the list of keys in a bucket.
-    // https://dare.uva.nl/search?identifier=5ccbb60a-38b8-4eeb-858a-e7735dd37487
-    size_t* first;
-    size_t* next;
-
-    Grower grower;
-    int64_t _resize_timer_ns;
-
-    //factor that will trigger growing the hash table on insert.
-    static constexpr float MAX_BUCKET_OCCUPANCY_FRACTION = 1.0f;
-
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
-    mutable size_t collisions = 0;
-#endif
-
-    /// Find a cell with the same key or an empty cell, starting from the specified position and further along the collision resolution chain.
-    size_t ALWAYS_INLINE find_cell(const Key& x, size_t hash_value, size_t place_value) const {
-        while (place_value && !buf[place_value - 1].is_zero(*this) &&
-               !buf[place_value - 1].key_equals(x, hash_value, *this)) {
-            place_value = next[place_value];
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
-            ++collisions;
-#endif
-        }
-
-        return place_value;
-    }
-
-    std::pair<bool, size_t> ALWAYS_INLINE find_cell_opt(const Key& x, size_t hash_value,
-                                                        size_t place_value) const {
-        bool is_zero = false;
-        do {
-            if (!place_value) return {true, place_value};
-            is_zero = buf[place_value - 1].is_zero(*this); ///
-            if (is_zero || buf[place_value - 1].key_equals(x, hash_value, *this)) break;
-            place_value = next[place_value];
-        } while (true);
-
-        return {is_zero, place_value};
-    }
-
-    /// Find an empty cell, starting with the specified position and further along the collision resolution chain.
-    size_t ALWAYS_INLINE find_empty_cell(size_t place_value) const {
-        while (place_value && !buf[place_value - 1].is_zero(*this)) {
-            place_value = next[place_value];
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
-            ++collisions;
-#endif
-        }
-
-        return place_value;
-    }
-
-    void alloc(const Grower& new_grower) {
-        buf = reinterpret_cast<Cell*>(Allocator::alloc(new_grower.buf_size() * sizeof(Cell)));
-        first = reinterpret_cast<size_t*>(
-                Allocator::alloc(new_grower.bucket_size() * sizeof(size_t)));
-        memset(first, 0, new_grower.bucket_size() * sizeof(size_t));
-        next = reinterpret_cast<size_t*>(
-                Allocator::alloc((new_grower.buf_size() + 1) * sizeof(size_t)));
-        memset(next, 0, (new_grower.buf_size() + 1) * sizeof(size_t));
-        grower = new_grower;
-    }
-
-    void free() {
-        if (buf) {
-            Allocator::free(buf, get_buffer_size_in_bytes());
-            buf = nullptr;
-        }
-        if (first) {
-            Allocator::free(first, grower.bucket_size() * sizeof(size_t));
-            first = nullptr;
-        }
-        if (next) {
-            Allocator::free(next, (grower.buf_size() + 1) * sizeof(size_t));
-            next = nullptr;
-        }
-    }
-
-    /// Increase the size of the buffer.
-    void resize(size_t for_num_elems = 0, size_t for_buf_size = 0) {
-        SCOPED_RAW_TIMER(&_resize_timer_ns);
-#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
-        Stopwatch watch;
-#endif
-
-        size_t old_size = grower.buf_size();
-
-        /** In case of exception for the object to remain in the correct state,
-          * changing the variable `grower` (which determines the buffer size of the hash table)
-          * is postponed for a moment after a real buffer change.
-          * The temporary variable `new_grower` is used to determine the new size.
-          */
-        Grower new_grower = grower;
-        if (for_num_elems) {
-            new_grower.set(for_num_elems);
-            if (new_grower.buf_size() <= old_size) return;
-        } else if (for_buf_size) {
-            new_grower.set_buf_size(for_buf_size);
-            if (new_grower.buf_size() <= old_size) return;
-        } else
-            new_grower.increase_size();
-
-        /// Expand the space.
-        buf = reinterpret_cast<Cell*>(Allocator::realloc(buf, get_buffer_size_in_bytes(),
-                                                         new_grower.buf_size() * sizeof(Cell)));
-        first = reinterpret_cast<size_t*>(Allocator::realloc(
-                first, get_bucket_size_in_bytes(), new_grower.bucket_size() * sizeof(size_t)));
-        memset(first, 0, new_grower.bucket_size() * sizeof(size_t));
-        next = reinterpret_cast<size_t*>(Allocator::realloc(
-                next, get_buffer_size_in_bytes(), (new_grower.buf_size() + 1) * sizeof(size_t)));
-        memset(next, 0, (new_grower.buf_size() + 1) * sizeof(size_t));
-        grower = new_grower;
-
-        /** Now some items may need to be moved to a new location.
-          * The element can stay in place, or move to a new location "on the right",
-          * or move to the left of the collision resolution chain, because the elements to the left of it have been moved to the new "right" location.
-          */
-        size_t i = 0;
-        for (; i < m_no_zero_size; ++i)
-            if (!buf[i].is_zero(*this)) reinsert(i + 1, buf[i], buf[i].get_hash(*this));
-
-#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
-        watch.stop();
-        std::cerr << std::fixed << std::setprecision(3) << "Resize from " << old_size << " to "
-                  << grower.buf_size() << " took " << watch.elapsedSeconds() << " sec."
-                  << std::endl;
-#endif
-    }
-
-    /** Paste into the new buffer the value that was in the old buffer.
-      * Used when increasing the buffer size.
-      */
-    void reinsert(size_t place_value, Cell& x, size_t hash_value) {
-        size_t bucket_value = grower.place(hash_value);
-        next[place_value] = first[bucket_value];
-        first[bucket_value] = place_value;
-    }
-
-    void destroy_elements() {
-        if (!std::is_trivially_destructible_v<Cell>)
-            for (iterator it = begin(), it_end = end(); it != it_end; ++it) it.ptr->~Cell();
-    }
-
-    template <typename Derived, bool is_const>
-    class iterator_base {
-        using Container = std::conditional_t<is_const, const Self, Self>;
-        using cell_type = std::conditional_t<is_const, const Cell, Cell>;
-
-        Container* container;
-        cell_type* ptr;
-
-        friend class JoinHashTable;
-
-    public:
-        iterator_base() {}
-        iterator_base(Container* container_, cell_type* ptr_) : container(container_), ptr(ptr_) {}
-
-        bool operator==(const iterator_base& rhs) const { return ptr == rhs.ptr; }
-        bool operator!=(const iterator_base& rhs) const { return ptr != rhs.ptr; }
-
-        Derived& operator++() {
-            /// If iterator was pointed to ZeroValueStorage, move it to the beginning of the main buffer.
-            if (UNLIKELY(ptr->is_zero(*container)))
-                ptr = container->buf;
-            else
-                ++ptr;
-
-            /// Skip empty cells in the main buffer.
-            auto buf_end = container->buf + container->m_no_zero_size;
-            while (ptr < buf_end && ptr->is_zero(*container)) ++ptr;
-
-            return static_cast<Derived&>(*this);
-        }
-
-        auto& operator*() const { return *ptr; }
-        auto* operator->() const { return ptr; }
-
-        auto get_ptr() const { return ptr; }
-        size_t get_hash() const { return ptr->get_hash(*container); }
-
-        size_t get_collision_chain_length() const { ////////////// ?????????
-            return 0;
-        }
-
-        operator Cell*() const { return nullptr; }
-    };
-
-public:
-    using key_type = Key;
-    using value_type = typename Cell::value_type;
-
-    // Use lookup_result_get_mapped/Key to work with these values.
-    using LookupResult = Cell*;
-    using ConstLookupResult = const Cell*;
-
-    void reset_resize_timer() { _resize_timer_ns = 0; }
-    int64_t get_resize_timer_value() const { return _resize_timer_ns; }
-
-    size_t hash(const Key& x) const { return Hash::operator()(x); }
-
-    JoinHashTable() {
-        if (Cell::need_zero_value_storage) this->zero_value()->set_zero();
-        alloc(grower);
-    }
-
-    JoinHashTable(size_t reserve_for_num_elements) {
-        if (Cell::need_zero_value_storage) this->zero_value()->set_zero();
-        grower.set(reserve_for_num_elements);
-        alloc(grower);
-    }
-
-    JoinHashTable(JoinHashTable&& rhs) : buf(nullptr) { *this = std::move(rhs); }
-
-    ~JoinHashTable() {
-        destroy_elements();
-        free();
-    }
-
-    JoinHashTable& operator=(JoinHashTable&& rhs) {
-        destroy_elements();
-        free();
-
-        std::swap(buf, rhs.buf);
-        std::swap(m_size, rhs.m_size);
-        std::swap(m_no_zero_size, rhs.m_no_zero_size);
-        std::swap(first, rhs.first);
-        std::swap(next, rhs.next);
-        std::swap(grower, rhs.grower);
-
-        Hash::operator=(std::move(rhs));
-        Allocator::operator=(std::move(rhs));
-        Cell::State::operator=(std::move(rhs));
-        ZeroValueStorage<Cell::need_zero_value_storage, Cell>::operator=(std::move(rhs));
-
-        return *this;
-    }
-
-    class iterator : public iterator_base<iterator, false> {
-    public:
-        using iterator_base<iterator, false>::iterator_base;
-    };
-
-    class const_iterator : public iterator_base<const_iterator, true> {
-    public:
-        using iterator_base<const_iterator, true>::iterator_base;
-    };
-
-    const_iterator begin() const {
-        if (!buf) return end();
-
-        if (this->get_has_zero()) return iterator_to_zero();
-
-        const Cell* ptr = buf;
-        auto buf_end = buf + m_no_zero_size;
-        while (ptr < buf_end && ptr->is_zero(*this)) ++ptr;
-
-        return const_iterator(this, ptr);
-    }
-
-    const_iterator cbegin() const { return begin(); }
-
-    iterator begin() {
-        if (!buf) return end();
-
-        if (this->get_has_zero()) return iterator_to_zero();
-
-        Cell* ptr = buf;
-        auto buf_end = buf + m_no_zero_size;
-        while (ptr < buf_end && ptr->is_zero(*this)) ++ptr;
-
-        return iterator(this, ptr);
-    }
-
-    const_iterator end() const { return const_iterator(this, buf + m_no_zero_size); }
-    const_iterator cend() const { return end(); }
-    iterator end() { return iterator(this, buf + m_no_zero_size); }
-
-protected:
-    const_iterator iterator_to(const Cell* ptr) const { return const_iterator(this, ptr); }
-    iterator iterator_to(Cell* ptr) { return iterator(this, ptr); }
-    const_iterator iterator_to_zero() const { return iterator_to(this->zero_value()); }
-    iterator iterator_to_zero() { return iterator_to(this->zero_value()); }
-
-    /// If the key is zero, insert it into a special place and return true.
-    /// We don't have to persist a zero key, because it's not actually inserted.
-    /// That's why we just take a Key by value, and not a key holder.
-    bool ALWAYS_INLINE emplace_if_zero(Key x, LookupResult& it, bool& inserted, size_t hash_value) {
-        /// If it is claimed that the zero key can not be inserted into the table.
-        if (!Cell::need_zero_value_storage) return false;
-
-        if (Cell::is_zero(x, *this)) {
-            it = this->zero_value();
-
-            if (!this->get_has_zero()) {
-                ++m_size;
-                this->set_get_has_zero();
-                this->zero_value()->set_hash(hash_value);
-                inserted = true;
-            } else
-                inserted = false;
-
-            return true;
-        }
-
-        return false;
-    }
-
-    /// Only for non-zero keys. Find the right place, insert the key there, if it does not already exist. Set iterator to the cell in output parameter.
-    template <typename KeyHolder>
-    void ALWAYS_INLINE emplace_non_zero(KeyHolder&& key_holder, LookupResult& it, bool& inserted,
-                                        size_t hash_value) {
-        it = &buf[m_no_zero_size];
-
-        if (!buf[m_no_zero_size].is_zero(*this)) {
-            key_holder_discard_key(key_holder);
-            inserted = false;
-            return;
-        }
-
-        key_holder_persist_key(key_holder);
-        const auto& key = key_holder_get_key(key_holder);
-
-        new (&buf[m_no_zero_size]) Cell(key, *this);
-        buf[m_no_zero_size].set_hash(hash_value);
-        size_t bucket_value = grower.place(hash_value);
-        inserted = true;
-        ++m_size;
-        ++m_no_zero_size;
-        next[m_no_zero_size] = first[bucket_value];
-        first[bucket_value] = m_no_zero_size;
-
-        if (UNLIKELY(grower.overflow(m_size))) {
-            try {
-                resize();
-            } catch (...) {
-                /** If we have not resized successfully, then there will be problems.
-                  * There remains a key, but uninitialized mapped-value,
-                  * which, perhaps, can not even be called a destructor.
-                  */
-                first[bucket_value] = next[m_no_zero_size];
-                next[m_no_zero_size] = 0;
-                --m_size;
-                --m_no_zero_size;
-                buf[m_no_zero_size].set_zero();
-                throw;
-            }
-        }
-    }
-
-public:
-    void expanse_for_add_elem(size_t num_elem) {
-        std::cout << "expanse_for_add_elem\n";
-        if (add_elem_size_overflow(num_elem)) {
-            resize(grower.buf_size() + num_elem);
-        }
-    }
-
-    /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
-    std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type& x) {
-        std::pair<LookupResult, bool> res;
-        size_t hash_value = hash(Cell::get_key(x));
-        if (!emplace_if_zero(Cell::get_key(x), res.first, res.second, hash_value)) {
-            emplace_non_zero(Cell::get_key(x), res.first, res.second, hash_value);
-        }
-
-        if (res.second) insert_set_mapped(lookup_result_get_mapped(res.first), x);
-
-        return res;
-    }
-
-    template <typename KeyHolder>
-    void ALWAYS_INLINE prefetch(KeyHolder& key_holder) {
-        key_holder_get_key(key_holder);
-        __builtin_prefetch(&buf[m_no_zero_size]);
-    }
-
-    /// Reinsert node pointed to by iterator
-    // void ALWAYS_INLINE reinsert(iterator& it, size_t hash_value) {
-    //     reinsert(*it.get_ptr(), hash_value);
-    // }
-
-    /** Insert the key.
-      * Return values:
-      * 'it' -- a LookupResult pointing to the corresponding key/mapped pair.
-      * 'inserted' -- whether a new key was inserted.
-      *
-      * You have to make `placement new` of value if you inserted a new key,
-      * since when destroying a hash table, it will call the destructor!
-      *
-      * Example usage:
-      *
-      * Map::iterator it;
-      * bool inserted;
-      * map.emplace(key, it, inserted);
-      * if (inserted)
-      *     new (&it->second) Mapped(value);
-      */
-    template <typename KeyHolder>
-    void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, bool& inserted) {
-        const auto& key = key_holder_get_key(key_holder);
-        emplace(key_holder, it, inserted, hash(key));
-    }
-
-    template <typename KeyHolder>
-    void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, bool& inserted,
-                               size_t hash_value) {
-        const auto& key = key_holder_get_key(key_holder);
-        if (!emplace_if_zero(key, it, inserted, hash_value))
-            emplace_non_zero(key_holder, it, inserted, hash_value);
-    }
-
-    /// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet.
-    void ALWAYS_INLINE insert_unique_non_zero(const Cell* cell, size_t hash_value) {
-        memcpy(static_cast<void*>(&buf[m_no_zero_size]), cell, sizeof(*cell));
-        size_t bucket_value = grower.place(hash_value);
-        ++m_size;
-        ++m_no_zero_size;
-        next[m_no_zero_size] = first[bucket_value];
-        first[bucket_value] = m_no_zero_size;
-
-        if (UNLIKELY(grower.overflow(m_size))) resize();
-    }
-
-    LookupResult ALWAYS_INLINE find(Key x) {
-        if (Cell::is_zero(x, *this)) return this->get_has_zero() ? this->zero_value() : nullptr;
-
-        size_t hash_value = hash(x);
-        auto [is_zero, place_value] = find_cell_opt(x, hash_value, first[grower.place(hash_value)]);
-
-        if (!place_value) return nullptr;
-
-        return !is_zero ? &buf[place_value - 1] : nullptr;
-    }
-
-    ConstLookupResult ALWAYS_INLINE find(Key x) const {
-        return const_cast<std::decay_t<decltype(*this)>*>(this)->find(x);
-    }
-
-    LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) {
-        if (Cell::is_zero(x, *this)) return this->get_has_zero() ? this->zero_value() : nullptr;
-
-        size_t place_value = find_cell(x, hash_value, first[grower.place(hash_value)]);
-
-        if (!place_value) return nullptr;
-
-        return !buf[place_value - 1].is_zero(*this) ? &buf[place_value - 1] : nullptr;
-    }
-
-    bool ALWAYS_INLINE has(Key x) const {
-        if (Cell::is_zero(x, *this)) return this->get_has_zero();
-
-        size_t hash_value = hash(x);
-        size_t place_value = find_cell(x, hash_value, first[grower.place(hash_value)]);
-        return !place_value && !buf[place_value - 1].is_zero(*this);
-    }
-
-    bool ALWAYS_INLINE has(Key x, size_t hash_value) const {
-        if (Cell::is_zero(x, *this)) return this->get_has_zero();
-
-        size_t place_value = find_cell(x, hash_value, first[grower.place(hash_value)]);
-        return !place_value && !buf[place_value - 1].is_zero(*this);
-    }
-
-    void write(doris::vectorized::BufferWritable& wb) const {
-        Cell::State::write(wb);
-        doris::vectorized::write_var_uint(m_size, wb);
-
-        if (this->get_has_zero()) this->zero_value()->write(wb);
-
-        for (auto ptr = buf, buf_end = buf + m_no_zero_size; ptr < buf_end; ++ptr)
-            if (!ptr->is_zero(*this)) ptr->write(wb);
-    }
-
-    void read(doris::vectorized::BufferReadable& rb) {
-        Cell::State::read(rb);
-
-        destroy_elements();
-        this->clear_get_has_zero();
-        m_size = 0;
-
-        size_t new_size = 0;
-        doris::vectorized::read_var_uint(new_size, rb);
-
-        free();
-        Grower new_grower = grower;
-        new_grower.set(new_size);
-        alloc(new_grower);
-
-        for (size_t i = 0; i < new_size; ++i) {
-            Cell x;
-            x.read(rb);
-            insert(Cell::get_key(x.get_value()));
-        }
-    }
-
-    size_t size() const { return m_size; }
-
-    size_t no_zero_size() const { return m_no_zero_size; }
-
-    bool empty() const { return 0 == m_size; }
-
-    float get_factor() const { return MAX_BUCKET_OCCUPANCY_FRACTION; }
-
-    bool should_be_shrink(int64_t valid_row) { return valid_row < get_factor() * (size() / 2.0); }
-
-    void init_buf_size(size_t reserve_for_num_elements) {
-        free();
-        grower.set(reserve_for_num_elements);
-        alloc(grower);
-    }
-
-    void delete_zero_key(Key key) {
-        if (this->get_has_zero() && Cell::is_zero(key, *this)) {
-            --m_size;
-            this->clear_get_has_zero();
-        }
-    }
-
-    void clear() {
-        destroy_elements();
-        this->clear_get_has_zero();
-        m_size = 0;
-        m_no_zero_size = 0;
-
-        memset(static_cast<void*>(buf), 0, grower.buf_size() * sizeof(*buf));
-    }
-
-    /// After executing this function, the table can only be destroyed,
-    /// and also you can use the methods `size`, `empty`, `begin`, `end`.
-    void clear_and_shrink() {
-        destroy_elements();
-        this->clear_get_has_zero();
-        m_size = 0;
-        m_no_zero_size = 0;
-        free();
-    }
-
-    size_t get_buffer_size_in_bytes() const { return grower.buf_size() * sizeof(Cell); }
-
-    size_t get_bucket_size_in_bytes() const { return grower.bucket_size() * sizeof(Cell); }
-
-    size_t get_buffer_size_in_cells() const { return grower.buf_size(); }
-
-    bool add_elem_size_overflow(size_t add_size) const {
-        return grower.overflow(add_size + m_size);
-    }
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
-    size_t getCollisions() const { return collisions; }
-#endif
-};
\ No newline at end of file
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 1baa7ddd31..9c8a8a10f0 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -100,9 +100,14 @@ struct ProcessHashTableBuild {
         hash_table_ctx.hash_table.reset_resize_timer();
 
         // only not build_unique, we need expanse hash table before insert data
+        // 1. When there are few duplicate keys, pre-expanding reduces the number of hash table
+        //    resizes and can improve performance to some extent, roughly 2%-5%.
+        // 2. When there are many duplicate keys, the filled buckets are far fewer than the
+        //    buckets built, so fully pre-expanding may waste a lot of memory.
+        // TODO: use the NDV of the key column from the optimizer statistics to bound the expansion.
         if (!_join_node->_build_unique) {
-            // _rows contains null row, which will cause hash table resize to be large.
-            RETURN_IF_CATCH_BAD_ALLOC(hash_table_ctx.hash_table.expanse_for_add_elem(_rows));
+            RETURN_IF_CATCH_BAD_ALLOC(hash_table_ctx.hash_table.expanse_for_add_elem(
+                    std::min(_rows, config::hash_table_pre_expanse_max_rows)));
         }
 
         vector<int>& inserted_rows = _join_node->_inserted_rows[&_acquired_block];
diff --git a/build.sh b/build.sh
index fc1f84749f..8c5f18fa9a 100755
--- a/build.sh
+++ b/build.sh
@@ -295,9 +295,6 @@ fi
 if [[ -z "${ENABLE_STACKTRACE}" ]]; then
     ENABLE_STACKTRACE='ON'
 fi
-if [[ -z "${STRICT_MEMORY_USE}" ]]; then
-    STRICT_MEMORY_USE='OFF'
-fi
 
 if [[ -z "${USE_DWARF}" ]]; then
     USE_DWARF='OFF'
@@ -363,7 +360,6 @@ echo "Get params:
     USE_MEM_TRACKER             -- ${USE_MEM_TRACKER}
     USE_JEMALLOC                -- ${USE_JEMALLOC}
     USE_BTHREAD_SCANNER         -- ${USE_BTHREAD_SCANNER}
-    STRICT_MEMORY_USE           -- ${STRICT_MEMORY_USE}
     ENABLE_STACKTRACE           -- ${ENABLE_STACKTRACE}
     DENABLE_CLANG_COVERAGE      -- ${DENABLE_CLANG_COVERAGE}
 "
@@ -436,7 +432,6 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then
             -DUSE_MEM_TRACKER="${USE_MEM_TRACKER}" \
             -DUSE_JEMALLOC="${USE_JEMALLOC}" \
             -DUSE_BTHREAD_SCANNER="${USE_BTHREAD_SCANNER}" \
-            -DSTRICT_MEMORY_USE="${STRICT_MEMORY_USE}" \
             -DENABLE_STACKTRACE="${ENABLE_STACKTRACE}" \
             -DUSE_AVX2="${USE_AVX2}" \
             -DGLIBC_COMPATIBILITY="${GLIBC_COMPATIBILITY}" \
diff --git a/run-be-ut.sh b/run-be-ut.sh
index efdd0afc79..ec3a98e686 100755
--- a/run-be-ut.sh
+++ b/run-be-ut.sh
@@ -196,7 +196,6 @@ cd "${CMAKE_BUILD_DIR}"
         -DUSE_DWARF="${USE_DWARF}" \
         -DUSE_MEM_TRACKER="${USE_MEM_TRACKER}" \
         -DUSE_JEMALLOC=OFF \
-        -DSTRICT_MEMORY_USE=OFF \
        -DEXTRA_CXX_FLAGS="${EXTRA_CXX_FLAGS}" \
        -DENABLE_CLANG_COVERAGE="${DENABLE_CLANG_COVERAGE}" \
        ${CMAKE_USE_CCACHE:+${CMAKE_USE_CCACHE}} \
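
Note: below is a minimal standalone sketch of the grow policy this patch introduces, for reviewers who want to see the numbers. The names (kDoubleGrowDegree, max_fill) are illustrative, not the actual Doris symbols; the constant mirrors the default of the new hash_table_double_grow_degree config. Below 2^double_grow_degree cells the table still grows once it is 50% full; at or above that size it grows only once it is 75% full.

#include <cstdint>
#include <cstdio>

// Mirrors the default of hash_table_double_grow_degree (2^31 cells, ~2G).
static constexpr int kDoubleGrowDegree = 31;

// Maximum number of filled cells tolerated for a table of 2^size_degree cells,
// following the patched max_fill()/precalculated_max_fill expression.
static uint64_t max_fill(int size_degree) {
    return size_degree < kDoubleGrowDegree
                   ? 1ULL << (size_degree - 1)                            // 50% fill factor
                   : (1ULL << size_degree) - (1ULL << (size_degree - 2)); // 75% fill factor
}

int main() {
    // A small table (2^10 cells) still resizes after 512 inserts (50% full)...
    std::printf("degree 10: max_fill = %llu\n", (unsigned long long)max_fill(10));
    // ...while a 2^31-cell table resizes only after ~1.61G inserts (75% full),
    // deferring the next doubling and the extra 2G cells it would allocate.
    std::printf("degree 31: max_fill = %llu\n", (unsigned long long)max_fill(31));
    return 0;
}

With the old strategy a 2^31-cell table would already resize at 2^30 (~1.07G) filled cells; raising the threshold to 75% lets roughly 0.5G more rows be inserted before the table doubles to 2^32 cells.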