From a9671b6dfd9e991c6c2ba82efea694c6cfd86f89 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Mon, 30 Jan 2023 16:43:33 +0800 Subject: [PATCH] [feature](agg)support two level-hash map in aggregation node (#15967) --- be/src/runtime/runtime_state.h | 7 + be/src/vec/common/columns_hashing.h | 1 + be/src/vec/common/columns_hashing_impl.h | 3 +- .../vec/common/hash_table/hash_table_utils.h | 4 +- .../common/hash_table/partitioned_hash_map.h | 39 ++ .../hash_table/partitioned_hash_table.h | 84 ++++- be/src/vec/common/hash_table/ph_hash_map.h | 135 ++++--- .../vec/common/hash_table/string_hash_map.h | 4 +- .../vec/common/hash_table/string_hash_table.h | 3 +- be/src/vec/exec/vaggregation_node.cpp | 40 +- be/src/vec/exec/vaggregation_node.h | 343 ++++++++++++++---- .../org/apache/doris/qe/SessionVariable.java | 7 + gensrc/thrift/PaloInternalService.thrift | 2 + 13 files changed, 526 insertions(+), 146 deletions(-) diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 8004373a08..b6f02c2d2c 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -342,6 +342,13 @@ public: return _query_options.partitioned_hash_join_rows_threshold; } + int partitioned_hash_agg_rows_threshold() const { + if (!_query_options.__isset.partitioned_hash_agg_rows_threshold) { + return 0; + } + return _query_options.partitioned_hash_agg_rows_threshold; + } + const std::vector& tablet_commit_infos() const { return _tablet_commit_infos; } diff --git a/be/src/vec/common/columns_hashing.h b/be/src/vec/common/columns_hashing.h index dfecead77e..64888a7beb 100644 --- a/be/src/vec/common/columns_hashing.h +++ b/be/src/vec/common/columns_hashing.h @@ -64,6 +64,7 @@ struct HashMethodOneNumber : public columns_hashing_impl::HashMethodBase< /// Find key into HashTable or HashMap. If Data is HashMap and key was found, returns ptr to value, otherwise nullptr. using Base::find_key; /// (Data & data, size_t row, Arena & pool) -> FindResult + using Base::find_key_with_hash; /// Get hash value of row. using Base::get_hash; /// (const Data & data, size_t row, Arena & pool) -> size_t diff --git a/be/src/vec/common/columns_hashing_impl.h b/be/src/vec/common/columns_hashing_impl.h index 19aac5d3b5..5b85d2732c 100644 --- a/be/src/vec/common/columns_hashing_impl.h +++ b/be/src/vec/common/columns_hashing_impl.h @@ -163,7 +163,8 @@ public: } template - ALWAYS_INLINE FindResult find_key(Data& data, size_t hash_value, size_t row, Arena& pool) { + ALWAYS_INLINE FindResult find_key_with_hash(Data& data, size_t hash_value, size_t row, + Arena& pool) { auto key_holder = static_cast(*this).get_key_holder(row, pool); return find_key_impl(key_holder_get_key(key_holder), hash_value, data); } diff --git a/be/src/vec/common/hash_table/hash_table_utils.h b/be/src/vec/common/hash_table/hash_table_utils.h index 0302b72f3c..5c081f80b9 100644 --- a/be/src/vec/common/hash_table/hash_table_utils.h +++ b/be/src/vec/common/hash_table/hash_table_utils.h @@ -18,9 +18,11 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/HashTable.h // and modified by Doris +#pragma once + template struct HashTableTraits { static constexpr bool is_phmap = false; - static constexpr bool is_parallel_phmap = false; static constexpr bool is_string_hash_table = false; + static constexpr bool is_partitioned_table = false; }; diff --git a/be/src/vec/common/hash_table/partitioned_hash_map.h b/be/src/vec/common/hash_table/partitioned_hash_map.h index b78d9a56ed..a56e01f29b 100644 --- a/be/src/vec/common/hash_table/partitioned_hash_map.h +++ b/be/src/vec/common/hash_table/partitioned_hash_map.h @@ -21,6 +21,7 @@ #include "vec/common/hash_table/hash_map.h" #include "vec/common/hash_table/partitioned_hash_table.h" +#include "vec/common/hash_table/ph_hash_map.h" template class PartitionedHashMapTable : public PartitionedHashTable { @@ -46,8 +47,46 @@ public: return *lookup_result_get_mapped(it); } + + template + void for_each_mapped(Func&& func) { + for (auto& v : *this) { + func(v.get_second()); + } + } }; template > using PartitionedHashMap = PartitionedHashMapTable>>; + +template > +using PHPartitionedHashMap = PartitionedHashMapTable>; + +template +struct HashTableTraits> { + static constexpr bool is_phmap = false; + static constexpr bool is_string_hash_table = false; + static constexpr bool is_partitioned_table = true; +}; + +template