[Improvement](aggregate) optimize for small string aggregate (#29919)

Author: Pxl
Date: 2024-01-18 16:47:03 +08:00
Committed by: yiguolei
Parent: f1462f6cf4
Commit: a5ca8833d7
14 changed files with 211 additions and 250 deletions
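Summary of the change: StringHashTable previously split string keys into sub-maps for 1-8, 9-16, and 17-24 byte keys plus a long-string fallback; this patch replaces that layout with fixed-width sub-maps for 1-2, 3-4, 5-8, and 9-16 byte keys (StringHashMapSubKeys::T1..T4), removes StringKey24, and sends anything longer to the saved-hash fallback map. A short key is packed into an integer of its sub-map's width, so it is hashed and compared as a whole value, and for string hash maps only the origin string still needs an arena copy (see try_presis_key_and_origin below). A minimal sketch of the dispatch idea, with std::unordered_map standing in for Doris's HashMapTable sub-maps; the names are illustrative, and the real table also keeps a dedicated slot for the empty string and a 16-byte UInt128 level:

#include <cstdint>
#include <cstring>
#include <string>
#include <string_view>
#include <unordered_map>

// Pack a short string into a fixed-width integer key: unused high bytes stay
// zero, so equal strings always produce equal integers.
template <typename UInt>
UInt pack_key(std::string_view s) {
    UInt key {};
    std::memcpy(&key, s.data(), s.size()); // s.size() <= sizeof(UInt) by construction
    return key;
}

// Dispatch-by-length sketch: each length class gets its own map keyed by a
// fixed-width integer; only long strings fall back to heap-owned keys.
struct SmallStringMapSketch {
    std::unordered_map<uint16_t, int> m1;    // 1..2 byte keys (the real table keeps empty strings in a separate m0 slot)
    std::unordered_map<uint32_t, int> m2;    // 3..4 byte keys
    std::unordered_map<uint64_t, int> m3;    // 5..8 byte keys
    std::unordered_map<std::string, int> ms; // longer keys (the real table adds a 16-byte level before this fallback)

    int& operator[](std::string_view s) {
        if (s.size() <= 2) return m1[pack_key<uint16_t>(s)];
        if (s.size() <= 4) return m2[pack_key<uint32_t>(s)];
        if (s.size() <= 8) return m3[pack_key<uint64_t>(s)];
        return ms[std::string(s)];
    }
};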

View File

@ -536,7 +536,7 @@ void AggSinkLocalState<DependencyType, Derived>::_emplace_into_hash_table(
agg_method.init_serialized_keys(key_columns, num_rows);
auto creator = [this](const auto& ctor, auto& key, auto& origin) {
HashMethodType::try_presis_key(key, origin, *_agg_arena_pool);
HashMethodType::try_presis_key_and_origin(key, origin, *_agg_arena_pool);
auto mapped =
Base::_shared_state->aggregate_data_container->append_data(origin);
auto st = _create_agg_status(mapped);
@ -686,7 +686,7 @@ Status AggSinkLocalState<DependencyType, Derived>::_reset_hash_table() {
((ss.total_size_of_aggregate_states + ss.align_aggregate_states - 1) /
ss.align_aggregate_states) *
ss.align_aggregate_states));
hash_table = HashTableType();
agg_method.hash_table.reset(new HashTableType());
ss.agg_arena_pool.reset(new vectorized::Arena);
return Status::OK();
},

View File

@ -17,6 +17,7 @@
#include "aggregation_source_operator.h"
#include <memory>
#include <string>
#include "common/exception.h"
@ -141,13 +142,13 @@ Status AggLocalState::_reset_hash_table() {
}
});
ss.aggregate_data_container.reset(new vectorized::AggregateDataContainer(
ss.aggregate_data_container = std::make_unique<vectorized::AggregateDataContainer>(
sizeof(typename HashTableType::key_type),
((ss.total_size_of_aggregate_states + ss.align_aggregate_states - 1) /
ss.align_aggregate_states) *
ss.align_aggregate_states));
hash_table = HashTableType();
ss.agg_arena_pool.reset(new vectorized::Arena);
ss.align_aggregate_states);
agg_method.hash_table.reset(new HashTableType());
ss.agg_arena_pool = std::make_unique<vectorized::Arena>();
return Status::OK();
},
ss.agg_data->method_variant);

View File

@ -189,7 +189,7 @@ void PartitionSortSinkOperatorX::_emplace_into_hash_table(
auto creator = [&](const auto& ctor, auto& key, auto& origin) {
HashMethodType::try_presis_key(key, origin, *local_state._agg_arena_pool);
auto aggregate_data = _pool->add(new vectorized::PartitionBlocks());
auto* aggregate_data = _pool->add(new vectorized::PartitionBlocks());
local_state._value_places.push_back(aggregate_data);
ctor(key, aggregate_data);
local_state._num_partition++;
@ -206,7 +206,7 @@ void PartitionSortSinkOperatorX::_emplace_into_hash_table(
agg_method.lazy_emplace(state, row, creator, creator_for_null_key);
mapped->add_row_idx(row);
}
for (auto place : local_state._value_places) {
for (auto* place : local_state._value_places) {
SCOPED_TIMER(local_state._selector_block_timer);
place->append_block_by_selector(input_block, _child_x->row_desc(),
_has_global_limit, _partition_inner_limit,

View File

@ -155,12 +155,6 @@ public:
using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable;
/// Call func(const Key &, Mapped &) for each hash map element.
template <typename Func>
void for_each_value(Func&& func) {
for (auto& v : *this) func(v.get_first(), v.get_second());
}
/// Call func(Mapped &) for each hash map element.
template <typename Func>
void for_each_mapped(Func&& func) {

View File

@ -128,20 +128,22 @@ struct MethodBase {
template <typename State>
ALWAYS_INLINE auto find(State& state, size_t i) {
prefetch<true>(i);
if constexpr (!is_string_hash_map()) {
prefetch<true>(i);
}
return state.find_key_with_hash(*hash_table, hash_values[i], keys[i]);
}
template <typename State, typename F, typename FF>
ALWAYS_INLINE auto& lazy_emplace(State& state, size_t i, F&& creator,
FF&& creator_for_null_key) {
prefetch<false>(i);
if constexpr (!is_string_hash_map()) {
prefetch<false>(i);
}
return state.lazy_emplace_key(*hash_table, i, keys[i], hash_values[i], creator,
creator_for_null_key);
}
static constexpr bool need_presis() { return std::is_same_v<Key, StringRef>; }
static constexpr bool is_string_hash_map() {
return std::is_same_v<StringHashMap<Mapped>, HashMap> ||
std::is_same_v<DataWithNullKey<StringHashMap<Mapped>>, HashMap>;
@ -149,7 +151,14 @@ struct MethodBase {
template <typename Key, typename Origin>
static void try_presis_key(Key& key, Origin& origin, Arena& arena) {
if constexpr (need_presis()) {
if constexpr (std::is_same_v<Key, StringRef>) {
key.data = arena.insert(key.data, key.size);
}
}
template <typename Key, typename Origin>
static void try_presis_key_and_origin(Key& key, Origin& origin, Arena& arena) {
if constexpr (std::is_same_v<Origin, StringRef>) {
origin.data = arena.insert(origin.data, origin.size);
if constexpr (!is_string_hash_map()) {
key = origin;
@ -303,7 +312,6 @@ struct MethodOneNumber : public MethodBase<TData> {
->get_raw_data()
.data
: key_columns[0]->get_raw_data().data);
std::string name = key_columns[0]->get_name();
if (is_join) {
Base::init_join_bucket_num(num_rows, bucket_size, null_map);
} else {
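Two changes in MethodBase above are worth spelling out. find and lazy_emplace now issue the per-row prefetch only when the table is not a string hash map. Key persistence is also split in two: try_presis_key keeps its old behavior of arena-copying a StringRef key, while the new try_presis_key_and_origin always persists the origin string (it is what gets appended to the aggregate data container) and, for non-string hash maps only, also points the key at that copy, presumably because the string table's fixed-width sub-maps store short keys by value. A standalone sketch of the two helpers, using a toy arena whose insert() copies bytes and returns an arena-owned pointer (the contract the diff relies on):

#include <cstddef>
#include <type_traits>
#include <vector>

struct StringRef { const char* data; size_t size; };

// Toy arena: insert() copies n bytes into arena-owned storage and returns the
// new pointer, mirroring how the diff uses doris::vectorized::Arena::insert.
struct ArenaSketch {
    std::vector<std::vector<char>> chunks;
    const char* insert(const char* p, size_t n) {
        chunks.emplace_back(p, p + n);
        return chunks.back().data();
    }
};

// Old helper: only a StringRef key needs to outlive the source block.
template <typename Key, typename Origin>
void try_presis_key(Key& key, Origin& /*origin*/, ArenaSketch& arena) {
    if constexpr (std::is_same_v<Key, StringRef>) {
        key.data = arena.insert(key.data, key.size);
    }
}

// New helper: persist the origin string (it is stored next to the aggregate
// state). Mirroring the diff, only non-string hash maps also point the key at
// the arena copy.
template <bool is_string_hash_map, typename Key, typename Origin>
void try_presis_key_and_origin(Key& key, Origin& origin, ArenaSketch& arena) {
    if constexpr (std::is_same_v<Origin, StringRef>) {
        origin.data = arena.insert(origin.data, origin.size);
        if constexpr (!is_string_hash_map) {
            key = origin;
        }
    }
}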

View File

@ -353,29 +353,6 @@ public:
}
};
static_assert(sizeof(HashTableGrowerWithPrecalculation<>) == 64);
/** When used as a Grower, it turns a hash table into something like a lookup table.
* It remains non-optimal - the cells store the keys.
* Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed.
* TODO Make a proper lookup table.
*/
template <size_t key_bits>
struct HashTableFixedGrower {
size_t buf_size() const { return 1ULL << key_bits; }
size_t place(size_t x) const { return x; }
/// You could write __builtin_unreachable(), but the compiler does not optimize everything, and it turns out less efficiently.
size_t next(size_t pos) const { return pos + 1; }
bool overflow(size_t /*elems*/) const { return false; }
void increase_size() {
LOG(FATAL) << "__builtin_unreachable";
__builtin_unreachable();
}
void set(size_t /*num_elems*/) {}
void set_buf_size(size_t /*buf_size_*/) {}
};
/** If you want to store the zero key separately - a place to store it. */
template <bool need_zero_value_storage, typename Cell>
struct ZeroValueStorage;
@ -573,11 +550,6 @@ protected:
auto get_ptr() const { return ptr; }
size_t get_hash() const { return ptr->get_hash(*container); }
size_t get_collision_chain_length() const {
return container->grower.place((ptr - container->buf) -
container->grower.place(get_hash()));
}
/**
* A hack for HashedDictionary.
*

View File

@ -97,17 +97,6 @@ public:
}
}
template <typename Func>
void ALWAYS_INLINE for_each_value(Func&& func) {
if (_is_partitioned) {
for (auto i = 0u; i < NUM_LEVEL1_SUB_TABLES; ++i) {
level1_sub_tables[i].for_each_value(func);
}
} else {
level0_sub_table.for_each_value(func);
}
}
size_t size() {
size_t count = 0;
if (_is_partitioned) {

View File

@ -102,8 +102,6 @@ public:
auto get_ptr() const { return this; }
size_t get_hash() const { return base_iterator->get_hash(); }
size_t get_collision_chain_length() const { return 0; }
};
class iterator : public iterator_base<iterator, false> {
@ -202,12 +200,6 @@ public:
_hash_map.prefetch_hash(hash_value);
}
/// Call func(const Key &, Mapped &) for each hash map element.
template <typename Func>
void for_each_value(Func&& func) {
for (auto& v : *this) func(v.get_first(), v.get_second());
}
/// Call func(Mapped &) for each hash map element.
template <typename Func>
void for_each_mapped(Func&& func) {

View File

@ -55,26 +55,6 @@ struct StringHashMapCell<StringKey16, TMapped>
static const StringKey16& get_key(const value_type& value_) { return value_.first; }
};
template <typename TMapped>
struct StringHashMapCell<StringKey24, TMapped>
: public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState> {
using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>;
using value_type = typename Base::value_type;
using Base::Base;
static constexpr bool need_zero_value_storage = false;
bool is_zero(const HashTableNoState& state) const { return is_zero(this->value.first, state); }
// Zero means unoccupied cells in hash table. Use key with last word = 0 as
// zero keys, because such keys are unrepresentable (no way to encode length).
static bool is_zero(const StringKey24& key, const HashTableNoState&) { return key.c == 0; }
void set_zero() { this->value.first.c = 0; }
// external
const doris::StringRef get_key() const { return to_string_ref(this->value.first); } /// NOLINT
// internal
static const StringKey24& get_key(const value_type& value_) { return value_.first; }
};
template <typename TMapped>
struct StringHashMapCell<doris::StringRef, TMapped>
: public HashMapCellWithSavedHash<doris::StringRef, TMapped, StringHashTableHash,
@ -108,11 +88,17 @@ struct StringHashMapCell<doris::StringRef, TMapped>
template <typename TMapped, typename Allocator>
struct StringHashMapSubMaps {
using T0 = StringHashTableEmpty<StringHashMapCell<doris::StringRef, TMapped>>;
using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, TMapped>, StringHashTableHash,
StringHashTableGrower<>, Allocator>;
using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, TMapped>,
using T1 = HashMapTable<StringHashMapSubKeys::T1,
StringHashMapCell<StringHashMapSubKeys::T1, TMapped>,
StringHashTableHash, StringHashTableGrower<4>, Allocator>;
using T2 = HashMapTable<StringHashMapSubKeys::T2,
StringHashMapCell<StringHashMapSubKeys::T2, TMapped>,
StringHashTableHash, StringHashTableGrower<>, Allocator>;
using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, TMapped>,
using T3 = HashMapTable<StringHashMapSubKeys::T3,
StringHashMapCell<StringHashMapSubKeys::T3, TMapped>,
StringHashTableHash, StringHashTableGrower<>, Allocator>;
using T4 = HashMapTable<StringHashMapSubKeys::T4,
StringHashMapCell<StringHashMapSubKeys::T4, TMapped>,
StringHashTableHash, StringHashTableGrower<>, Allocator>;
using Ts = HashMapTable<doris::StringRef, StringHashMapCell<doris::StringRef, TMapped>,
StringHashTableHash, StringHashTableGrower<>, Allocator>;
@ -132,41 +118,33 @@ public:
LookupResult it;
bool inserted;
this->emplace(x, it, inserted);
if (inserted) new (&it->get_mapped()) TMapped();
if (inserted) {
new (&it->get_mapped()) TMapped();
}
return it->get_mapped();
}
template <typename Func>
void ALWAYS_INLINE for_each_value(Func&& func) {
if (this->m0.size()) {
func(doris::StringRef {}, this->m0.zero_value()->get_second());
}
for (auto& v : this->m1) {
func(v.get_key(), v.get_second());
}
for (auto& v : this->m2) {
func(v.get_key(), v.get_second());
}
for (auto& v : this->m3) {
func(v.get_key(), v.get_second());
}
for (auto& v : this->ms) {
func(v.get_key(), v.get_second());
}
}
template <typename Func>
void ALWAYS_INLINE for_each_mapped(Func&& func) {
if (this->m0.size()) func(this->m0.zero_value()->get_second());
for (auto& v : this->m1) func(v.get_second());
for (auto& v : this->m2) func(v.get_second());
for (auto& v : this->m3) func(v.get_second());
for (auto& v : this->ms) func(v.get_second());
if (this->m0.size()) {
func(this->m0.zero_value()->get_second());
}
for (auto& v : this->m1) {
func(v.get_second());
}
for (auto& v : this->m2) {
func(v.get_second());
}
for (auto& v : this->m3) {
func(v.get_second());
}
for (auto& v : this->m4) {
func(v.get_second());
}
for (auto& v : this->ms) {
func(v.get_second());
}
}
template <typename MappedType>
char* get_null_key_data() {

View File

@ -24,41 +24,43 @@
#include <variant>
#include "vec/common/hash_table/hash.h"
#include "vec/common/hash_table/hash_table.h"
#include "vec/common/memcpy_small.h"
using StringKey2 = doris::vectorized::UInt16;
using StringKey4 = doris::vectorized::UInt32;
using StringKey8 = doris::vectorized::UInt64;
using StringKey16 = doris::vectorized::UInt128;
struct StringKey24 {
doris::vectorized::UInt64 a;
doris::vectorized::UInt64 b;
doris::vectorized::UInt64 c;
bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; }
struct StringHashMapSubKeys {
using T1 = StringKey2;
using T2 = StringKey4;
using T3 = StringKey8;
using T4 = StringKey16;
};
template <typename StringKey>
StringKey toStringKey(const doris::StringRef& key) {
StringKey to_string_key(const doris::StringRef& key) {
DCHECK_LE(key.size, sizeof(StringKey));
StringKey string_key {};
memcpy((char*)&string_key, key.data, key.size);
memcpy_small<sizeof(StringKey)>((char*)&string_key, key.data, key.size);
return string_key;
}
inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
template <typename T>
inline doris::StringRef ALWAYS_INLINE to_string_ref(const T& n) {
assert(n != 0);
return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 3)};
return {reinterpret_cast<const char*>(&n), sizeof(T) - (__builtin_clzll(n) >> 3)};
}
inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
assert(n.high != 0);
return {reinterpret_cast<const char*>(&n), 16ul - (__builtin_clzll(n.high) >> 3)};
}
inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) {
assert(n.c != 0);
return {reinterpret_cast<const char*>(&n), 24ul - (__builtin_clzll(n.c) >> 3)};
}
struct StringHashTableHash {
#if defined(__SSE4_2__) || defined(__aarch64__)
size_t ALWAYS_INLINE operator()(StringKey8 key) const {
template <typename T>
size_t ALWAYS_INLINE operator()(T key) const {
size_t res = -1ULL;
res = _mm_crc32_u64(res, key);
return res;
@ -69,31 +71,23 @@ struct StringHashTableHash {
res = _mm_crc32_u64(res, key.high);
return res;
}
size_t ALWAYS_INLINE operator()(StringKey24 key) const {
size_t res = -1ULL;
res = _mm_crc32_u64(res, key.a);
res = _mm_crc32_u64(res, key.b);
res = _mm_crc32_u64(res, key.c);
return res;
}
#else
size_t ALWAYS_INLINE operator()(StringKey8 key) const {
return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 8);
}
size_t ALWAYS_INLINE operator()(StringKey16 key) const {
return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 16);
}
size_t ALWAYS_INLINE operator()(StringKey24 key) const {
return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 24);
template <typename T>
size_t ALWAYS_INLINE operator()(T key) const {
return util_hash::CityHash64(reinterpret_cast<const char*>(&key), sizeof(T));
}
#endif
size_t ALWAYS_INLINE operator()(doris::StringRef key) const {
if (key.size <= 8) {
return StringHashTableHash()(toStringKey<StringKey8>(key));
} else if (key.size <= 16) {
return StringHashTableHash()(toStringKey<StringKey16>(key));
} else if (key.size <= 24) {
return StringHashTableHash()(toStringKey<StringKey24>(key));
if (key.size == 0) {
return 0;
} else if (key.size <= sizeof(StringHashMapSubKeys::T1)) {
return StringHashTableHash()(to_string_key<StringHashMapSubKeys::T1>(key));
} else if (key.size <= sizeof(StringHashMapSubKeys::T2)) {
return StringHashTableHash()(to_string_key<StringHashMapSubKeys::T2>(key));
} else if (key.size <= sizeof(StringHashMapSubKeys::T3)) {
return StringHashTableHash()(to_string_key<StringHashMapSubKeys::T3>(key));
} else if (key.size <= sizeof(StringHashMapSubKeys::T4)) {
return StringHashTableHash()(to_string_key<StringHashMapSubKeys::T4>(key));
}
return doris::StringRefHash()(key);
}
@ -194,10 +188,9 @@ struct StringHashTableGrower : public HashTableGrowerWithPrecalculation<initial_
template <typename Mapped>
struct StringHashTableLookupResult {
Mapped* mapped_ptr;
StringHashTableLookupResult() : mapped_ptr(nullptr) {} /// NOLINT
StringHashTableLookupResult(Mapped* mapped_ptr_) : mapped_ptr(mapped_ptr_) {} /// NOLINT
StringHashTableLookupResult(std::nullptr_t) {} /// NOLINT
const VoidKey getKey() const { return {}; } /// NOLINT
StringHashTableLookupResult() : mapped_ptr(nullptr) {}
StringHashTableLookupResult(Mapped* mapped_ptr_) : mapped_ptr(mapped_ptr_) {}
StringHashTableLookupResult(std::nullptr_t) {}
auto& get_mapped() { return *mapped_ptr; }
auto& operator*() { return *this; }
auto& operator*() const { return *this; }
@ -226,7 +219,6 @@ ALWAYS_INLINE inline auto lookup_result_get_mapped(StringHashTableLookupResult<M
template <typename SubMaps>
class StringHashTable : private boost::noncopyable {
protected:
static constexpr size_t NUM_MAPS = 5;
// Map for storing empty string
using T0 = typename SubMaps::T0;
@ -234,18 +226,17 @@ protected:
using T1 = typename SubMaps::T1;
using T2 = typename SubMaps::T2;
using T3 = typename SubMaps::T3;
using T4 = typename SubMaps::T4;
// Long strings are stored as doris::StringRef along with saved hash
using Ts = typename SubMaps::Ts;
using Self = StringHashTable;
template <typename, typename, size_t>
friend class TwoLevelStringHashTable;
T0 m0;
T1 m1;
T2 m2;
T3 m3;
T4 m4;
Ts ms;
using Cell = typename Ts::cell_type;
@ -259,7 +250,8 @@ protected:
typename T1::iterator iterator1;
typename T2::iterator iterator2;
typename T3::iterator iterator3;
typename Ts::iterator iterator4;
typename T4::iterator iterator4;
typename Ts::iterator iterator5;
typename Ts::cell_type cell;
@ -269,39 +261,52 @@ protected:
iterator_base() = default;
iterator_base(Container* container_, bool end = false) : container(container_) {
if (end) {
sub_table_index = 4;
iterator4 = container->ms.end();
sub_table_index = 5;
iterator5 = container->ms.end();
} else {
sub_table_index = 0;
if (container->m0.size() == 0)
if (container->m0.size() == 0) {
sub_table_index++;
else
} else {
return;
}
iterator1 = container->m1.begin();
if (iterator1 == container->m1.end())
if (iterator1 == container->m1.end()) {
sub_table_index++;
else
} else {
return;
}
iterator2 = container->m2.begin();
if (iterator2 == container->m2.end())
if (iterator2 == container->m2.end()) {
sub_table_index++;
else
} else {
return;
}
iterator3 = container->m3.begin();
if (iterator3 == container->m3.end())
if (iterator3 == container->m3.end()) {
sub_table_index++;
else
} else {
return;
}
iterator4 = container->ms.begin();
iterator4 = container->m4.begin();
if (iterator4 == container->m4.end()) {
sub_table_index++;
} else {
return;
}
iterator5 = container->ms.begin();
}
}
bool operator==(const iterator_base& rhs) const {
if (sub_table_index != rhs.sub_table_index) return false;
if (sub_table_index != rhs.sub_table_index) {
return false;
}
switch (sub_table_index) {
case 0: {
return true;
@ -318,6 +323,9 @@ protected:
case 4: {
return iterator4 == rhs.iterator4;
}
case 5: {
return iterator5 == rhs.iterator5;
}
}
LOG(FATAL) << "__builtin_unreachable";
__builtin_unreachable();
@ -355,6 +363,13 @@ protected:
}
case 4: {
++iterator4;
if (iterator4 == container->m4.end()) {
need_switch_to_next = true;
}
break;
}
case 5: {
++iterator5;
break;
}
}
@ -385,7 +400,14 @@ protected:
break;
}
case 4: {
iterator4 = container->ms.begin();
iterator4 = container->m4.begin();
if (iterator4 == container->m4.end()) {
need_switch_to_next = true;
}
break;
}
case 5: {
iterator5 = container->ms.begin();
break;
}
}
@ -416,6 +438,10 @@ protected:
const_cast<iterator_base*>(this)->cell = *iterator4;
break;
}
case 5: {
const_cast<iterator_base*>(this)->cell = *iterator5;
break;
}
}
return cell;
}
@ -438,13 +464,14 @@ protected:
return iterator3->get_hash(container->m3);
}
case 4: {
return iterator4->get_hash(container->ms);
return iterator4->get_hash(container->m4);
}
case 5: {
return iterator5->get_hash(container->ms);
}
}
}
size_t get_collision_chain_length() const { return 0; }
/**
* A hack for HashedDictionary.
*
@ -476,25 +503,11 @@ public:
StringHashTable() = default;
explicit StringHashTable(size_t reserve_for_num_elements)
: m1 {reserve_for_num_elements / 4},
m2 {reserve_for_num_elements / 4},
m3 {reserve_for_num_elements / 4},
ms {reserve_for_num_elements / 4} {}
StringHashTable(StringHashTable&& rhs) noexcept
: m1(std::move(rhs.m1)),
m2(std::move(rhs.m2)),
m3(std::move(rhs.m3)),
ms(std::move(rhs.ms)) {}
StringHashTable& operator=(StringHashTable&& other) {
std::swap(m0, other.m0);
std::swap(m1, other.m1);
std::swap(m2, other.m2);
std::swap(m3, other.m3);
std::swap(ms, other.ms);
return *this;
}
: m1 {reserve_for_num_elements / 5},
m2 {reserve_for_num_elements / 5},
m3 {reserve_for_num_elements / 5},
m4 {reserve_for_num_elements / 5},
ms {reserve_for_num_elements / 5} {}
~StringHashTable() = default;
@ -524,24 +537,20 @@ public:
return func(self.ms, std::forward<KeyHolder>(key), key, hash_value);
}
switch ((sz - 1) >> 3) {
case 0: // 1..8 bytes
{
return func(self.m1, toStringKey<StringKey8>(key), key, hash_value);
if (sz <= sizeof(StringHashMapSubKeys::T1)) {
return func(self.m1, to_string_key<StringHashMapSubKeys::T1>(key), key, hash_value);
}
case 1: // 9..16 bytes
{
return func(self.m2, toStringKey<StringKey16>(key), key, hash_value);
if (sz <= sizeof(StringHashMapSubKeys::T2)) {
return func(self.m2, to_string_key<StringHashMapSubKeys::T2>(key), key, hash_value);
}
case 2: // 17..24 bytes
{
return func(self.m3, toStringKey<StringKey24>(key), key, hash_value);
}
default: // >= 25 bytes
{
return func(self.ms, std::forward<KeyHolder>(key), key, hash_value);
if (sz <= sizeof(StringHashMapSubKeys::T3)) {
return func(self.m3, to_string_key<StringHashMapSubKeys::T3>(key), key, hash_value);
}
if (sz <= sizeof(StringHashMapSubKeys::T4)) {
return func(self.m4, to_string_key<StringHashMapSubKeys::T4>(key), key, hash_value);
}
return func(self.ms, std::forward<KeyHolder>(key), key, hash_value);
}
struct EmplaceCallable {
@ -594,12 +603,14 @@ public:
if (!key.size) {
return;
}
if (key.size <= 8) {
if (key.size <= sizeof(StringHashMapSubKeys::T1)) {
m1.template prefetch<read>(hash_value);
} else if (key.size <= 16) {
} else if (key.size <= sizeof(StringHashMapSubKeys::T2)) {
m2.template prefetch<read>(hash_value);
} else if (key.size <= 24) {
} else if (key.size <= sizeof(StringHashMapSubKeys::T3)) {
m3.template prefetch<read>(hash_value);
} else if (key.size <= sizeof(StringHashMapSubKeys::T4)) {
m4.template prefetch<read>(hash_value);
} else {
ms.template prefetch<read>(hash_value);
}
@ -613,10 +624,11 @@ public:
auto ALWAYS_INLINE operator()(Submap& map, const SubmapKey& key, const Origin& origin,
size_t hash) {
auto it = map.find(key, hash);
if (!it)
if (!it) {
return decltype(&it->get_mapped()) {};
else
} else {
return &it->get_mapped();
}
}
};
@ -628,14 +640,12 @@ public:
return dispatch(*this, x, hash_value, FindCallable {});
}
bool ALWAYS_INLINE has(const Key& x, size_t = 0) const {
return dispatch(*this, x, FindCallable {}) != nullptr;
size_t size() const {
return m0.size() + m1.size() + m2.size() + m3.size() + m4.size() + ms.size();
}
size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); }
bool empty() const {
return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty();
return m0.empty() && m1.empty() && m2.empty() && m3.empty() && m4.empty() && ms.empty();
}
size_t get_buffer_size_in_bytes() const {
@ -666,6 +676,7 @@ public:
bool add_elem_size_overflow(size_t add_size) const {
return m1.add_elem_size_overflow(add_size) || m2.add_elem_size_overflow(add_size) ||
m3.add_elem_size_overflow(add_size) || ms.add_elem_size_overflow(add_size);
m3.add_elem_size_overflow(add_size) || m4.add_elem_size_overflow(add_size) ||
ms.add_elem_size_overflow(add_size);
}
};
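The length recovery in to_string_ref is worth a worked example. to_string_key zero-pads the unused high bytes of the fixed-width key, so on a little-endian machine the count of leading zero bytes equals the padding, and sizeof(T) - (__builtin_clzll(n) >> 3) recovers the original length for the 8-byte width (the 16-byte key keeps its own overload that inspects the high word). A self-contained round trip, with plain memcpy standing in for memcpy_small:

#include <cassert>
#include <cstdint>
#include <cstring>
#include <string_view>

// Pack: copy at most 8 bytes into a zero-initialized uint64_t (to_string_key).
static uint64_t pack(std::string_view s) {
    assert(!s.empty() && s.size() <= sizeof(uint64_t));
    uint64_t key = 0;
    std::memcpy(&key, s.data(), s.size());
    return key;
}

// Unpack: count leading zero bytes to recover the length (to_string_ref).
// A key value of 0 is reserved: the cells use it to mark unoccupied slots,
// which is why the real to_string_ref asserts n != 0.
static std::string_view unpack(const uint64_t& key) {
    assert(key != 0);
    size_t len = sizeof(uint64_t) - (__builtin_clzll(key) >> 3);
    return {reinterpret_cast<const char*>(&key), len};
}

int main() {
    uint64_t k = pack("abc"); // little-endian bytes: 61 62 63 00 00 00 00 00
    // __builtin_clzll(k) = 41 bits -> 5 whole zero bytes -> length 8 - 5 = 3.
    assert(unpack(k) == "abc");
}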

View File

@ -20,8 +20,11 @@
#pragma once
#include <glog/logging.h>
#include <string.h>
#include <cstdint>
#if defined(__SSE2__) || defined(__aarch64__)
#include "util/sse_util.hpp"
@ -91,3 +94,32 @@ void memcpy_fixed(char* lhs, const char* rhs) {
memcpy(lhs, rhs, sizeof(T));
}
}
template <int max_size>
inline void memcpy_small(char* lhs, const char* rhs, size_t n) {
DCHECK_NE(n, 0);
if constexpr (max_size >= 4) {
if (n >= 4) {
memcpy_fixed<uint32_t>(lhs, rhs);
lhs += 4;
rhs += 4;
n -= 4;
}
}
while (n >= 1) {
memcpy_fixed<uint8_t>(lhs, rhs);
lhs++;
rhs++;
n--;
}
}
template <>
inline void memcpy_small<2>(char* lhs, const char* rhs, size_t n) {
DCHECK_NE(n, 0);
if (n == 2) {
memcpy_fixed<uint16_t>(lhs, rhs);
} else {
memcpy_fixed<uint8_t>(lhs, rhs);
}
}
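memcpy_small is the copy primitive behind to_string_key: the destination width is a compile-time bound, so the copy breaks down into a few fixed-size pieces, and the <2> specialization collapses a 1-or-2 byte copy into a single branch. A quick standalone restatement of that specialization's behavior (little-endian assumed for the final check):

#include <cassert>
#include <cstdint>
#include <cstring>

// Behavior of memcpy_small<2>: n is 1 or 2, so one comparison selects a
// fixed-width copy of the matching size.
inline void memcpy_small_2(char* lhs, const char* rhs, size_t n) {
    assert(n == 1 || n == 2);
    if (n == 2) {
        std::memcpy(lhs, rhs, sizeof(uint16_t)); // constant size: typically a single 16-bit store
    } else {
        std::memcpy(lhs, rhs, sizeof(uint8_t));  // single byte store
    }
}

int main() {
    uint16_t key = 0; // StringKey2-style destination
    const char src[] = "a";
    memcpy_small_2(reinterpret_cast<char*>(&key), src, 1);
    assert(key == 0x61); // 'a' lands in the low byte, high byte stays 0 (little-endian)
}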

View File

@ -95,12 +95,12 @@ void DistinctAggregationNode::_emplace_into_hash_table_to_distinct(IColumn::Sele
auto creator = [&](const auto& ctor, auto& key, auto& origin) {
HashMethodType::try_presis_key(key, origin, *_agg_arena_pool);
ctor(key, dummy_mapped_data);
distinct_row.push_back(row);
distinct_row.push_back_without_reserve(row);
};
auto creator_for_null_key = [&](auto& mapped) {
mapped = dummy_mapped_data;
distinct_row.push_back(row);
distinct_row.push_back_without_reserve(row);
};
SCOPED_TIMER(_hash_table_emplace_timer);
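The distinct-aggregation change above only swaps push_back for push_back_without_reserve on the row selector; the selector's capacity is presumably guaranteed before the emplace loop, so each per-row append can skip its capacity check. A general sketch of the reserve-once/append-unchecked pattern (std::vector stands in for the Doris PODArray-based selector, which is not shown here):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
    const size_t num_rows = 4096; // assumed: rows in the incoming block
    std::vector<uint32_t> distinct_row;
    distinct_row.reserve(num_rows); // one reservation before the hot loop
    for (uint32_t row = 0; row < num_rows; ++row) {
        // push_back_without_reserve would append without re-checking capacity;
        // std::vector has no such call, so a plain (never-reallocating)
        // push_back stands in here.
        distinct_row.push_back(row);
    }
    assert(distinct_row.size() == num_rows);
}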

View File

@ -102,26 +102,10 @@ static constexpr int STREAMING_HT_MIN_REDUCTION_SIZE =
AggregationNode::AggregationNode(ObjectPool* pool, const TPlanNode& tnode,
const DescriptorTbl& descs)
: ExecNode(pool, tnode, descs),
_hash_table_compute_timer(nullptr),
_hash_table_input_counter(nullptr),
_expr_timer(nullptr),
_intermediate_tuple_id(tnode.agg_node.intermediate_tuple_id),
_intermediate_tuple_desc(nullptr),
_output_tuple_id(tnode.agg_node.output_tuple_id),
_output_tuple_desc(nullptr),
_needs_finalize(tnode.agg_node.need_finalize),
_is_merge(false),
_serialize_key_timer(nullptr),
_merge_timer(nullptr),
_get_results_timer(nullptr),
_serialize_data_timer(nullptr),
_serialize_result_timer(nullptr),
_deserialize_data_timer(nullptr),
_hash_table_iterate_timer(nullptr),
_insert_keys_to_column_timer(nullptr),
_streaming_agg_timer(nullptr),
_hash_table_size_counter(nullptr),
_max_row_size_counter(nullptr) {
_is_merge(false) {
if (tnode.agg_node.__isset.use_streaming_preaggregation) {
_is_streaming_preagg = tnode.agg_node.use_streaming_preaggregation;
if (_is_streaming_preagg) {
@ -593,7 +577,7 @@ Status AggregationNode::_get_without_key_result(RuntimeState* state, Block* bloc
}
for (int i = 0; i < _aggregate_evaluators.size(); ++i) {
auto column = columns[i].get();
auto* column = columns[i].get();
_aggregate_evaluators[i]->insert_result_info(
_agg_data->without_key + _offsets_of_aggregate_states[i], column);
}
@ -816,13 +800,13 @@ Status AggregationNode::_reset_hash_table() {
}
});
_aggregate_data_container.reset(new AggregateDataContainer(
_aggregate_data_container = std::make_unique<AggregateDataContainer>(
sizeof(typename HashTableType::key_type),
((_total_size_of_aggregate_states + _align_aggregate_states - 1) /
_align_aggregate_states) *
_align_aggregate_states));
hash_table = HashTableType();
_agg_arena_pool.reset(new Arena);
_align_aggregate_states);
agg_method.hash_table.reset(new HashTableType());
_agg_arena_pool = std::make_unique<Arena>();
return Status::OK();
},
_agg_data->method_variant);
@ -845,7 +829,7 @@ void AggregationNode::_emplace_into_hash_table(AggregateDataPtr* places, ColumnR
auto creator = [this](const auto& ctor, auto& key, auto& origin) {
try {
HashMethodType::try_presis_key(key, origin, *_agg_arena_pool);
HashMethodType::try_presis_key_and_origin(key, origin, *_agg_arena_pool);
auto mapped = _aggregate_data_container->append_data(origin);
auto st = _create_agg_status(mapped);
if (!st) {

View File

@ -116,7 +116,7 @@ void VPartitionSortNode::_emplace_into_hash_table(const ColumnRawPtrs& key_colum
auto creator = [&](const auto& ctor, auto& key, auto& origin) {
HashMethodType::try_presis_key(key, origin, *_agg_arena_pool);
auto aggregate_data = _pool->add(new PartitionBlocks());
auto* aggregate_data = _pool->add(new PartitionBlocks());
_value_places.push_back(aggregate_data);
ctor(key, aggregate_data);
_num_partition++;