[Improvement](aggregate) optimize for small string aggregate (#29919)
This commit is contained in:
@ -536,7 +536,7 @@ void AggSinkLocalState<DependencyType, Derived>::_emplace_into_hash_table(
|
||||
agg_method.init_serialized_keys(key_columns, num_rows);
|
||||
|
||||
auto creator = [this](const auto& ctor, auto& key, auto& origin) {
|
||||
HashMethodType::try_presis_key(key, origin, *_agg_arena_pool);
|
||||
HashMethodType::try_presis_key_and_origin(key, origin, *_agg_arena_pool);
|
||||
auto mapped =
|
||||
Base::_shared_state->aggregate_data_container->append_data(origin);
|
||||
auto st = _create_agg_status(mapped);
|
||||
@ -686,7 +686,7 @@ Status AggSinkLocalState<DependencyType, Derived>::_reset_hash_table() {
|
||||
((ss.total_size_of_aggregate_states + ss.align_aggregate_states - 1) /
|
||||
ss.align_aggregate_states) *
|
||||
ss.align_aggregate_states));
|
||||
hash_table = HashTableType();
|
||||
agg_method.hash_table.reset(new HashTableType());
|
||||
ss.agg_arena_pool.reset(new vectorized::Arena);
|
||||
return Status::OK();
|
||||
},
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
|
||||
#include "aggregation_source_operator.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "common/exception.h"
|
||||
@ -141,13 +142,13 @@ Status AggLocalState::_reset_hash_table() {
|
||||
}
|
||||
});
|
||||
|
||||
ss.aggregate_data_container.reset(new vectorized::AggregateDataContainer(
|
||||
ss.aggregate_data_container = std::make_unique<vectorized::AggregateDataContainer>(
|
||||
sizeof(typename HashTableType::key_type),
|
||||
((ss.total_size_of_aggregate_states + ss.align_aggregate_states - 1) /
|
||||
ss.align_aggregate_states) *
|
||||
ss.align_aggregate_states));
|
||||
hash_table = HashTableType();
|
||||
ss.agg_arena_pool.reset(new vectorized::Arena);
|
||||
ss.align_aggregate_states);
|
||||
agg_method.hash_table.reset(new HashTableType());
|
||||
ss.agg_arena_pool = std::make_unique<vectorized::Arena>();
|
||||
return Status::OK();
|
||||
},
|
||||
ss.agg_data->method_variant);
|
||||
|
||||
@ -189,7 +189,7 @@ void PartitionSortSinkOperatorX::_emplace_into_hash_table(
|
||||
|
||||
auto creator = [&](const auto& ctor, auto& key, auto& origin) {
|
||||
HashMethodType::try_presis_key(key, origin, *local_state._agg_arena_pool);
|
||||
auto aggregate_data = _pool->add(new vectorized::PartitionBlocks());
|
||||
auto* aggregate_data = _pool->add(new vectorized::PartitionBlocks());
|
||||
local_state._value_places.push_back(aggregate_data);
|
||||
ctor(key, aggregate_data);
|
||||
local_state._num_partition++;
|
||||
@ -206,7 +206,7 @@ void PartitionSortSinkOperatorX::_emplace_into_hash_table(
|
||||
agg_method.lazy_emplace(state, row, creator, creator_for_null_key);
|
||||
mapped->add_row_idx(row);
|
||||
}
|
||||
for (auto place : local_state._value_places) {
|
||||
for (auto* place : local_state._value_places) {
|
||||
SCOPED_TIMER(local_state._selector_block_timer);
|
||||
place->append_block_by_selector(input_block, _child_x->row_desc(),
|
||||
_has_global_limit, _partition_inner_limit,
|
||||
|
||||
@ -155,12 +155,6 @@ public:
|
||||
|
||||
using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable;
|
||||
|
||||
/// Call func(const Key &, Mapped &) for each hash map element.
|
||||
template <typename Func>
|
||||
void for_each_value(Func&& func) {
|
||||
for (auto& v : *this) func(v.get_first(), v.get_second());
|
||||
}
|
||||
|
||||
/// Call func(Mapped &) for each hash map element.
|
||||
template <typename Func>
|
||||
void for_each_mapped(Func&& func) {
|
||||
|
||||
@ -128,20 +128,22 @@ struct MethodBase {
|
||||
|
||||
template <typename State>
|
||||
ALWAYS_INLINE auto find(State& state, size_t i) {
|
||||
prefetch<true>(i);
|
||||
if constexpr (!is_string_hash_map()) {
|
||||
prefetch<true>(i);
|
||||
}
|
||||
return state.find_key_with_hash(*hash_table, hash_values[i], keys[i]);
|
||||
}
|
||||
|
||||
template <typename State, typename F, typename FF>
|
||||
ALWAYS_INLINE auto& lazy_emplace(State& state, size_t i, F&& creator,
|
||||
FF&& creator_for_null_key) {
|
||||
prefetch<false>(i);
|
||||
if constexpr (!is_string_hash_map()) {
|
||||
prefetch<false>(i);
|
||||
}
|
||||
return state.lazy_emplace_key(*hash_table, i, keys[i], hash_values[i], creator,
|
||||
creator_for_null_key);
|
||||
}
|
||||
|
||||
static constexpr bool need_presis() { return std::is_same_v<Key, StringRef>; }
|
||||
|
||||
static constexpr bool is_string_hash_map() {
|
||||
return std::is_same_v<StringHashMap<Mapped>, HashMap> ||
|
||||
std::is_same_v<DataWithNullKey<StringHashMap<Mapped>>, HashMap>;
|
||||
@ -149,7 +151,14 @@ struct MethodBase {
|
||||
|
||||
template <typename Key, typename Origin>
|
||||
static void try_presis_key(Key& key, Origin& origin, Arena& arena) {
|
||||
if constexpr (need_presis()) {
|
||||
if constexpr (std::is_same_v<Key, StringRef>) {
|
||||
key.data = arena.insert(key.data, key.size);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Key, typename Origin>
|
||||
static void try_presis_key_and_origin(Key& key, Origin& origin, Arena& arena) {
|
||||
if constexpr (std::is_same_v<Origin, StringRef>) {
|
||||
origin.data = arena.insert(origin.data, origin.size);
|
||||
if constexpr (!is_string_hash_map()) {
|
||||
key = origin;
|
||||
@ -303,7 +312,6 @@ struct MethodOneNumber : public MethodBase<TData> {
|
||||
->get_raw_data()
|
||||
.data
|
||||
: key_columns[0]->get_raw_data().data);
|
||||
std::string name = key_columns[0]->get_name();
|
||||
if (is_join) {
|
||||
Base::init_join_bucket_num(num_rows, bucket_size, null_map);
|
||||
} else {
|
||||
|
||||
@ -353,29 +353,6 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(sizeof(HashTableGrowerWithPrecalculation<>) == 64);
|
||||
|
||||
/** When used as a Grower, it turns a hash table into something like a lookup table.
|
||||
* It remains non-optimal - the cells store the keys.
|
||||
* Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed.
|
||||
* TODO Make a proper lookup table.
|
||||
*/
|
||||
template <size_t key_bits>
|
||||
struct HashTableFixedGrower {
|
||||
size_t buf_size() const { return 1ULL << key_bits; }
|
||||
size_t place(size_t x) const { return x; }
|
||||
/// You could write __builtin_unreachable(), but the compiler does not optimize everything, and it turns out less efficiently.
|
||||
size_t next(size_t pos) const { return pos + 1; }
|
||||
bool overflow(size_t /*elems*/) const { return false; }
|
||||
|
||||
void increase_size() {
|
||||
LOG(FATAL) << "__builtin_unreachable";
|
||||
__builtin_unreachable();
|
||||
}
|
||||
void set(size_t /*num_elems*/) {}
|
||||
void set_buf_size(size_t /*buf_size_*/) {}
|
||||
};
|
||||
|
||||
/** If you want to store the zero key separately - a place to store it. */
|
||||
template <bool need_zero_value_storage, typename Cell>
|
||||
struct ZeroValueStorage;
|
||||
@ -573,11 +550,6 @@ protected:
|
||||
auto get_ptr() const { return ptr; }
|
||||
size_t get_hash() const { return ptr->get_hash(*container); }
|
||||
|
||||
size_t get_collision_chain_length() const {
|
||||
return container->grower.place((ptr - container->buf) -
|
||||
container->grower.place(get_hash()));
|
||||
}
|
||||
|
||||
/**
|
||||
* A hack for HashedDictionary.
|
||||
*
|
||||
|
||||
@ -97,17 +97,6 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
void ALWAYS_INLINE for_each_value(Func&& func) {
|
||||
if (_is_partitioned) {
|
||||
for (auto i = 0u; i < NUM_LEVEL1_SUB_TABLES; ++i) {
|
||||
level1_sub_tables[i].for_each_value(func);
|
||||
}
|
||||
} else {
|
||||
level0_sub_table.for_each_value(func);
|
||||
}
|
||||
}
|
||||
|
||||
size_t size() {
|
||||
size_t count = 0;
|
||||
if (_is_partitioned) {
|
||||
|
||||
@ -102,8 +102,6 @@ public:
|
||||
|
||||
auto get_ptr() const { return this; }
|
||||
size_t get_hash() const { return base_iterator->get_hash(); }
|
||||
|
||||
size_t get_collision_chain_length() const { return 0; }
|
||||
};
|
||||
|
||||
class iterator : public iterator_base<iterator, false> {
|
||||
@ -202,12 +200,6 @@ public:
|
||||
_hash_map.prefetch_hash(hash_value);
|
||||
}
|
||||
|
||||
/// Call func(const Key &, Mapped &) for each hash map element.
|
||||
template <typename Func>
|
||||
void for_each_value(Func&& func) {
|
||||
for (auto& v : *this) func(v.get_first(), v.get_second());
|
||||
}
|
||||
|
||||
/// Call func(Mapped &) for each hash map element.
|
||||
template <typename Func>
|
||||
void for_each_mapped(Func&& func) {
|
||||
|
||||
@ -55,26 +55,6 @@ struct StringHashMapCell<StringKey16, TMapped>
|
||||
static const StringKey16& get_key(const value_type& value_) { return value_.first; }
|
||||
};
|
||||
|
||||
template <typename TMapped>
|
||||
struct StringHashMapCell<StringKey24, TMapped>
|
||||
: public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState> {
|
||||
using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>;
|
||||
using value_type = typename Base::value_type;
|
||||
using Base::Base;
|
||||
static constexpr bool need_zero_value_storage = false;
|
||||
bool is_zero(const HashTableNoState& state) const { return is_zero(this->value.first, state); }
|
||||
|
||||
// Zero means unoccupied cells in hash table. Use key with last word = 0 as
|
||||
// zero keys, because such keys are unrepresentable (no way to encode length).
|
||||
static bool is_zero(const StringKey24& key, const HashTableNoState&) { return key.c == 0; }
|
||||
void set_zero() { this->value.first.c = 0; }
|
||||
|
||||
// external
|
||||
const doris::StringRef get_key() const { return to_string_ref(this->value.first); } /// NOLINT
|
||||
// internal
|
||||
static const StringKey24& get_key(const value_type& value_) { return value_.first; }
|
||||
};
|
||||
|
||||
template <typename TMapped>
|
||||
struct StringHashMapCell<doris::StringRef, TMapped>
|
||||
: public HashMapCellWithSavedHash<doris::StringRef, TMapped, StringHashTableHash,
|
||||
@ -108,11 +88,17 @@ struct StringHashMapCell<doris::StringRef, TMapped>
|
||||
template <typename TMapped, typename Allocator>
|
||||
struct StringHashMapSubMaps {
|
||||
using T0 = StringHashTableEmpty<StringHashMapCell<doris::StringRef, TMapped>>;
|
||||
using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, TMapped>, StringHashTableHash,
|
||||
StringHashTableGrower<>, Allocator>;
|
||||
using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, TMapped>,
|
||||
using T1 = HashMapTable<StringHashMapSubKeys::T1,
|
||||
StringHashMapCell<StringHashMapSubKeys::T1, TMapped>,
|
||||
StringHashTableHash, StringHashTableGrower<4>, Allocator>;
|
||||
using T2 = HashMapTable<StringHashMapSubKeys::T2,
|
||||
StringHashMapCell<StringHashMapSubKeys::T2, TMapped>,
|
||||
StringHashTableHash, StringHashTableGrower<>, Allocator>;
|
||||
using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, TMapped>,
|
||||
using T3 = HashMapTable<StringHashMapSubKeys::T3,
|
||||
StringHashMapCell<StringHashMapSubKeys::T3, TMapped>,
|
||||
StringHashTableHash, StringHashTableGrower<>, Allocator>;
|
||||
using T4 = HashMapTable<StringHashMapSubKeys::T4,
|
||||
StringHashMapCell<StringHashMapSubKeys::T4, TMapped>,
|
||||
StringHashTableHash, StringHashTableGrower<>, Allocator>;
|
||||
using Ts = HashMapTable<doris::StringRef, StringHashMapCell<doris::StringRef, TMapped>,
|
||||
StringHashTableHash, StringHashTableGrower<>, Allocator>;
|
||||
@ -132,41 +118,33 @@ public:
|
||||
LookupResult it;
|
||||
bool inserted;
|
||||
this->emplace(x, it, inserted);
|
||||
if (inserted) new (&it->get_mapped()) TMapped();
|
||||
if (inserted) {
|
||||
new (&it->get_mapped()) TMapped();
|
||||
}
|
||||
|
||||
return it->get_mapped();
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
void ALWAYS_INLINE for_each_value(Func&& func) {
|
||||
if (this->m0.size()) {
|
||||
func(doris::StringRef {}, this->m0.zero_value()->get_second());
|
||||
}
|
||||
|
||||
for (auto& v : this->m1) {
|
||||
func(v.get_key(), v.get_second());
|
||||
}
|
||||
|
||||
for (auto& v : this->m2) {
|
||||
func(v.get_key(), v.get_second());
|
||||
}
|
||||
|
||||
for (auto& v : this->m3) {
|
||||
func(v.get_key(), v.get_second());
|
||||
}
|
||||
|
||||
for (auto& v : this->ms) {
|
||||
func(v.get_key(), v.get_second());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
void ALWAYS_INLINE for_each_mapped(Func&& func) {
|
||||
if (this->m0.size()) func(this->m0.zero_value()->get_second());
|
||||
for (auto& v : this->m1) func(v.get_second());
|
||||
for (auto& v : this->m2) func(v.get_second());
|
||||
for (auto& v : this->m3) func(v.get_second());
|
||||
for (auto& v : this->ms) func(v.get_second());
|
||||
if (this->m0.size()) {
|
||||
func(this->m0.zero_value()->get_second());
|
||||
}
|
||||
for (auto& v : this->m1) {
|
||||
func(v.get_second());
|
||||
}
|
||||
for (auto& v : this->m2) {
|
||||
func(v.get_second());
|
||||
}
|
||||
for (auto& v : this->m3) {
|
||||
func(v.get_second());
|
||||
}
|
||||
for (auto& v : this->m4) {
|
||||
func(v.get_second());
|
||||
}
|
||||
for (auto& v : this->ms) {
|
||||
func(v.get_second());
|
||||
}
|
||||
}
|
||||
template <typename MappedType>
|
||||
char* get_null_key_data() {
|
||||
|
||||
@ -24,41 +24,43 @@
|
||||
#include <variant>
|
||||
|
||||
#include "vec/common/hash_table/hash.h"
|
||||
#include "vec/common/hash_table/hash_table.h"
|
||||
#include "vec/common/memcpy_small.h"
|
||||
|
||||
using StringKey2 = doris::vectorized::UInt16;
|
||||
using StringKey4 = doris::vectorized::UInt32;
|
||||
using StringKey8 = doris::vectorized::UInt64;
|
||||
using StringKey16 = doris::vectorized::UInt128;
|
||||
struct StringKey24 {
|
||||
doris::vectorized::UInt64 a;
|
||||
doris::vectorized::UInt64 b;
|
||||
doris::vectorized::UInt64 c;
|
||||
|
||||
bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; }
|
||||
struct StringHashMapSubKeys {
|
||||
using T1 = StringKey2;
|
||||
using T2 = StringKey4;
|
||||
using T3 = StringKey8;
|
||||
using T4 = StringKey16;
|
||||
};
|
||||
|
||||
template <typename StringKey>
|
||||
StringKey toStringKey(const doris::StringRef& key) {
|
||||
StringKey to_string_key(const doris::StringRef& key) {
|
||||
DCHECK_LE(key.size, sizeof(StringKey));
|
||||
StringKey string_key {};
|
||||
memcpy((char*)&string_key, key.data, key.size);
|
||||
memcpy_small<sizeof(StringKey)>((char*)&string_key, key.data, key.size);
|
||||
return string_key;
|
||||
}
|
||||
|
||||
inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
|
||||
template <typename T>
|
||||
inline doris::StringRef ALWAYS_INLINE to_string_ref(const T& n) {
|
||||
assert(n != 0);
|
||||
return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 3)};
|
||||
return {reinterpret_cast<const char*>(&n), sizeof(T) - (__builtin_clzll(n) >> 3)};
|
||||
}
|
||||
inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
|
||||
assert(n.high != 0);
|
||||
return {reinterpret_cast<const char*>(&n), 16ul - (__builtin_clzll(n.high) >> 3)};
|
||||
}
|
||||
inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) {
|
||||
assert(n.c != 0);
|
||||
return {reinterpret_cast<const char*>(&n), 24ul - (__builtin_clzll(n.c) >> 3)};
|
||||
}
|
||||
|
||||
struct StringHashTableHash {
|
||||
#if defined(__SSE4_2__) || defined(__aarch64__)
|
||||
size_t ALWAYS_INLINE operator()(StringKey8 key) const {
|
||||
template <typename T>
|
||||
size_t ALWAYS_INLINE operator()(T key) const {
|
||||
size_t res = -1ULL;
|
||||
res = _mm_crc32_u64(res, key);
|
||||
return res;
|
||||
@ -69,31 +71,23 @@ struct StringHashTableHash {
|
||||
res = _mm_crc32_u64(res, key.high);
|
||||
return res;
|
||||
}
|
||||
size_t ALWAYS_INLINE operator()(StringKey24 key) const {
|
||||
size_t res = -1ULL;
|
||||
res = _mm_crc32_u64(res, key.a);
|
||||
res = _mm_crc32_u64(res, key.b);
|
||||
res = _mm_crc32_u64(res, key.c);
|
||||
return res;
|
||||
}
|
||||
#else
|
||||
size_t ALWAYS_INLINE operator()(StringKey8 key) const {
|
||||
return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 8);
|
||||
}
|
||||
size_t ALWAYS_INLINE operator()(StringKey16 key) const {
|
||||
return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 16);
|
||||
}
|
||||
size_t ALWAYS_INLINE operator()(StringKey24 key) const {
|
||||
return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 24);
|
||||
template <typename T>
|
||||
size_t ALWAYS_INLINE operator()(T key) const {
|
||||
return util_hash::CityHash64(reinterpret_cast<const char*>(&key), sizeof(T));
|
||||
}
|
||||
#endif
|
||||
size_t ALWAYS_INLINE operator()(doris::StringRef key) const {
|
||||
if (key.size <= 8) {
|
||||
return StringHashTableHash()(toStringKey<StringKey8>(key));
|
||||
} else if (key.size <= 16) {
|
||||
return StringHashTableHash()(toStringKey<StringKey16>(key));
|
||||
} else if (key.size <= 24) {
|
||||
return StringHashTableHash()(toStringKey<StringKey24>(key));
|
||||
if (key.size == 0) {
|
||||
return 0;
|
||||
} else if (key.size <= sizeof(StringHashMapSubKeys::T1)) {
|
||||
return StringHashTableHash()(to_string_key<StringHashMapSubKeys::T1>(key));
|
||||
} else if (key.size <= sizeof(StringHashMapSubKeys::T2)) {
|
||||
return StringHashTableHash()(to_string_key<StringHashMapSubKeys::T2>(key));
|
||||
} else if (key.size <= sizeof(StringHashMapSubKeys::T3)) {
|
||||
return StringHashTableHash()(to_string_key<StringHashMapSubKeys::T3>(key));
|
||||
} else if (key.size <= sizeof(StringHashMapSubKeys::T4)) {
|
||||
return StringHashTableHash()(to_string_key<StringHashMapSubKeys::T4>(key));
|
||||
}
|
||||
return doris::StringRefHash()(key);
|
||||
}
|
||||
@ -194,10 +188,9 @@ struct StringHashTableGrower : public HashTableGrowerWithPrecalculation<initial_
|
||||
template <typename Mapped>
|
||||
struct StringHashTableLookupResult {
|
||||
Mapped* mapped_ptr;
|
||||
StringHashTableLookupResult() : mapped_ptr(nullptr) {} /// NOLINT
|
||||
StringHashTableLookupResult(Mapped* mapped_ptr_) : mapped_ptr(mapped_ptr_) {} /// NOLINT
|
||||
StringHashTableLookupResult(std::nullptr_t) {} /// NOLINT
|
||||
const VoidKey getKey() const { return {}; } /// NOLINT
|
||||
StringHashTableLookupResult() : mapped_ptr(nullptr) {}
|
||||
StringHashTableLookupResult(Mapped* mapped_ptr_) : mapped_ptr(mapped_ptr_) {}
|
||||
StringHashTableLookupResult(std::nullptr_t) {}
|
||||
auto& get_mapped() { return *mapped_ptr; }
|
||||
auto& operator*() { return *this; }
|
||||
auto& operator*() const { return *this; }
|
||||
@ -226,7 +219,6 @@ ALWAYS_INLINE inline auto lookup_result_get_mapped(StringHashTableLookupResult<M
|
||||
template <typename SubMaps>
|
||||
class StringHashTable : private boost::noncopyable {
|
||||
protected:
|
||||
static constexpr size_t NUM_MAPS = 5;
|
||||
// Map for storing empty string
|
||||
using T0 = typename SubMaps::T0;
|
||||
|
||||
@ -234,18 +226,17 @@ protected:
|
||||
using T1 = typename SubMaps::T1;
|
||||
using T2 = typename SubMaps::T2;
|
||||
using T3 = typename SubMaps::T3;
|
||||
using T4 = typename SubMaps::T4;
|
||||
|
||||
// Long strings are stored as doris::StringRef along with saved hash
|
||||
using Ts = typename SubMaps::Ts;
|
||||
using Self = StringHashTable;
|
||||
|
||||
template <typename, typename, size_t>
|
||||
friend class TwoLevelStringHashTable;
|
||||
|
||||
T0 m0;
|
||||
T1 m1;
|
||||
T2 m2;
|
||||
T3 m3;
|
||||
T4 m4;
|
||||
Ts ms;
|
||||
|
||||
using Cell = typename Ts::cell_type;
|
||||
@ -259,7 +250,8 @@ protected:
|
||||
typename T1::iterator iterator1;
|
||||
typename T2::iterator iterator2;
|
||||
typename T3::iterator iterator3;
|
||||
typename Ts::iterator iterator4;
|
||||
typename T4::iterator iterator4;
|
||||
typename Ts::iterator iterator5;
|
||||
|
||||
typename Ts::cell_type cell;
|
||||
|
||||
@ -269,39 +261,52 @@ protected:
|
||||
iterator_base() = default;
|
||||
iterator_base(Container* container_, bool end = false) : container(container_) {
|
||||
if (end) {
|
||||
sub_table_index = 4;
|
||||
iterator4 = container->ms.end();
|
||||
sub_table_index = 5;
|
||||
iterator5 = container->ms.end();
|
||||
} else {
|
||||
sub_table_index = 0;
|
||||
if (container->m0.size() == 0)
|
||||
if (container->m0.size() == 0) {
|
||||
sub_table_index++;
|
||||
else
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
iterator1 = container->m1.begin();
|
||||
if (iterator1 == container->m1.end())
|
||||
if (iterator1 == container->m1.end()) {
|
||||
sub_table_index++;
|
||||
else
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
iterator2 = container->m2.begin();
|
||||
if (iterator2 == container->m2.end())
|
||||
if (iterator2 == container->m2.end()) {
|
||||
sub_table_index++;
|
||||
else
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
iterator3 = container->m3.begin();
|
||||
if (iterator3 == container->m3.end())
|
||||
if (iterator3 == container->m3.end()) {
|
||||
sub_table_index++;
|
||||
else
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
iterator4 = container->ms.begin();
|
||||
iterator4 = container->m4.begin();
|
||||
if (iterator4 == container->m4.end()) {
|
||||
sub_table_index++;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
iterator5 = container->ms.begin();
|
||||
}
|
||||
}
|
||||
|
||||
bool operator==(const iterator_base& rhs) const {
|
||||
if (sub_table_index != rhs.sub_table_index) return false;
|
||||
if (sub_table_index != rhs.sub_table_index) {
|
||||
return false;
|
||||
}
|
||||
switch (sub_table_index) {
|
||||
case 0: {
|
||||
return true;
|
||||
@ -318,6 +323,9 @@ protected:
|
||||
case 4: {
|
||||
return iterator4 == rhs.iterator4;
|
||||
}
|
||||
case 5: {
|
||||
return iterator5 == rhs.iterator5;
|
||||
}
|
||||
}
|
||||
LOG(FATAL) << "__builtin_unreachable";
|
||||
__builtin_unreachable();
|
||||
@ -355,6 +363,13 @@ protected:
|
||||
}
|
||||
case 4: {
|
||||
++iterator4;
|
||||
if (iterator4 == container->m4.end()) {
|
||||
need_switch_to_next = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
++iterator5;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -385,7 +400,14 @@ protected:
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
iterator4 = container->ms.begin();
|
||||
iterator4 = container->m4.begin();
|
||||
if (iterator4 == container->m4.end()) {
|
||||
need_switch_to_next = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
iterator5 = container->ms.begin();
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -416,6 +438,10 @@ protected:
|
||||
const_cast<iterator_base*>(this)->cell = *iterator4;
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
const_cast<iterator_base*>(this)->cell = *iterator5;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return cell;
|
||||
}
|
||||
@ -438,13 +464,14 @@ protected:
|
||||
return iterator3->get_hash(container->m3);
|
||||
}
|
||||
case 4: {
|
||||
return iterator4->get_hash(container->ms);
|
||||
return iterator4->get_hash(container->m4);
|
||||
}
|
||||
case 5: {
|
||||
return iterator5->get_hash(container->ms);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t get_collision_chain_length() const { return 0; }
|
||||
|
||||
/**
|
||||
* A hack for HashedDictionary.
|
||||
*
|
||||
@ -476,25 +503,11 @@ public:
|
||||
StringHashTable() = default;
|
||||
|
||||
explicit StringHashTable(size_t reserve_for_num_elements)
|
||||
: m1 {reserve_for_num_elements / 4},
|
||||
m2 {reserve_for_num_elements / 4},
|
||||
m3 {reserve_for_num_elements / 4},
|
||||
ms {reserve_for_num_elements / 4} {}
|
||||
|
||||
StringHashTable(StringHashTable&& rhs) noexcept
|
||||
: m1(std::move(rhs.m1)),
|
||||
m2(std::move(rhs.m2)),
|
||||
m3(std::move(rhs.m3)),
|
||||
ms(std::move(rhs.ms)) {}
|
||||
|
||||
StringHashTable& operator=(StringHashTable&& other) {
|
||||
std::swap(m0, other.m0);
|
||||
std::swap(m1, other.m1);
|
||||
std::swap(m2, other.m2);
|
||||
std::swap(m3, other.m3);
|
||||
std::swap(ms, other.ms);
|
||||
return *this;
|
||||
}
|
||||
: m1 {reserve_for_num_elements / 5},
|
||||
m2 {reserve_for_num_elements / 5},
|
||||
m3 {reserve_for_num_elements / 5},
|
||||
m4 {reserve_for_num_elements / 5},
|
||||
ms {reserve_for_num_elements / 5} {}
|
||||
|
||||
~StringHashTable() = default;
|
||||
|
||||
@ -524,24 +537,20 @@ public:
|
||||
return func(self.ms, std::forward<KeyHolder>(key), key, hash_value);
|
||||
}
|
||||
|
||||
switch ((sz - 1) >> 3) {
|
||||
case 0: // 1..8 bytes
|
||||
{
|
||||
return func(self.m1, toStringKey<StringKey8>(key), key, hash_value);
|
||||
if (sz <= sizeof(StringHashMapSubKeys::T1)) {
|
||||
return func(self.m1, to_string_key<StringHashMapSubKeys::T1>(key), key, hash_value);
|
||||
}
|
||||
case 1: // 9..16 bytes
|
||||
{
|
||||
return func(self.m2, toStringKey<StringKey16>(key), key, hash_value);
|
||||
if (sz <= sizeof(StringHashMapSubKeys::T2)) {
|
||||
return func(self.m2, to_string_key<StringHashMapSubKeys::T2>(key), key, hash_value);
|
||||
}
|
||||
case 2: // 17..24 bytes
|
||||
{
|
||||
return func(self.m3, toStringKey<StringKey24>(key), key, hash_value);
|
||||
}
|
||||
default: // >= 25 bytes
|
||||
{
|
||||
return func(self.ms, std::forward<KeyHolder>(key), key, hash_value);
|
||||
if (sz <= sizeof(StringHashMapSubKeys::T3)) {
|
||||
return func(self.m3, to_string_key<StringHashMapSubKeys::T3>(key), key, hash_value);
|
||||
}
|
||||
if (sz <= sizeof(StringHashMapSubKeys::T4)) {
|
||||
return func(self.m4, to_string_key<StringHashMapSubKeys::T4>(key), key, hash_value);
|
||||
}
|
||||
|
||||
return func(self.ms, std::forward<KeyHolder>(key), key, hash_value);
|
||||
}
|
||||
|
||||
struct EmplaceCallable {
|
||||
@ -594,12 +603,14 @@ public:
|
||||
if (!key.size) {
|
||||
return;
|
||||
}
|
||||
if (key.size <= 8) {
|
||||
if (key.size <= sizeof(StringHashMapSubKeys::T1)) {
|
||||
m1.template prefetch<read>(hash_value);
|
||||
} else if (key.size <= 16) {
|
||||
} else if (key.size <= sizeof(StringHashMapSubKeys::T2)) {
|
||||
m2.template prefetch<read>(hash_value);
|
||||
} else if (key.size <= 24) {
|
||||
} else if (key.size <= sizeof(StringHashMapSubKeys::T3)) {
|
||||
m3.template prefetch<read>(hash_value);
|
||||
} else if (key.size <= sizeof(StringHashMapSubKeys::T4)) {
|
||||
m4.template prefetch<read>(hash_value);
|
||||
} else {
|
||||
ms.template prefetch<read>(hash_value);
|
||||
}
|
||||
@ -613,10 +624,11 @@ public:
|
||||
auto ALWAYS_INLINE operator()(Submap& map, const SubmapKey& key, const Origin& origin,
|
||||
size_t hash) {
|
||||
auto it = map.find(key, hash);
|
||||
if (!it)
|
||||
if (!it) {
|
||||
return decltype(&it->get_mapped()) {};
|
||||
else
|
||||
} else {
|
||||
return &it->get_mapped();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -628,14 +640,12 @@ public:
|
||||
return dispatch(*this, x, hash_value, FindCallable {});
|
||||
}
|
||||
|
||||
bool ALWAYS_INLINE has(const Key& x, size_t = 0) const {
|
||||
return dispatch(*this, x, FindCallable {}) != nullptr;
|
||||
size_t size() const {
|
||||
return m0.size() + m1.size() + m2.size() + m3.size() + m4.size() + ms.size();
|
||||
}
|
||||
|
||||
size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); }
|
||||
|
||||
bool empty() const {
|
||||
return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty();
|
||||
return m0.empty() && m1.empty() && m2.empty() && m3.empty() && m4.empty() && ms.empty();
|
||||
}
|
||||
|
||||
size_t get_buffer_size_in_bytes() const {
|
||||
@ -666,6 +676,7 @@ public:
|
||||
|
||||
bool add_elem_size_overflow(size_t add_size) const {
|
||||
return m1.add_elem_size_overflow(add_size) || m2.add_elem_size_overflow(add_size) ||
|
||||
m3.add_elem_size_overflow(add_size) || ms.add_elem_size_overflow(add_size);
|
||||
m3.add_elem_size_overflow(add_size) || m4.add_elem_size_overflow(add_size) ||
|
||||
ms.add_elem_size_overflow(add_size);
|
||||
}
|
||||
};
|
||||
|
||||
@ -20,8 +20,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#if defined(__SSE2__) || defined(__aarch64__)
|
||||
#include "util/sse_util.hpp"
|
||||
|
||||
@ -91,3 +94,32 @@ void memcpy_fixed(char* lhs, const char* rhs) {
|
||||
memcpy(lhs, rhs, sizeof(T));
|
||||
}
|
||||
}
|
||||
|
||||
template <int max_size>
|
||||
inline void memcpy_small(char* lhs, const char* rhs, size_t n) {
|
||||
DCHECK_NE(n, 0);
|
||||
if constexpr (max_size >= 4) {
|
||||
if (n >= 4) {
|
||||
memcpy_fixed<uint32_t>(lhs, rhs);
|
||||
lhs += 4;
|
||||
rhs += 4;
|
||||
n -= 4;
|
||||
}
|
||||
}
|
||||
while (n >= 1) {
|
||||
memcpy_fixed<uint8_t>(lhs, rhs);
|
||||
lhs++;
|
||||
rhs++;
|
||||
n--;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void memcpy_small<2>(char* lhs, const char* rhs, size_t n) {
|
||||
DCHECK_NE(n, 0);
|
||||
if (n == 2) {
|
||||
memcpy_fixed<uint16_t>(lhs, rhs);
|
||||
} else {
|
||||
memcpy_fixed<uint8_t>(lhs, rhs);
|
||||
}
|
||||
}
|
||||
@ -95,12 +95,12 @@ void DistinctAggregationNode::_emplace_into_hash_table_to_distinct(IColumn::Sele
|
||||
auto creator = [&](const auto& ctor, auto& key, auto& origin) {
|
||||
HashMethodType::try_presis_key(key, origin, *_agg_arena_pool);
|
||||
ctor(key, dummy_mapped_data);
|
||||
distinct_row.push_back(row);
|
||||
distinct_row.push_back_without_reserve(row);
|
||||
};
|
||||
|
||||
auto creator_for_null_key = [&](auto& mapped) {
|
||||
mapped = dummy_mapped_data;
|
||||
distinct_row.push_back(row);
|
||||
distinct_row.push_back_without_reserve(row);
|
||||
};
|
||||
|
||||
SCOPED_TIMER(_hash_table_emplace_timer);
|
||||
|
||||
@ -102,26 +102,10 @@ static constexpr int STREAMING_HT_MIN_REDUCTION_SIZE =
|
||||
AggregationNode::AggregationNode(ObjectPool* pool, const TPlanNode& tnode,
|
||||
const DescriptorTbl& descs)
|
||||
: ExecNode(pool, tnode, descs),
|
||||
_hash_table_compute_timer(nullptr),
|
||||
_hash_table_input_counter(nullptr),
|
||||
_expr_timer(nullptr),
|
||||
_intermediate_tuple_id(tnode.agg_node.intermediate_tuple_id),
|
||||
_intermediate_tuple_desc(nullptr),
|
||||
_output_tuple_id(tnode.agg_node.output_tuple_id),
|
||||
_output_tuple_desc(nullptr),
|
||||
_needs_finalize(tnode.agg_node.need_finalize),
|
||||
_is_merge(false),
|
||||
_serialize_key_timer(nullptr),
|
||||
_merge_timer(nullptr),
|
||||
_get_results_timer(nullptr),
|
||||
_serialize_data_timer(nullptr),
|
||||
_serialize_result_timer(nullptr),
|
||||
_deserialize_data_timer(nullptr),
|
||||
_hash_table_iterate_timer(nullptr),
|
||||
_insert_keys_to_column_timer(nullptr),
|
||||
_streaming_agg_timer(nullptr),
|
||||
_hash_table_size_counter(nullptr),
|
||||
_max_row_size_counter(nullptr) {
|
||||
_is_merge(false) {
|
||||
if (tnode.agg_node.__isset.use_streaming_preaggregation) {
|
||||
_is_streaming_preagg = tnode.agg_node.use_streaming_preaggregation;
|
||||
if (_is_streaming_preagg) {
|
||||
@ -593,7 +577,7 @@ Status AggregationNode::_get_without_key_result(RuntimeState* state, Block* bloc
|
||||
}
|
||||
|
||||
for (int i = 0; i < _aggregate_evaluators.size(); ++i) {
|
||||
auto column = columns[i].get();
|
||||
auto* column = columns[i].get();
|
||||
_aggregate_evaluators[i]->insert_result_info(
|
||||
_agg_data->without_key + _offsets_of_aggregate_states[i], column);
|
||||
}
|
||||
@ -816,13 +800,13 @@ Status AggregationNode::_reset_hash_table() {
|
||||
}
|
||||
});
|
||||
|
||||
_aggregate_data_container.reset(new AggregateDataContainer(
|
||||
_aggregate_data_container = std::make_unique<AggregateDataContainer>(
|
||||
sizeof(typename HashTableType::key_type),
|
||||
((_total_size_of_aggregate_states + _align_aggregate_states - 1) /
|
||||
_align_aggregate_states) *
|
||||
_align_aggregate_states));
|
||||
hash_table = HashTableType();
|
||||
_agg_arena_pool.reset(new Arena);
|
||||
_align_aggregate_states);
|
||||
agg_method.hash_table.reset(new HashTableType());
|
||||
_agg_arena_pool = std::make_unique<Arena>();
|
||||
return Status::OK();
|
||||
},
|
||||
_agg_data->method_variant);
|
||||
@ -845,7 +829,7 @@ void AggregationNode::_emplace_into_hash_table(AggregateDataPtr* places, ColumnR
|
||||
|
||||
auto creator = [this](const auto& ctor, auto& key, auto& origin) {
|
||||
try {
|
||||
HashMethodType::try_presis_key(key, origin, *_agg_arena_pool);
|
||||
HashMethodType::try_presis_key_and_origin(key, origin, *_agg_arena_pool);
|
||||
auto mapped = _aggregate_data_container->append_data(origin);
|
||||
auto st = _create_agg_status(mapped);
|
||||
if (!st) {
|
||||
|
||||
@ -116,7 +116,7 @@ void VPartitionSortNode::_emplace_into_hash_table(const ColumnRawPtrs& key_colum
|
||||
|
||||
auto creator = [&](const auto& ctor, auto& key, auto& origin) {
|
||||
HashMethodType::try_presis_key(key, origin, *_agg_arena_pool);
|
||||
auto aggregate_data = _pool->add(new PartitionBlocks());
|
||||
auto* aggregate_data = _pool->add(new PartitionBlocks());
|
||||
_value_places.push_back(aggregate_data);
|
||||
ctor(key, aggregate_data);
|
||||
_num_partition++;
|
||||
|
||||
Reference in New Issue
Block a user