[improvement](agg) iterate aggregation data in memory written order (#12704)
Following the iteration order of the hash table results in out-of-order access to the aggregate states, which is very inefficient. Traversing the aggregate states in the order they were written to memory can significantly improve memory read efficiency. Test (hash table with 3.35M items): before this optimization, inserting keys into the column takes 500 ms; with this optimization it takes only 80 ms.
This commit is contained in:
@ -257,6 +257,37 @@ struct HashMethodSingleLowNullableColumn : public SingleColumnMethod {
|
||||
return EmplaceResult(inserted);
|
||||
}
|
||||
|
||||
template <typename Data, typename Func, typename CreatorForNull>
|
||||
ALWAYS_INLINE typename std::enable_if_t<has_mapped, Mapped>& lazy_emplace_key(
|
||||
Data& data, size_t row, Arena& pool, Func&& f, CreatorForNull&& null_creator) {
|
||||
if (key_columns[0]->is_null_at(row)) {
|
||||
bool has_null_key = data.has_null_key_data();
|
||||
data.has_null_key_data() = true;
|
||||
if (!has_null_key) std::forward<CreatorForNull>(null_creator)(data.get_null_key_data());
|
||||
return data.get_null_key_data();
|
||||
}
|
||||
auto key_holder = Base::get_key_holder(row, pool);
|
||||
typename Data::LookupResult it;
|
||||
data.lazy_emplace(key_holder, it, std::forward<Func>(f));
|
||||
return *lookup_result_get_mapped(it);
|
||||
}
|
||||
|
||||
template <typename Data, typename Func, typename CreatorForNull>
|
||||
ALWAYS_INLINE typename std::enable_if_t<has_mapped, Mapped>& lazy_emplace_key(
|
||||
Data& data, size_t row, Arena& pool, size_t hash_value, Func&& f,
|
||||
CreatorForNull&& null_creator) {
|
||||
if (key_columns[0]->is_null_at(row)) {
|
||||
bool has_null_key = data.has_null_key_data();
|
||||
data.has_null_key_data() = true;
|
||||
if (!has_null_key) std::forward<CreatorForNull>(null_creator)(data.get_null_key_data());
|
||||
return data.get_null_key_data();
|
||||
}
|
||||
auto key_holder = Base::get_key_holder(row, pool);
|
||||
typename Data::LookupResult it;
|
||||
data.lazy_emplace(key_holder, it, hash_value, std::forward<Func>(f));
|
||||
return *lookup_result_get_mapped(it);
|
||||
}
|
||||
|
||||
template <typename Data>
|
||||
ALWAYS_INLINE FindResult find_key(Data& data, size_t row, Arena& pool) {
|
||||
if (key_columns[0]->is_null_at(row)) {
|
||||
@ -276,5 +307,16 @@ struct HashMethodSingleLowNullableColumn : public SingleColumnMethod {
|
||||
}
|
||||
};
|
||||
|
||||
/// Compile-time trait: is `HashMethod` a HashMethodSingleLowNullableColumn?
/// The primary template answers "no"; a partial specialization answers "yes".
template <typename HashMethod>
struct IsSingleNullableColumnMethod : std::false_type {};
|
||||
|
||||
template <typename SingleColumnMethod, typename Mapped, bool use_cache>
|
||||
struct IsSingleNullableColumnMethod<
|
||||
HashMethodSingleLowNullableColumn<SingleColumnMethod, Mapped, use_cache>> {
|
||||
static constexpr bool value = true;
|
||||
};
|
||||
|
||||
} // namespace ColumnsHashing
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -140,6 +140,22 @@ public:
|
||||
return emplaceImpl(key_holder, hash_value, data);
|
||||
}
|
||||
|
||||
template <typename Data, typename Func>
|
||||
ALWAYS_INLINE typename std::enable_if_t<has_mapped, Mapped>& lazy_emplace_key(Data& data,
|
||||
size_t row,
|
||||
Arena& pool,
|
||||
Func&& f) {
|
||||
auto key_holder = static_cast<Derived&>(*this).get_key_holder(row, pool);
|
||||
return lazy_emplace_impl(key_holder, data, std::forward<Func>(f));
|
||||
}
|
||||
|
||||
template <typename Data, typename Func>
|
||||
ALWAYS_INLINE typename std::enable_if_t<has_mapped, Mapped>& lazy_emplace_key(
|
||||
Data& data, size_t hash_value, size_t row, Arena& pool, Func&& f) {
|
||||
auto key_holder = static_cast<Derived&>(*this).get_key_holder(row, pool);
|
||||
return lazy_emplace_impl(key_holder, hash_value, data, std::forward<Func>(f));
|
||||
}
|
||||
|
||||
template <typename Data>
|
||||
ALWAYS_INLINE FindResult find_key(Data& data, size_t row, Arena& pool) {
|
||||
auto key_holder = static_cast<Derived&>(*this).get_key_holder(row, pool);
|
||||
@ -264,6 +280,23 @@ protected:
|
||||
return EmplaceResult(inserted);
|
||||
}
|
||||
|
||||
template <typename Data, typename KeyHolder, typename Func>
|
||||
ALWAYS_INLINE typename std::enable_if_t<has_mapped, Mapped>& lazy_emplace_impl(
|
||||
KeyHolder& key_holder, Data& data, Func&& f) {
|
||||
typename Data::LookupResult it;
|
||||
data.lazy_emplace(key_holder, it, std::forward<Func>(f));
|
||||
return *lookup_result_get_mapped(it);
|
||||
}
|
||||
|
||||
template <typename Data, typename KeyHolder, typename Func>
|
||||
ALWAYS_INLINE typename std::enable_if_t<has_mapped, Mapped>& lazy_emplace_impl(
|
||||
KeyHolder& key_holder, size_t hash_value, Data& data, Func&& f) {
|
||||
typename Data::LookupResult it;
|
||||
data.lazy_emplace(key_holder, it, hash_value, std::forward<Func>(f));
|
||||
|
||||
return *lookup_result_get_mapped(it);
|
||||
}
|
||||
|
||||
template <typename Data, typename Key>
|
||||
ALWAYS_INLINE FindResult find_key_impl(Key key, Data& data) {
|
||||
if constexpr (Cache::consecutive_keys_optimization) {
|
||||
|
||||
@ -78,6 +78,7 @@ struct FixedHashMapImplicitZeroCell {
|
||||
|
||||
/// Default constructor; does not explicitly initialize `mapped`.
FixedHashMapImplicitZeroCell() {}
|
||||
/// Ignores both the key and the state; does not explicitly initialize `mapped`.
FixedHashMapImplicitZeroCell(const Key&, const State&) {}
|
||||
/// Ignores the key and copies `mapped_` into the cell.
FixedHashMapImplicitZeroCell(const Key&, const Mapped& mapped_) : mapped(mapped_) {}
|
||||
/// Copies the mapped half (`value_.second`) of a key/value pair; the state is unused.
FixedHashMapImplicitZeroCell(const value_type& value_, const State&) : mapped(value_.second) {}
|
||||
|
||||
/// Returns an empty VoidKey.
/// NOTE(review): presumably the key is implied by the cell's position rather than stored — confirm.
const VoidKey get_first() const { return {}; }
|
||||
|
||||
@ -269,6 +269,31 @@ public:
|
||||
this->increase_size();
|
||||
}
|
||||
|
||||
class Constructor {
|
||||
public:
|
||||
friend class FixedHashTable;
|
||||
template <typename... Args>
|
||||
void operator()(Args&&... args) const {
|
||||
new (_cell) Cell(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
private:
|
||||
Constructor(Cell* cell) : _cell(cell) {}
|
||||
Cell* _cell;
|
||||
};
|
||||
|
||||
template <typename Func>
|
||||
void ALWAYS_INLINE lazy_emplace(const Key& x, LookupResult& it, Func&& f) {
|
||||
it = &buf[x];
|
||||
|
||||
if (!buf[x].is_zero(*this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
f(Constructor(&buf[x]), x);
|
||||
this->increase_size();
|
||||
}
|
||||
|
||||
std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type& x) {
|
||||
std::pair<LookupResult, bool> res;
|
||||
emplace(Cell::get_key(x), res.first, res.second);
|
||||
|
||||
@ -59,6 +59,7 @@ struct HashMapCell {
|
||||
|
||||
/// Default constructor; does not explicitly initialize `value`.
HashMapCell() {}
|
||||
/// Stores the key; the mapped half is built from NoInitTag and the state is unused.
/// NOTE(review): NoInitTag presumably leaves the mapped value uninitialized — confirm.
HashMapCell(const Key& key_, const State&) : value(key_, NoInitTag()) {}
|
||||
/// Stores both the key and a copy of `mapped_`.
HashMapCell(const Key& key_, const Mapped& mapped_) : value(key_, mapped_) {}
|
||||
/// Copies a whole key/value pair; the state argument is unused.
HashMapCell(const value_type& value_, const State&) : value(value_) {}
|
||||
|
||||
/// Returns the stored key (`value.first`).
const Key& get_first() const { return value.first; }
|
||||
|
||||
@ -765,6 +765,27 @@ protected:
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
bool ALWAYS_INLINE lazy_emplace_if_zero(const Key& x, LookupResult& it, size_t hash_value,
|
||||
Func&& f) {
|
||||
/// If it is claimed that the zero key can not be inserted into the table.
|
||||
if (!Cell::need_zero_value_storage) return false;
|
||||
|
||||
if (Cell::is_zero(x, *this)) {
|
||||
it = this->zero_value();
|
||||
if (!this->get_has_zero()) {
|
||||
++m_size;
|
||||
this->set_get_has_zero();
|
||||
std::forward<Func>(f)(Constructor(it), x);
|
||||
this->zero_value()->set_hash(hash_value);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename KeyHolder>
|
||||
void ALWAYS_INLINE emplace_non_zero_impl(size_t place_value, KeyHolder&& key_holder,
|
||||
LookupResult& it, bool& inserted, size_t hash_value) {
|
||||
@ -804,6 +825,43 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
template <typename KeyHolder, typename Func>
|
||||
void ALWAYS_INLINE lazy_emplace_non_zero_impl(size_t place_value, KeyHolder&& key_holder,
|
||||
LookupResult& it, size_t hash_value, Func&& f) {
|
||||
it = &buf[place_value];
|
||||
|
||||
if (!buf[place_value].is_zero(*this)) {
|
||||
key_holder_discard_key(key_holder);
|
||||
return;
|
||||
}
|
||||
|
||||
key_holder_persist_key(key_holder);
|
||||
const auto& key = key_holder_get_key(key_holder);
|
||||
|
||||
f(Constructor(&buf[place_value]), key);
|
||||
buf[place_value].set_hash(hash_value);
|
||||
++m_size;
|
||||
|
||||
if (UNLIKELY(grower.overflow(m_size))) {
|
||||
try {
|
||||
resize();
|
||||
} catch (...) {
|
||||
/** If we have not resized successfully, then there will be problems.
|
||||
* There remains a key, but uninitialized mapped-value,
|
||||
* which, perhaps, can not even be called a destructor.
|
||||
*/
|
||||
--m_size;
|
||||
buf[place_value].set_zero();
|
||||
throw;
|
||||
}
|
||||
|
||||
// The hash table was rehashed, so we have to re-find the key.
|
||||
size_t new_place = find_cell(key, hash_value, grower.place(hash_value));
|
||||
assert(!buf[new_place].is_zero(*this));
|
||||
it = &buf[new_place];
|
||||
}
|
||||
}
|
||||
|
||||
/// Only for non-zero keys. Find the right place, insert the key there, if it does not already exist. Set iterator to the cell in output parameter.
|
||||
template <typename KeyHolder>
|
||||
void ALWAYS_INLINE emplace_non_zero(KeyHolder&& key_holder, LookupResult& it, bool& inserted,
|
||||
@ -813,6 +871,14 @@ protected:
|
||||
emplace_non_zero_impl(place_value, key_holder, it, inserted, hash_value);
|
||||
}
|
||||
|
||||
template <typename KeyHolder, typename Func>
|
||||
void ALWAYS_INLINE lazy_emplace_non_zero(KeyHolder&& key_holder, LookupResult& it,
|
||||
size_t hash_value, Func&& f) {
|
||||
const auto& key = key_holder_get_key(key_holder);
|
||||
size_t place_value = find_cell(key, hash_value, grower.place(hash_value));
|
||||
lazy_emplace_non_zero_impl(place_value, key_holder, it, hash_value, std::forward<Func>(f));
|
||||
}
|
||||
|
||||
public:
|
||||
void expanse_for_add_elem(size_t num_elem) {
|
||||
if (add_elem_size_overflow(num_elem)) {
|
||||
@ -847,6 +913,19 @@ public:
|
||||
reinsert(*it.get_ptr(), hash_value);
|
||||
}
|
||||
|
||||
class Constructor {
|
||||
public:
|
||||
friend class HashTable;
|
||||
template <typename... Args>
|
||||
void operator()(Args&&... args) const {
|
||||
new (_cell) Cell(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
private:
|
||||
Constructor(Cell* cell) : _cell(cell) {}
|
||||
Cell* _cell;
|
||||
};
|
||||
|
||||
/** Insert the key.
|
||||
* Return values:
|
||||
* 'it' -- a LookupResult pointing to the corresponding key/mapped pair.
|
||||
@ -877,6 +956,20 @@ public:
|
||||
emplace_non_zero(key_holder, it, inserted, hash_value);
|
||||
}
|
||||
|
||||
template <typename KeyHolder, typename Func>
|
||||
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
|
||||
const auto& key = key_holder_get_key(key_holder);
|
||||
lazy_emplace(key_holder, it, hash(key), std::forward<Func>(f));
|
||||
}
|
||||
|
||||
template <typename KeyHolder, typename Func>
|
||||
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value,
|
||||
Func&& f) {
|
||||
const auto& key = key_holder_get_key(key_holder);
|
||||
if (!lazy_emplace_if_zero(key, it, hash_value, std::forward<Func>(f)))
|
||||
lazy_emplace_non_zero(key_holder, it, hash_value, std::forward<Func>(f));
|
||||
}
|
||||
|
||||
/// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet.
|
||||
void ALWAYS_INLINE insert_unique_non_zero(const Cell* cell, size_t hash_value) {
|
||||
size_t place_value = find_empty_cell(grower.place(hash_value));
|
||||
|
||||
@ -22,4 +22,5 @@ template <typename T>
|
||||
/// Default traits: a table is neither a phmap, a parallel phmap, nor a string hash table
/// unless a specialization says otherwise.
struct HashTableTraits {
    static constexpr bool is_phmap {false};
    static constexpr bool is_parallel_phmap {false};
    static constexpr bool is_string_hash_table {false};
};
|
||||
|
||||
@ -118,6 +118,16 @@ public:
|
||||
it = &*it_;
|
||||
}
|
||||
|
||||
template <typename KeyHolder, typename Func>
|
||||
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
|
||||
const auto& key = key_holder_get_key(key_holder);
|
||||
auto it_ = _hash_map.lazy_emplace(key, [&](const auto& ctor) {
|
||||
key_holder_persist_key(key_holder);
|
||||
f(ctor, key);
|
||||
});
|
||||
it = &*it_;
|
||||
}
|
||||
|
||||
template <typename KeyHolder>
|
||||
void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value,
|
||||
bool& inserted) {
|
||||
@ -140,6 +150,25 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
template <typename KeyHolder, typename Func>
|
||||
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value,
|
||||
Func&& f) {
|
||||
const auto& key = key_holder_get_key(key_holder);
|
||||
if constexpr (use_parallel) {
|
||||
auto it_ = _hash_map.lazy_emplace_with_hash(hash_value, key, [&](const auto& ctor) {
|
||||
key_holder_persist_key(key_holder);
|
||||
f(ctor, key);
|
||||
});
|
||||
it = &*it_;
|
||||
} else {
|
||||
auto it_ = _hash_map.lazy_emplace_with_hash(key, hash_value, [&](const auto& ctor) {
|
||||
key_holder_persist_key(key_holder);
|
||||
f(ctor, key);
|
||||
});
|
||||
it = &*it_;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename KeyHolder>
|
||||
LookupResult ALWAYS_INLINE find(KeyHolder&& key_holder) {
|
||||
const auto& key = key_holder_get_key(key_holder);
|
||||
@ -197,4 +226,5 @@ template <typename Key, typename Mapped, typename Hash, bool use_parallel>
|
||||
/// Traits for PHHashMap: always a phmap; "parallel" follows the `use_parallel` parameter.
struct HashTableTraits<PHHashMap<Key, Mapped, Hash, use_parallel>> {
    static constexpr bool is_phmap {true};
    static constexpr bool is_parallel_phmap {use_parallel};
    static constexpr bool is_string_hash_table {false};
};
|
||||
|
||||
@ -209,3 +209,17 @@ public:
|
||||
/// Always returns nullptr: this table type exposes no null-key slot.
char* get_null_key_data() { return nullptr; }
|
||||
/// Always returns false: this table type never reports null-key data.
bool has_null_key_data() const { return false; }
|
||||
};
|
||||
|
||||
template <typename TMapped, typename Allocator>
|
||||
struct HashTableTraits<StringHashMap<TMapped, Allocator>> {
|
||||
static constexpr bool is_phmap = false;
|
||||
static constexpr bool is_parallel_phmap = false;
|
||||
static constexpr bool is_string_hash_table = true;
|
||||
};
|
||||
|
||||
template <template <typename> class Derived, typename TMapped, typename Allocator>
|
||||
struct HashTableTraits<Derived<StringHashMap<TMapped, Allocator>>> {
|
||||
static constexpr bool is_phmap = false;
|
||||
static constexpr bool is_parallel_phmap = false;
|
||||
static constexpr bool is_string_hash_table = true;
|
||||
};
|
||||
|
||||
@ -130,6 +130,30 @@ public:
|
||||
it = zero_value();
|
||||
}
|
||||
|
||||
class Constructor {
|
||||
public:
|
||||
friend struct StringHashTableEmpty;
|
||||
template <typename... Args>
|
||||
void operator()(Args&&... args) const {
|
||||
new (_cell) Cell(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
private:
|
||||
Constructor(Cell* cell) : _cell(cell) {}
|
||||
Cell* _cell;
|
||||
};
|
||||
|
||||
template <typename KeyHolder, typename Func>
|
||||
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value,
|
||||
Func&& f) {
|
||||
if (!has_zero()) {
|
||||
const auto& key = key_holder_get_key(key_holder);
|
||||
set_has_zero(key);
|
||||
std::forward<Func>(f)(Constructor(zero_value()), key);
|
||||
}
|
||||
it = zero_value();
|
||||
}
|
||||
|
||||
template <typename Key>
|
||||
LookupResult ALWAYS_INLINE find(const Key&, size_t = 0) {
|
||||
return has_zero() ? zero_value() : nullptr;
|
||||
@ -179,7 +203,7 @@ struct StringHashTableLookupResult {
|
||||
};
|
||||
|
||||
template <typename Mapped>
|
||||
ALWAYS_INLINE inline auto lookup_result_get_mapped(StringHashTableLookupResult<Mapped*> cell) {
|
||||
ALWAYS_INLINE inline auto lookup_result_get_mapped(StringHashTableLookupResult<Mapped> cell) {
|
||||
return &cell.get_mapped();
|
||||
}
|
||||
|
||||
@ -548,6 +572,27 @@ public:
|
||||
this->dispatch(*this, key_holder, EmplaceCallable(it, inserted));
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
struct LazyEmplaceCallable {
|
||||
LookupResult& mapped;
|
||||
Func&& f;
|
||||
|
||||
LazyEmplaceCallable(LookupResult& mapped_, Func&& f_)
|
||||
: mapped(mapped_), f(std::forward<Func>(f_)) {}
|
||||
|
||||
template <typename Map, typename KeyHolder>
|
||||
void ALWAYS_INLINE operator()(Map& map, KeyHolder&& key_holder, size_t hash) {
|
||||
typename Map::LookupResult result;
|
||||
map.lazy_emplace(key_holder, result, hash, std::forward<Func>(f));
|
||||
mapped = &result->get_second();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename KeyHolder, typename Func>
|
||||
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
|
||||
this->dispatch(*this, key_holder, LazyEmplaceCallable<Func>(it, std::forward<Func>(f)));
|
||||
}
|
||||
|
||||
struct FindCallable {
|
||||
// find() doesn't need any key memory management, so we don't work with
|
||||
// any key holders here, only with normal keys. The key type is still
|
||||
@ -613,3 +658,10 @@ public:
|
||||
size_t get_collisions() const { return 0; }
|
||||
#endif
|
||||
};
|
||||
|
||||
template <typename SubMaps>
|
||||
struct HashTableTraits<StringHashTable<SubMaps>> {
|
||||
static constexpr bool is_phmap = false;
|
||||
static constexpr bool is_parallel_phmap = false;
|
||||
static constexpr bool is_string_hash_table = true;
|
||||
};
|
||||
|
||||
@ -97,6 +97,8 @@ AggregationNode::AggregationNode(ObjectPool* pool, const TPlanNode& tnode,
|
||||
_serialize_result_timer(nullptr),
|
||||
_deserialize_data_timer(nullptr),
|
||||
_hash_table_compute_timer(nullptr),
|
||||
_hash_table_iterate_timer(nullptr),
|
||||
_insert_keys_to_column_timer(nullptr),
|
||||
_streaming_agg_timer(nullptr),
|
||||
_hash_table_size_counter(nullptr),
|
||||
_hash_table_input_counter(nullptr) {
|
||||
@ -295,6 +297,8 @@ Status AggregationNode::prepare(RuntimeState* state) {
|
||||
_serialize_result_timer = ADD_TIMER(runtime_profile(), "SerializeResultTime");
|
||||
_deserialize_data_timer = ADD_TIMER(runtime_profile(), "DeserializeDataTime");
|
||||
_hash_table_compute_timer = ADD_TIMER(runtime_profile(), "HashTableComputeTime");
|
||||
_hash_table_iterate_timer = ADD_TIMER(runtime_profile(), "HashTableIterateTime");
|
||||
_insert_keys_to_column_timer = ADD_TIMER(runtime_profile(), "InsertKeysToColumnTime");
|
||||
_streaming_agg_timer = ADD_TIMER(runtime_profile(), "StreamingAggTime");
|
||||
_hash_table_size_counter = ADD_COUNTER(runtime_profile(), "HashTableSize", TUnit::UNIT);
|
||||
_hash_table_input_counter = ADD_COUNTER(runtime_profile(), "HashTableInputCount", TUnit::UNIT);
|
||||
@ -384,6 +388,20 @@ Status AggregationNode::prepare(RuntimeState* state) {
|
||||
_executor.close = std::bind<void>(&AggregationNode::_close_without_key, this);
|
||||
} else {
|
||||
_init_hash_method(_probe_expr_ctxs);
|
||||
|
||||
std::visit(
|
||||
[&](auto&& agg_method) {
|
||||
using HashTableType = std::decay_t<decltype(agg_method.data)>;
|
||||
using KeyType = typename HashTableType::key_type;
|
||||
|
||||
/// some aggregate functions (like AVG for decimal) have align issues.
|
||||
_aggregate_data_container.reset(new AggregateDataContainer(
|
||||
sizeof(KeyType),
|
||||
((_total_size_of_aggregate_states + _align_aggregate_states - 1) /
|
||||
_align_aggregate_states) *
|
||||
_align_aggregate_states));
|
||||
},
|
||||
_agg_data._aggregated_method_variant);
|
||||
if (_is_merge) {
|
||||
_executor.execute = std::bind<Status>(&AggregationNode::_merge_with_serialized_key,
|
||||
this, std::placeholders::_1);
|
||||
@ -787,43 +805,64 @@ void AggregationNode::_emplace_into_hash_table(AggregateDataPtr* places, ColumnR
|
||||
}
|
||||
}
|
||||
|
||||
auto creator = [this](const auto& ctor, const auto& key) {
|
||||
using KeyType = std::decay_t<decltype(key)>;
|
||||
if constexpr (HashTableTraits<HashTableType>::is_string_hash_table &&
|
||||
!std::is_same_v<StringRef, KeyType>) {
|
||||
StringRef string_ref = to_string_ref(key);
|
||||
ArenaKeyHolder key_holder {string_ref, _agg_arena_pool};
|
||||
key_holder_persist_key(key_holder);
|
||||
auto mapped = _aggregate_data_container->append_data(key_holder.key);
|
||||
_create_agg_status(mapped);
|
||||
ctor(key, mapped);
|
||||
} else {
|
||||
auto mapped = _aggregate_data_container->append_data(key);
|
||||
_create_agg_status(mapped);
|
||||
ctor(key, mapped);
|
||||
}
|
||||
};
|
||||
|
||||
auto creator_for_null_key = [this](auto& mapped) {
|
||||
mapped = _agg_arena_pool.aligned_alloc(_total_size_of_aggregate_states,
|
||||
_align_aggregate_states);
|
||||
_create_agg_status(mapped);
|
||||
};
|
||||
|
||||
/// For all rows.
|
||||
COUNTER_UPDATE(_hash_table_input_counter, num_rows);
|
||||
for (size_t i = 0; i < num_rows; ++i) {
|
||||
AggregateDataPtr aggregate_data = nullptr;
|
||||
|
||||
auto emplace_result = [&]() {
|
||||
if constexpr (HashTableTraits<HashTableType>::is_phmap) {
|
||||
if (LIKELY(i + HASH_MAP_PREFETCH_DIST < num_rows)) {
|
||||
if constexpr (HashTableTraits<HashTableType>::is_parallel_phmap) {
|
||||
agg_method.data.prefetch_by_key(state.get_key_holder(
|
||||
i + HASH_MAP_PREFETCH_DIST, _agg_arena_pool));
|
||||
} else
|
||||
agg_method.data.prefetch_by_hash(
|
||||
_hash_values[i + HASH_MAP_PREFETCH_DIST]);
|
||||
}
|
||||
|
||||
return state.emplace_key(agg_method.data, _hash_values[i], i,
|
||||
_agg_arena_pool);
|
||||
} else {
|
||||
return state.emplace_key(agg_method.data, i, _agg_arena_pool);
|
||||
AggregateDataPtr mapped = nullptr;
|
||||
if constexpr (HashTableTraits<HashTableType>::is_phmap) {
|
||||
if (LIKELY(i + HASH_MAP_PREFETCH_DIST < num_rows)) {
|
||||
if constexpr (HashTableTraits<HashTableType>::is_parallel_phmap) {
|
||||
agg_method.data.prefetch_by_key(state.get_key_holder(
|
||||
i + HASH_MAP_PREFETCH_DIST, _agg_arena_pool));
|
||||
} else
|
||||
agg_method.data.prefetch_by_hash(
|
||||
_hash_values[i + HASH_MAP_PREFETCH_DIST]);
|
||||
}
|
||||
}();
|
||||
|
||||
/// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
|
||||
if (emplace_result.is_inserted()) {
|
||||
/// exception-safety - if you can not allocate memory or create states, then destructors will not be called.
|
||||
emplace_result.set_mapped(nullptr);
|
||||
if constexpr (ColumnsHashing::IsSingleNullableColumnMethod<
|
||||
AggState>::value) {
|
||||
mapped = state.lazy_emplace_key(agg_method.data, _hash_values[i], i,
|
||||
_agg_arena_pool, creator,
|
||||
creator_for_null_key);
|
||||
} else {
|
||||
mapped = state.lazy_emplace_key(agg_method.data, _hash_values[i], i,
|
||||
_agg_arena_pool, creator);
|
||||
}
|
||||
} else {
|
||||
if constexpr (ColumnsHashing::IsSingleNullableColumnMethod<
|
||||
AggState>::value) {
|
||||
mapped = state.lazy_emplace_key(agg_method.data, i, _agg_arena_pool,
|
||||
creator, creator_for_null_key);
|
||||
} else {
|
||||
mapped = state.lazy_emplace_key(agg_method.data, i, _agg_arena_pool,
|
||||
creator);
|
||||
}
|
||||
}
|
||||
|
||||
aggregate_data = _agg_arena_pool.aligned_alloc(
|
||||
_total_size_of_aggregate_states, _align_aggregate_states);
|
||||
_create_agg_status(aggregate_data);
|
||||
|
||||
emplace_result.set_mapped(aggregate_data);
|
||||
} else
|
||||
aggregate_data = emplace_result.get_mapped();
|
||||
|
||||
places[i] = aggregate_data;
|
||||
places[i] = mapped;
|
||||
assert(places[i] != nullptr);
|
||||
}
|
||||
},
|
||||
@ -1051,24 +1090,33 @@ Status AggregationNode::_get_with_serialized_key_result(RuntimeState* state, Blo
|
||||
std::visit(
|
||||
[&](auto&& agg_method) -> void {
|
||||
auto& data = agg_method.data;
|
||||
auto& iter = agg_method.iterator;
|
||||
agg_method.init_once();
|
||||
const auto size = std::min(data.size(), size_t(state->batch_size()));
|
||||
using KeyType = std::decay_t<decltype(iter->get_first())>;
|
||||
using KeyType = std::decay_t<decltype(agg_method.iterator->get_first())>;
|
||||
std::vector<KeyType> keys(size);
|
||||
if (_values.size() < size) {
|
||||
_values.resize(size);
|
||||
}
|
||||
|
||||
size_t num_rows = 0;
|
||||
while (iter != data.end() && num_rows < state->batch_size()) {
|
||||
keys[num_rows] = iter->get_first();
|
||||
_values[num_rows] = iter->get_second();
|
||||
++iter;
|
||||
++num_rows;
|
||||
_aggregate_data_container->init_once();
|
||||
auto& iter = _aggregate_data_container->iterator;
|
||||
|
||||
{
|
||||
SCOPED_TIMER(_hash_table_iterate_timer);
|
||||
while (iter != _aggregate_data_container->end() &&
|
||||
num_rows < state->batch_size()) {
|
||||
keys[num_rows] = iter.get_key<KeyType>();
|
||||
_values[num_rows] = iter.get_aggregate_data();
|
||||
++iter;
|
||||
++num_rows;
|
||||
}
|
||||
}
|
||||
|
||||
agg_method.insert_keys_into_columns(keys, key_columns, num_rows, _probe_key_sz);
|
||||
{
|
||||
SCOPED_TIMER(_insert_keys_to_column_timer);
|
||||
agg_method.insert_keys_into_columns(keys, key_columns, num_rows, _probe_key_sz);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) {
|
||||
_aggregate_evaluators[i]->insert_result_info_vec(
|
||||
@ -1076,7 +1124,7 @@ Status AggregationNode::_get_with_serialized_key_result(RuntimeState* state, Blo
|
||||
num_rows);
|
||||
}
|
||||
|
||||
if (iter == data.end()) {
|
||||
if (iter == _aggregate_data_container->end()) {
|
||||
if (agg_method.data.has_null_key_data()) {
|
||||
// only one key of group by support wrap null key
|
||||
// here need additional processing logic on the null key / value
|
||||
@ -1137,27 +1185,37 @@ Status AggregationNode::_serialize_with_serialized_key_result(RuntimeState* stat
|
||||
[&](auto&& agg_method) -> void {
|
||||
agg_method.init_once();
|
||||
auto& data = agg_method.data;
|
||||
auto& iter = agg_method.iterator;
|
||||
|
||||
const auto size = std::min(data.size(), size_t(state->batch_size()));
|
||||
using KeyType = std::decay_t<decltype(iter->get_first())>;
|
||||
using KeyType = std::decay_t<decltype(agg_method.iterator->get_first())>;
|
||||
std::vector<KeyType> keys(size);
|
||||
if (_values.size() < size + 1) {
|
||||
_values.resize(size + 1);
|
||||
}
|
||||
|
||||
size_t num_rows = 0;
|
||||
while (iter != data.end() && num_rows < state->batch_size()) {
|
||||
keys[num_rows] = iter->get_first();
|
||||
_values[num_rows] = iter->get_second();
|
||||
++iter;
|
||||
++num_rows;
|
||||
_aggregate_data_container->init_once();
|
||||
auto& iter = _aggregate_data_container->iterator;
|
||||
|
||||
{
|
||||
SCOPED_TIMER(_hash_table_iterate_timer);
|
||||
while (iter != _aggregate_data_container->end() &&
|
||||
num_rows < state->batch_size()) {
|
||||
keys[num_rows] = iter.get_key<KeyType>();
|
||||
_values[num_rows] = iter.get_aggregate_data();
|
||||
++iter;
|
||||
++num_rows;
|
||||
}
|
||||
}
|
||||
|
||||
agg_method.insert_keys_into_columns(keys, key_columns, num_rows, _probe_key_sz);
|
||||
{
|
||||
SCOPED_TIMER(_insert_keys_to_column_timer);
|
||||
agg_method.insert_keys_into_columns(keys, key_columns, num_rows, _probe_key_sz);
|
||||
}
|
||||
|
||||
if (iter == data.end()) {
|
||||
if (iter == _aggregate_data_container->end()) {
|
||||
if (agg_method.data.has_null_key_data()) {
|
||||
// only one key of group by support wrap null key
|
||||
// here need additional processing logic on the null key / value
|
||||
DCHECK(key_columns.size() == 1);
|
||||
DCHECK(key_columns[0]->is_nullable());
|
||||
if (agg_method.data.has_null_key_data()) {
|
||||
|
||||
@ -616,6 +616,125 @@ struct AggregatedDataVariants {
|
||||
|
||||
using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>;
|
||||
|
||||
struct AggregateDataContainer {
|
||||
public:
|
||||
AggregateDataContainer(size_t size_of_key, size_t size_of_aggregate_states)
|
||||
: _size_of_key(size_of_key), _size_of_aggregate_states(size_of_aggregate_states) {
|
||||
_expand();
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
AggregateDataPtr append_data(const KeyType& key) {
|
||||
assert(sizeof(KeyType) == _size_of_key);
|
||||
if (UNLIKELY(_index_in_sub_container == SUB_CONTAINER_CAPACITY)) {
|
||||
_expand();
|
||||
}
|
||||
|
||||
*reinterpret_cast<KeyType*>(_current_keys) = key;
|
||||
auto aggregate_data = _current_agg_data;
|
||||
++_total_count;
|
||||
++_index_in_sub_container;
|
||||
_current_agg_data += _size_of_aggregate_states;
|
||||
_current_keys += _size_of_key;
|
||||
return aggregate_data;
|
||||
}
|
||||
|
||||
template <typename Derived, bool IsConst>
|
||||
class IteratorBase {
|
||||
using Container =
|
||||
std::conditional_t<IsConst, const AggregateDataContainer, AggregateDataContainer>;
|
||||
|
||||
Container* container;
|
||||
uint32_t index;
|
||||
uint32_t sub_container_index;
|
||||
uint32_t index_in_sub_container;
|
||||
|
||||
friend class HashTable;
|
||||
|
||||
public:
|
||||
IteratorBase() {}
|
||||
IteratorBase(Container* container_, uint32_t index_)
|
||||
: container(container_), index(index_) {
|
||||
sub_container_index = index / SUB_CONTAINER_CAPACITY;
|
||||
index_in_sub_container = index % SUB_CONTAINER_CAPACITY;
|
||||
}
|
||||
|
||||
bool operator==(const IteratorBase& rhs) const { return index == rhs.index; }
|
||||
bool operator!=(const IteratorBase& rhs) const { return index != rhs.index; }
|
||||
|
||||
Derived& operator++() {
|
||||
index++;
|
||||
sub_container_index = index / SUB_CONTAINER_CAPACITY;
|
||||
index_in_sub_container = index % SUB_CONTAINER_CAPACITY;
|
||||
return static_cast<Derived&>(*this);
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
KeyType get_key() {
|
||||
assert(sizeof(KeyType) == container->_size_of_key);
|
||||
return ((KeyType*)(container->_key_containers[sub_container_index]))
|
||||
[index_in_sub_container];
|
||||
}
|
||||
|
||||
AggregateDataPtr get_aggregate_data() {
|
||||
return &(container->_value_containers[sub_container_index]
|
||||
[container->_size_of_aggregate_states *
|
||||
index_in_sub_container]);
|
||||
}
|
||||
};
|
||||
|
||||
class Iterator : public IteratorBase<Iterator, false> {
|
||||
public:
|
||||
using IteratorBase<Iterator, false>::IteratorBase;
|
||||
};
|
||||
|
||||
class ConstIterator : public IteratorBase<ConstIterator, true> {
|
||||
public:
|
||||
using IteratorBase<ConstIterator, true>::IteratorBase;
|
||||
};
|
||||
|
||||
ConstIterator begin() const { return ConstIterator(this, 0); }
|
||||
|
||||
ConstIterator cbegin() const { return begin(); }
|
||||
|
||||
Iterator begin() { return Iterator(this, 0); }
|
||||
|
||||
ConstIterator end() const { return ConstIterator(this, _total_count); }
|
||||
ConstIterator cend() const { return end(); }
|
||||
Iterator end() { return Iterator(this, _total_count); }
|
||||
|
||||
void init_once() {
|
||||
if (_inited) return;
|
||||
_inited = true;
|
||||
iterator = begin();
|
||||
}
|
||||
Iterator iterator;
|
||||
|
||||
private:
|
||||
void _expand() {
|
||||
_index_in_sub_container = 0;
|
||||
_current_keys = _arena_pool.alloc(_size_of_key * SUB_CONTAINER_CAPACITY);
|
||||
_key_containers.emplace_back(_current_keys);
|
||||
|
||||
_current_agg_data = (AggregateDataPtr)_arena_pool.alloc(_size_of_aggregate_states *
|
||||
SUB_CONTAINER_CAPACITY);
|
||||
_value_containers.emplace_back(_current_agg_data);
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr uint32_t SUB_CONTAINER_CAPACITY = 8192;
|
||||
Arena _arena_pool;
|
||||
std::vector<char*> _key_containers;
|
||||
std::vector<AggregateDataPtr> _value_containers;
|
||||
AggregateDataPtr _current_agg_data;
|
||||
char* _current_keys;
|
||||
size_t _size_of_key {};
|
||||
size_t _size_of_aggregate_states {};
|
||||
uint32_t _index_in_sub_container {};
|
||||
uint32_t _total_count {};
|
||||
bool _inited = false;
|
||||
};
|
||||
|
||||
// not support spill
|
||||
class AggregationNode : public ::doris::ExecNode {
|
||||
public:
|
||||
@ -675,6 +794,8 @@ private:
|
||||
RuntimeProfile::Counter* _serialize_result_timer;
|
||||
RuntimeProfile::Counter* _deserialize_data_timer;
|
||||
RuntimeProfile::Counter* _hash_table_compute_timer;
|
||||
RuntimeProfile::Counter* _hash_table_iterate_timer;
|
||||
RuntimeProfile::Counter* _insert_keys_to_column_timer;
|
||||
RuntimeProfile::Counter* _streaming_agg_timer;
|
||||
RuntimeProfile::Counter* _hash_table_size_counter;
|
||||
RuntimeProfile::Counter* _hash_table_input_counter;
|
||||
@ -690,6 +811,7 @@ private:
|
||||
std::vector<char> _deserialize_buffer;
|
||||
std::vector<size_t> _hash_values;
|
||||
std::vector<AggregateDataPtr> _values;
|
||||
std::unique_ptr<AggregateDataContainer> _aggregate_data_container;
|
||||
|
||||
private:
|
||||
/// Return true if we should keep expanding hash tables in the preagg. If false,
|
||||
|
||||
Reference in New Issue
Block a user