[improve](distinct agg) add check of hash table to decide whether emplace value (#32063)
* [improve](distinct agg) add check of hash table to emplace value
This commit is contained in:
@ -39,6 +39,7 @@ Status DistinctAggregationNode::_distinct_pre_agg_with_serialized_key(
|
||||
|
||||
size_t key_size = _probe_expr_ctxs.size();
|
||||
ColumnRawPtrs key_columns(key_size);
|
||||
std::vector<int> result_idxs(key_size);
|
||||
{
|
||||
SCOPED_TIMER(_expr_timer);
|
||||
for (size_t i = 0; i < key_size; ++i) {
|
||||
@ -48,6 +49,7 @@ Status DistinctAggregationNode::_distinct_pre_agg_with_serialized_key(
|
||||
in_block->get_by_position(result_column_id)
|
||||
.column->convert_to_full_column_if_const();
|
||||
key_columns[i] = in_block->get_by_position(result_column_id).column.get();
|
||||
result_idxs[i] = result_column_id;
|
||||
}
|
||||
}
|
||||
|
||||
@ -57,24 +59,40 @@ Status DistinctAggregationNode::_distinct_pre_agg_with_serialized_key(
|
||||
|
||||
RETURN_IF_CATCH_EXCEPTION(
|
||||
_emplace_into_hash_table_to_distinct(_distinct_row, key_columns, rows));
|
||||
|
||||
// if get stop_emplace_flag = true, means have no need to emplace value into hash table
|
||||
// so return block directly and notice the column ref is 2, need deal with.
|
||||
SCOPED_TIMER(_insert_keys_to_column_timer);
|
||||
bool mem_reuse = _make_nullable_keys.empty() && out_block->mem_reuse();
|
||||
if (mem_reuse) {
|
||||
for (int i = 0; i < key_size; ++i) {
|
||||
auto dst = out_block->get_by_position(i).column->assume_mutable();
|
||||
key_columns[i]->append_data_by_selector(dst, _distinct_row);
|
||||
auto output_column = out_block->get_by_position(i).column;
|
||||
if (_stop_emplace_flag) { // swap the column directly, to solve Check failed: d.column->use_count() == 1 (2 vs. 1)
|
||||
out_block->replace_by_position(i, key_columns[i]->assume_mutable());
|
||||
in_block->replace_by_position(result_idxs[i], output_column);
|
||||
} else {
|
||||
auto dst = output_column->assume_mutable();
|
||||
key_columns[i]->append_data_by_selector(dst, _distinct_row);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ColumnsWithTypeAndName columns_with_schema;
|
||||
for (int i = 0; i < key_size; ++i) {
|
||||
auto distinct_column = key_columns[i]->clone_empty();
|
||||
key_columns[i]->append_data_by_selector(distinct_column, _distinct_row);
|
||||
columns_with_schema.emplace_back(std::move(distinct_column),
|
||||
_probe_expr_ctxs[i]->root()->data_type(),
|
||||
_probe_expr_ctxs[i]->root()->expr_name());
|
||||
if (_stop_emplace_flag) {
|
||||
columns_with_schema.emplace_back(key_columns[i]->assume_mutable(),
|
||||
_probe_expr_ctxs[i]->root()->data_type(),
|
||||
_probe_expr_ctxs[i]->root()->expr_name());
|
||||
} else {
|
||||
auto distinct_column = key_columns[i]->clone_empty();
|
||||
key_columns[i]->append_data_by_selector(distinct_column, _distinct_row);
|
||||
columns_with_schema.emplace_back(std::move(distinct_column),
|
||||
_probe_expr_ctxs[i]->root()->data_type(),
|
||||
_probe_expr_ctxs[i]->root()->expr_name());
|
||||
}
|
||||
}
|
||||
out_block->swap(Block(columns_with_schema));
|
||||
if (_stop_emplace_flag) {
|
||||
in_block->clear(); // clear the column ref with stop_emplace_flag = true
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -88,6 +106,13 @@ void DistinctAggregationNode::_emplace_into_hash_table_to_distinct(IColumn::Sele
|
||||
SCOPED_TIMER(_hash_table_compute_timer);
|
||||
using HashMethodType = std::decay_t<decltype(agg_method)>;
|
||||
using AggState = typename HashMethodType::State;
|
||||
auto& hash_tbl = *agg_method.hash_table;
|
||||
if (is_streaming_preagg() && hash_tbl.add_elem_size_overflow(num_rows)) {
|
||||
if (!_should_expand_preagg_hash_tables()) {
|
||||
_stop_emplace_flag = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
AggState state(key_columns);
|
||||
agg_method.init_serialized_keys(key_columns, num_rows);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user