[Optimize][Set Operation Node] Reduce the memory expansion operation of the hash table in ExceptNode and IntersectNode (#6915)

Reduce the memory expansion operation of the hash table in ExceptNode and IntersectNode
This commit is contained in:
HappenLee
2021-11-12 10:39:59 +08:00
committed by GitHub
parent 35da149ebe
commit 047b83b987
6 changed files with 68 additions and 52 deletions

View File

@ -41,33 +41,17 @@ Status ExceptNode::init(const TPlanNode& tnode, RuntimeState* state) {
Status ExceptNode::open(RuntimeState* state) {
RETURN_IF_ERROR(SetOperationNode::open(state));
// if a table is empty, the result must be empty
if (_hash_tbl->size() == 0) {
_hash_tbl_iterator = _hash_tbl->begin();
return Status::OK();
}
bool eos = false;
_valid_element_in_hash_tbl = _hash_tbl->num_filled_buckets();
for (int i = 1; i < _children.size(); ++i) {
// rebuild hash table, for first time will rebuild with the no duplicated _hash_tbl,
if (i > 1) {
SCOPED_TIMER(_build_timer);
std::unique_ptr<HashTable> temp_tbl(
new HashTable(_child_expr_lists[0], _child_expr_lists[i], _build_tuple_size,
true, _find_nulls, id(), mem_tracker(), 1024));
_hash_tbl_iterator = _hash_tbl->begin();
while (_hash_tbl_iterator.has_next()) {
if (!_hash_tbl_iterator.matched()) {
VLOG_ROW << "rebuild row: "
<< get_row_output_string(_hash_tbl_iterator.get_row(),
child(0)->row_desc());
temp_tbl->insert(_hash_tbl_iterator.get_row());
}
_hash_tbl_iterator.next<false>();
}
_hash_tbl.swap(temp_tbl);
temp_tbl->close();
}
if (i > 1) { refresh_hash_table<false>(i); }
// probe
_probe_batch.reset(
new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker().get()));
@ -83,7 +67,10 @@ Status ExceptNode::open(RuntimeState* state) {
<< get_row_output_string(_probe_batch->get_row(j), child(i)->row_desc());
_hash_tbl_iterator = _hash_tbl->find(_probe_batch->get_row(j));
if (_hash_tbl_iterator != _hash_tbl->end()) {
_hash_tbl_iterator.set_matched();
if (!_hash_tbl_iterator.matched()) {
_hash_tbl_iterator.set_matched();
_valid_element_in_hash_tbl--;
}
VLOG_ROW << "probe matched: "
<< get_row_output_string(_hash_tbl_iterator.get_row(),
child(0)->row_desc());