[improvement](hash join) Return result early if probe side has no data (#23044)

This commit is contained in:
TengJianPing
2023-08-17 09:17:09 +08:00
committed by GitHub
parent a77e9fbc99
commit 343a6dc29d
8 changed files with 565 additions and 2 deletions

View File

@ -678,6 +678,11 @@ Status HashJoinNode::push(RuntimeState* /*state*/, vectorized::Block* input_bloc
Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eos) {
SCOPED_TIMER(_runtime_profile->total_time_counter());
if (_is_hash_join_early_start_probe_eos(state)) {
*eos = true;
return Status::OK();
}
if (_short_circuit_for_probe) {
// If we use a short-circuit strategy, should return empty block directly.
*eos = true;
@ -761,6 +766,50 @@ void HashJoinNode::_prepare_probe_block() {
release_block_memory(_probe_block);
}
bool HashJoinNode::_enable_hash_join_early_start_probe(RuntimeState* state) const {
return state->enable_hash_join_early_start_probe() &&
(_join_op == TJoinOp::INNER_JOIN || _join_op == TJoinOp::LEFT_OUTER_JOIN ||
_join_op == TJoinOp::LEFT_SEMI_JOIN || _join_op == TJoinOp::LEFT_ANTI_JOIN);
}
bool HashJoinNode::_is_hash_join_early_start_probe_eos(RuntimeState* state) const {
return _enable_hash_join_early_start_probe(state) && _probe_block.rows() == 0;
}
void HashJoinNode::_probe_side_open_thread(RuntimeState* state, std::promise<Status>* promise) {
Defer defer {[&]() { _probe_open_finish = true; }};
SCOPED_ATTACH_TASK(state);
auto st = child(0)->open(state);
if (!st.ok()) {
promise->set_value(st);
return;
}
if (_enable_hash_join_early_start_probe(state)) {
while (need_more_input_data()) {
prepare_for_next();
SCOPED_TIMER(_probe_next_timer);
st = child(0)->get_next_after_projects(
state, &_probe_block, &_probe_eos,
std::bind((Status(ExecNode::*)(RuntimeState*, vectorized::Block*, bool*)) &
ExecNode::get_next,
_children[0], std::placeholders::_1, std::placeholders::_2,
std::placeholders::_3));
if (!st.ok()) {
promise->set_value(st);
return;
}
st = push(state, &_probe_block, _probe_eos);
if (!st.ok()) {
promise->set_value(st);
return;
}
}
}
promise->set_value(Status::OK());
}
Status HashJoinNode::open(RuntimeState* state) {
SCOPED_TIMER(_runtime_profile->total_time_counter());
SCOPED_TIMER(_open_timer);
@ -800,7 +849,8 @@ Status HashJoinNode::_materialize_build_side(RuntimeState* state) {
Block block;
// If eos or have already met a null value using short-circuit strategy, we do not need to pull
// data from data.
while (!eos && !_short_circuit_for_null_in_probe_side) {
while (!eos && !_short_circuit_for_null_in_probe_side &&
(!_probe_open_finish || !_is_hash_join_early_start_probe_eos(state))) {
block.clear_column_data();
RETURN_IF_CANCELLED(state);
{