// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "exec/hash_join_node.h" #include "exec/hash_table.hpp" #include "runtime/row_batch.h" namespace doris { // Functions in this file are cross compiled to IR with clang. // Wrapper around ExecNode's eval conjuncts with a different function name. // This lets us distinguish between the join conjuncts vs. non-join conjuncts // for codegen. // Note: don't declare this static. LLVM will pick the fastcc calling convention and // we will not be able to replace the functions with codegen'd versions. // TODO: explicitly set the calling convention? // TODO: investigate using fastcc for all codegen internal functions? bool IR_NO_INLINE eval_other_join_conjuncts(ExprContext* const* ctxs, int num_ctxs, TupleRow* row) { return ExecNode::eval_conjuncts(ctxs, num_ctxs, row); } // CreateOutputRow, EvalOtherJoinConjuncts, and EvalConjuncts are replaced by // codegen. int HashJoinNode::process_probe_batch(RowBatch* out_batch, RowBatch* probe_batch, int max_added_rows) { // This path does not handle full outer or right outer joins DCHECK(!_match_all_build); int row_idx = out_batch->add_rows(max_added_rows); DCHECK(row_idx != RowBatch::INVALID_ROW_INDEX); uint8_t* out_row_mem = reinterpret_cast(out_batch->get_row(row_idx)); TupleRow* out_row = reinterpret_cast(out_row_mem); int rows_returned = 0; int probe_rows = probe_batch->num_rows(); ExprContext* const* other_conjunct_ctxs = &_other_join_conjunct_ctxs[0]; int num_other_conjunct_ctxs = _other_join_conjunct_ctxs.size(); ExprContext* const* conjunct_ctxs = &_conjunct_ctxs[0]; int num_conjunct_ctxs = _conjunct_ctxs.size(); while (true) { // Create output row for each matching build row while (_hash_tbl_iterator.has_next()) { TupleRow* matched_build_row = _hash_tbl_iterator.get_row(); _hash_tbl_iterator.next(); create_output_row(out_row, _current_probe_row, matched_build_row); if (!eval_other_join_conjuncts(other_conjunct_ctxs, num_other_conjunct_ctxs, out_row)) { continue; } _matched_probe = true; // left_anti_join: equal match won't return if (_join_op == TJoinOp::LEFT_ANTI_JOIN) { _hash_tbl_iterator = _hash_tbl->end(); break; } if (eval_conjuncts(conjunct_ctxs, num_conjunct_ctxs, out_row)) { ++rows_returned; // Filled up out batch or hit limit if (UNLIKELY(rows_returned == max_added_rows)) { goto end; } // Advance to next out row out_row_mem += out_batch->row_byte_size(); out_row = reinterpret_cast(out_row_mem); } // Handle left semi-join if (_match_one_build) { _hash_tbl_iterator = _hash_tbl->end(); break; } } // Handle left outer-join and left semi-join if ((!_matched_probe && _match_all_probe) || ((!_matched_probe && _join_op == TJoinOp::LEFT_ANTI_JOIN))) { create_output_row(out_row, _current_probe_row, NULL); _matched_probe = true; if (ExecNode::eval_conjuncts(conjunct_ctxs, num_conjunct_ctxs, out_row)) { ++rows_returned; if (UNLIKELY(rows_returned == max_added_rows)) { goto end; } // Advance to next out row out_row_mem += out_batch->row_byte_size(); out_row = reinterpret_cast(out_row_mem); } } if (!_hash_tbl_iterator.has_next()) { // Advance to the next probe row if (UNLIKELY(_probe_batch_pos == probe_rows)) { goto end; } _current_probe_row = probe_batch->get_row(_probe_batch_pos++); _hash_tbl_iterator = _hash_tbl->find(_current_probe_row); _matched_probe = false; } } end: if (_match_one_build && _matched_probe) { _hash_tbl_iterator = _hash_tbl->end(); } out_batch->commit_rows(rows_returned); return rows_returned; } // when build table has too many duplicated rows, the collisions will be very serious, // so in some case will don't need to store duplicated value in hash table, we can build an unique one void HashJoinNode::process_build_batch(RowBatch* build_batch) { // insert build row into our hash table if (_build_unique) { for (int i = 0; i < build_batch->num_rows(); ++i) { _hash_tbl->insert_unique(build_batch->get_row(i)); } } else { for (int i = 0; i < build_batch->num_rows(); ++i) { _hash_tbl->insert(build_batch->get_row(i)); } } } } // namespace doris