// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "exec/intersect_node.h" #include "exec/hash_table.hpp" #include "exprs/expr.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" namespace doris { IntersectNode::IntersectNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : SetOperationNode(pool, tnode, descs, tnode.intersect_node.tuple_id) {} Status IntersectNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(SetOperationNode::init(tnode, state)); // Create result_expr_ctx_lists_ from thrift exprs. auto& result_texpr_lists = tnode.intersect_node.result_expr_lists; for (auto& texprs : result_texpr_lists) { std::vector ctxs; RETURN_IF_ERROR(Expr::create_expr_trees(_pool, texprs, &ctxs)); _child_expr_lists.push_back(ctxs); } return Status::OK(); } // the actual intersect operation is in this function, // 1 build a hash table from child(0) // 2 probe with child(1), then filter the hash table and find the matched item, use them to rebuild a hash table // repeat [2] this for all the rest child Status IntersectNode::open(RuntimeState* state) { RETURN_IF_ERROR(SetOperationNode::open(state)); // if a table is empty, the result must be empty if (_hash_tbl->size() == 0) { _hash_tbl_iterator = _hash_tbl->begin(); return Status::OK(); } bool eos = false; for (int i = 1; i < _children.size(); ++i) { if (i > 1) { SCOPED_TIMER(_build_timer); std::unique_ptr temp_tbl( new HashTable(_child_expr_lists[0], _child_expr_lists[i], _build_tuple_size, true, _find_nulls, id(), mem_tracker(), 1024)); _hash_tbl_iterator = _hash_tbl->begin(); while (_hash_tbl_iterator.has_next()) { if (_hash_tbl_iterator.matched()) { VLOG_ROW << "rebuild row: " << get_row_output_string(_hash_tbl_iterator.get_row(), child(0)->row_desc()); temp_tbl->insert(_hash_tbl_iterator.get_row()); } _hash_tbl_iterator.next(); } _hash_tbl.swap(temp_tbl); temp_tbl->close(); VLOG_ROW << "hash table content: " << _hash_tbl->debug_string(true, &child(0)->row_desc()); // if a table is empty, the result must be empty if (_hash_tbl->size() == 0) { break; } } // probe _probe_batch.reset( new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker().get())); ScopedTimer probe_timer(_probe_timer); RETURN_IF_ERROR(child(i)->open(state)); eos = false; while (!eos) { RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(child(i)->get_next(state, _probe_batch.get(), &eos)); RETURN_IF_LIMIT_EXCEEDED(state, " Intersect , while probing the hash table."); for (int j = 0; j < _probe_batch->num_rows(); ++j) { VLOG_ROW << "probe row: " << get_row_output_string(_probe_batch->get_row(j), child(i)->row_desc()); _hash_tbl_iterator = _hash_tbl->find(_probe_batch->get_row(j)); if (_hash_tbl_iterator != _hash_tbl->end()) { _hash_tbl_iterator.set_matched(); VLOG_ROW << "probe matched: " << get_row_output_string(_hash_tbl_iterator.get_row(), child(0)->row_desc()); } } _probe_batch->reset(); } } _hash_tbl_iterator = _hash_tbl->begin(); return Status::OK(); } Status IntersectNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eos) { RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT)); RETURN_IF_CANCELLED(state); SCOPED_TIMER(_runtime_profile->total_time_counter()); *eos = true; if (reached_limit()) { return Status::OK(); } int64_t tuple_buf_size; uint8_t* tuple_buf; RETURN_IF_ERROR( out_batch->resize_and_allocate_tuple_buffer(state, &tuple_buf_size, &tuple_buf)); memset(tuple_buf, 0, tuple_buf_size); while (_hash_tbl_iterator.has_next()) { VLOG_ROW << "find row: " << get_row_output_string(_hash_tbl_iterator.get_row(), child(0)->row_desc()) << " matched: " << _hash_tbl_iterator.matched(); if (_hash_tbl_iterator.matched()) { create_output_row(_hash_tbl_iterator.get_row(), out_batch, tuple_buf); tuple_buf += _tuple_desc->byte_size(); ++_num_rows_returned; } _hash_tbl_iterator.next(); *eos = !_hash_tbl_iterator.has_next() || reached_limit(); if (out_batch->is_full() || out_batch->at_resource_limit() || *eos) { return Status::OK(); } } return Status::OK(); } } // namespace doris