// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "exec/union_node.h" #include "codegen/llvm_codegen.h" #include "exprs/expr.h" #include "exprs/expr_context.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" #include "runtime/tuple.h" #include "runtime/tuple_row.h" // #include "util/runtime_profile_counters.h" #include "util/runtime_profile.h" #include "gen_cpp/PlanNodes_types.h" // #include "common/names.h" using namespace llvm; namespace doris { UnionNode::UnionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ExecNode(pool, tnode, descs), _tuple_id(tnode.union_node.tuple_id), _tuple_desc(nullptr), _first_materialized_child_idx(tnode.union_node.first_materialized_child_idx), _child_idx(0), _child_batch(nullptr), _child_row_idx(0), _child_eos(false), _const_expr_list_idx(0), _to_close_child_idx(-1) { } Status UnionNode::init(const TPlanNode& tnode, RuntimeState* state) { // TODO(zc): // RETURN_IF_ERROR(ExecNode::init(tnode, state)); RETURN_IF_ERROR(ExecNode::init(tnode, state)); DCHECK(tnode.__isset.union_node); DCHECK_EQ(_conjunct_ctxs.size(), 0); // Create const_expr_ctx_lists_ from thrift exprs. auto& const_texpr_lists = tnode.union_node.const_expr_lists; for (auto& texprs : const_texpr_lists) { std::vector ctxs; RETURN_IF_ERROR(Expr::create_expr_trees(_pool, texprs, &ctxs)); _const_expr_lists.push_back(ctxs); } // Create result_expr_ctx_lists_ from thrift exprs. auto& result_texpr_lists = tnode.union_node.result_expr_lists; for (auto& texprs : result_texpr_lists) { std::vector ctxs; RETURN_IF_ERROR(Expr::create_expr_trees(_pool, texprs, &ctxs)); _child_expr_lists.push_back(ctxs); } return Status::OK(); } Status UnionNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); DCHECK(_tuple_desc != nullptr); _codegend_union_materialize_batch_fns.resize(_child_expr_lists.size()); // Prepare const expr lists. for (const vector& exprs : _const_expr_lists) { RETURN_IF_ERROR(Expr::prepare(exprs, state, row_desc(), expr_mem_tracker())); // TODO(zc) // AddExprCtxsToFree(exprs); DCHECK_EQ(exprs.size(), _tuple_desc->slots().size()); } // Prepare result expr lists. for (int i = 0; i < _child_expr_lists.size(); ++i) { RETURN_IF_ERROR(Expr::prepare( _child_expr_lists[i], state, child(i)->row_desc(), expr_mem_tracker())); // TODO(zc) // AddExprCtxsToFree(_child_expr_lists[i]); DCHECK_EQ(_child_expr_lists[i].size(), _tuple_desc->slots().size()); } return Status::OK(); } void UnionNode::codegen(RuntimeState* state) { #if 0 DCHECK(state->ShouldCodegen()); ExecNode::codegen(state); if (IsNodeCodegenDisabled()) return; LlvmCodeGen* codegen = state->codegen(); DCHECK(codegen != nullptr); std::stringstream codegen_message; Status codegen_status; for (int i = 0; i < _child_expr_lists.size(); ++i) { if (is_child_passthrough(i)) continue; llvm::Function* tuple_materialize_exprs_fn; codegen_status = Tuple::CodegenMaterializeExprs(codegen, false, *_tuple_desc, _child_expr_lists[i], true, &tuple_materialize_exprs_fn); if (!codegen_status.ok()) { // Codegen may fail in some corner cases (e.g. we don't handle TYPE_CHAR). If this // happens, abort codegen for this and the remaining children. codegen_message << "Codegen failed for child: " << _children[i]->id(); break; } // Get a copy of the function. This function will be modified and added to the // vector of functions. Function* union_materialize_batch_fn = codegen->GetFunction(IRFunction::UNION_MATERIALIZE_BATCH, true); DCHECK(union_materialize_batch_fn != nullptr); int replaced = codegen->ReplaceCallSites(union_materialize_batch_fn, tuple_materialize_exprs_fn, Tuple::MATERIALIZE_EXPRS_SYMBOL); DCHECK_EQ(replaced, 1) << LlvmCodeGen::Print(union_materialize_batch_fn); union_materialize_batch_fn = codegen->FinalizeFunction( union_materialize_batch_fn); DCHECK(union_materialize_batch_fn != nullptr); // Add the function to Jit and to the vector of codegened functions. codegen->AddFunctionToJit(union_materialize_batch_fn, reinterpret_cast(&(_codegend_union_materialize_batch_fns.data()[i]))); } runtime_profile()->AddCodegenMsg( codegen_status.ok(), codegen_status, codegen_message.str()); #endif } Status UnionNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::open(state)); // open const expr lists. for (const vector& exprs : _const_expr_lists) { RETURN_IF_ERROR(Expr::open(exprs, state)); } // open result expr lists. for (const vector& exprs : _child_expr_lists) { RETURN_IF_ERROR(Expr::open(exprs, state)); } // Ensures that rows are available for clients to fetch after this open() has // succeeded. if (!_children.empty()) RETURN_IF_ERROR(child(_child_idx)->open(state)); return Status::OK(); } Status UnionNode::get_next_pass_through(RuntimeState* state, RowBatch* row_batch) { DCHECK(!reached_limit()); DCHECK(!is_in_subplan()); DCHECK_LT(_child_idx, _children.size()); DCHECK(is_child_passthrough(_child_idx)); // TODO(zc) // DCHECK(child(_child_idx)->row_desc().LayoutEquals(row_batch->row_desc())); if (_child_eos) RETURN_IF_ERROR(child(_child_idx)->open(state)); DCHECK_EQ(row_batch->num_rows(), 0); RETURN_IF_ERROR(child(_child_idx)->get_next(state, row_batch, &_child_eos)); if (_child_eos) { // Even though the child is at eos, it's not OK to close() it here. Once we close // the child, the row batches that it produced are invalid. Marking the batch as // needing a deep copy let's us safely close the child in the next get_next() call. // TODO: Remove this as part of IMPALA-4179. row_batch->mark_needs_deep_copy(); _to_close_child_idx = _child_idx; ++_child_idx; } return Status::OK(); } Status UnionNode::get_next_materialized(RuntimeState* state, RowBatch* row_batch) { // Fetch from children, evaluate corresponding exprs and materialize. DCHECK(!reached_limit()); DCHECK_LT(_child_idx, _children.size()); int64_t tuple_buf_size; uint8_t* tuple_buf; RETURN_IF_ERROR( row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buf_size, &tuple_buf)); memset(tuple_buf, 0, tuple_buf_size); while (has_more_materialized() && !row_batch->at_capacity()) { // The loop runs until we are either done iterating over the children that require // materialization, or the row batch is at capacity. DCHECK(!is_child_passthrough(_child_idx)); // Child row batch was either never set or we're moving on to a different child. if (_child_batch.get() == nullptr) { DCHECK_LT(_child_idx, _children.size()); _child_batch.reset(new RowBatch( child(_child_idx)->row_desc(), state->batch_size(), mem_tracker())); _child_row_idx = 0; // open the current child unless it's the first child, which was already opened in // UnionNode::open(). if (_child_eos) RETURN_IF_ERROR(child(_child_idx)->open(state)); // The first batch from each child is always fetched here. RETURN_IF_ERROR(child(_child_idx)->get_next( state, _child_batch.get(), &_child_eos)); } while (!row_batch->at_capacity()) { DCHECK(_child_batch.get() != nullptr); DCHECK_LE(_child_row_idx, _child_batch->num_rows()); if (_child_row_idx == _child_batch->num_rows()) { // Move on to the next child if it is at eos. if (_child_eos) break; // Fetch more rows from the child. _child_batch->reset(); _child_row_idx = 0; // All batches except the first batch from each child are fetched here. RETURN_IF_ERROR(child(_child_idx)->get_next( state, _child_batch.get(), &_child_eos)); // If we fetched an empty batch, go back to the beginning of this while loop, and // try again. if (_child_batch->num_rows() == 0) continue; } DCHECK_EQ(_codegend_union_materialize_batch_fns.size(), _children.size()); if (_codegend_union_materialize_batch_fns[_child_idx] == nullptr) { materialize_batch(row_batch, &tuple_buf); } else { _codegend_union_materialize_batch_fns[_child_idx](this, row_batch, &tuple_buf); } } // It shouldn't be the case that we reached the limit because we shouldn't have // incremented '_num_rows_returned' yet. DCHECK(!reached_limit()); if (_child_eos && _child_row_idx == _child_batch->num_rows()) { // Unless we are inside a subplan expecting to call open()/get_next() on the child // again, the child can be closed at this point. _child_batch.reset(); if (!is_in_subplan()) child(_child_idx)->close(state); ++_child_idx; } else { // If we haven't finished consuming rows from the current child, we must have ended // up here because the row batch is at capacity. DCHECK(row_batch->at_capacity()); } } DCHECK_LE(_child_idx, _children.size()); return Status::OK(); } Status UnionNode::get_next_const(RuntimeState* state, RowBatch* row_batch) { DCHECK_EQ(state->per_fragment_instance_idx(), 0); DCHECK_LT(_const_expr_list_idx, _const_expr_lists.size()); // Create new tuple buffer for row_batch. int64_t tuple_buf_size; uint8_t* tuple_buf; RETURN_IF_ERROR( row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buf_size, &tuple_buf)); memset(tuple_buf, 0, tuple_buf_size); while (_const_expr_list_idx < _const_expr_lists.size() && !row_batch->at_capacity()) { materialize_exprs( _const_expr_lists[_const_expr_list_idx], nullptr, tuple_buf, row_batch); tuple_buf += _tuple_desc->byte_size(); ++_const_expr_list_idx; } return Status::OK(); } Status UnionNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT)); RETURN_IF_CANCELLED(state); // TODO(zc) // RETURN_IF_ERROR(QueryMaintenance(state)); if (_to_close_child_idx != -1) { // The previous child needs to be closed if passthrough was enabled for it. In the non // passthrough case, the child was already closed in the previous call to get_next(). DCHECK(is_child_passthrough(_to_close_child_idx)); DCHECK(!is_in_subplan()); child(_to_close_child_idx)->close(state); _to_close_child_idx = -1; } // Save the number of rows in case get_next() is called with a non-empty batch, which can // happen in a subplan. int num_rows_before = row_batch->num_rows(); if (has_more_passthrough()) { RETURN_IF_ERROR(get_next_pass_through(state, row_batch)); } else if (has_more_materialized()) { RETURN_IF_ERROR(get_next_materialized(state, row_batch)); } else if (has_more_const(state)) { RETURN_IF_ERROR(get_next_const(state, row_batch)); } int num_rows_added = row_batch->num_rows() - num_rows_before; DCHECK_GE(num_rows_added, 0); if (_limit != -1 && _num_rows_returned + num_rows_added > _limit) { // Truncate the row batch if we went over the limit. num_rows_added = _limit - _num_rows_returned; row_batch->set_num_rows(num_rows_before + num_rows_added); DCHECK_GE(num_rows_added, 0); } _num_rows_returned += num_rows_added; *eos = reached_limit() || (!has_more_passthrough() && !has_more_materialized() && !has_more_const(state)); COUNTER_SET(_rows_returned_counter, _num_rows_returned); return Status::OK(); } #if 0 Status UnionNode::reset(RuntimeState* state) { _child_idx = 0; _child_batch.reset(); _child_row_idx = 0; _child_eos = false; _const_expr_list_idx = 0; // Since passthrough is disabled in subplans, verify that there is no passthrough child // that needs to be closed. DCHECK_EQ(_to_close_child_idx, -1); return ExecNode::reset(state); } #endif Status UnionNode::close(RuntimeState* state) { if (is_closed()) return Status::OK(); _child_batch.reset(); for (auto& exprs : _const_expr_lists) { Expr::close(exprs, state); } for (auto& exprs : _child_expr_lists) { Expr::close(exprs, state); } return ExecNode::close(state); } }