// Modifications copyright (C) 2017, Baidu.com, Inc.
// Copyright 2017 The Apache Software Foundation

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "exec/topn_node.h"

#include <sstream>

#include "exprs/expr.h"
#include "gen_cpp/Exprs_types.h"
#include "gen_cpp/PlanNodes_types.h"
#include "runtime/descriptors.h"
#include "runtime/mem_pool.h"
#include "runtime/raw_value.h"
#include "runtime/row_batch.h"
#include "runtime/runtime_state.h"
#include "runtime/tuple.h"
#include "runtime/tuple_row.h"
#include "util/debug_util.h"
#include "util/runtime_profile.h"
#include "util/tuple_row_compare.h"
#include <gperftools/profiler.h>

namespace palo {

TopNNode::TopNNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) :
        ExecNode(pool, tnode, descs),
        _offset(tnode.sort_node.__isset.offset ? tnode.sort_node.offset : 0),
        _materialized_tuple_desc(NULL),
        _tuple_row_less_than(NULL),
        _tuple_pool(NULL),
        _num_rows_skipped(0),
        _priority_queue(NULL) {
}

TopNNode::~TopNNode() {
}

Status TopNNode::init(const TPlanNode& tnode) {
    RETURN_IF_ERROR(ExecNode::init(tnode));
    RETURN_IF_ERROR(_sort_exec_exprs.init(tnode.sort_node.sort_info, _pool));
    _is_asc_order = tnode.sort_node.sort_info.is_asc_order;
    _nulls_first = tnode.sort_node.sort_info.nulls_first;

    DCHECK_EQ(_conjuncts.size(), 0) << "TopNNode should never have predicates to evaluate.";
    _abort_on_default_limit_exceeded = tnode.sort_node.is_default_limit;
    return Status::OK;
}

Status TopNNode::prepare(RuntimeState* state) {
    SCOPED_TIMER(_runtime_profile->total_time_counter());
    RETURN_IF_ERROR(ExecNode::prepare(state));
    _tuple_pool.reset(new MemPool(mem_tracker()));
    RETURN_IF_ERROR(_sort_exec_exprs.prepare(
            state, child(0)->row_desc(), _row_descriptor, expr_mem_tracker()));
    // AddExprCtxsToFree(_sort_exec_exprs);

    _tuple_row_less_than.reset(
            new TupleRowComparator(_sort_exec_exprs, _is_asc_order, _nulls_first));

    if (state->codegen_level() > 0) {
        bool success = _tuple_row_less_than->codegen(state);
        if (success) {
            // AddRuntimeExecOption("Codegen Enabled");
        }
    }

    _abort_on_default_limit_exceeded = _abort_on_default_limit_exceeded &&
            state->abort_on_default_limit_exceeded();
    _materialized_tuple_desc = _row_descriptor.tuple_descriptors()[0];
    return Status::OK;
}
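
// open() consumes the entire child input: every row from child(0) is fed into a
// bounded priority queue that keeps at most (_offset + _limit) materialized tuples.
// get_next() then streams the sorted result back out, skipping the first _offset rows.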
Status TopNNode::open(RuntimeState* state) {
    SCOPED_TIMER(_runtime_profile->total_time_counter());
    RETURN_IF_ERROR(ExecNode::open(state));
    RETURN_IF_CANCELLED(state);
    // RETURN_IF_ERROR(QueryMaintenance(state));
    RETURN_IF_ERROR(state->check_query_state());
    RETURN_IF_ERROR(_sort_exec_exprs.open(state));

    // Avoid creating them after every Reset()/Open().
    // TODO: For some reason initializing _priority_queue in Prepare() causes a 30% perf
    // regression. Why??
    if (_priority_queue.get() == NULL) {
        _priority_queue.reset(
                new std::priority_queue<Tuple*, std::vector<Tuple*>, TupleRowComparator>(
                        *_tuple_row_less_than));
    }

    // Allocate memory for a temporary tuple.
    _tmp_tuple = reinterpret_cast<Tuple*>(
            _tuple_pool->allocate(_materialized_tuple_desc->byte_size()));
    RETURN_IF_ERROR(child(0)->open(state));

    // Limit of 0, no need to fetch anything from children.
    if (_limit != 0) {
        RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker());
        bool eos = true;

        do {
            batch.reset();
            RETURN_IF_ERROR(child(0)->get_next(state, &batch, &eos));

            if (_abort_on_default_limit_exceeded && child(0)->rows_returned() > _limit) {
                return Status("DEFAULT_ORDER_BY_LIMIT has been exceeded.");
            }

            for (int i = 0; i < batch.num_rows(); ++i) {
                insert_tuple_row(batch.get_row(i));
            }
            RETURN_IF_CANCELLED(state);
            // RETURN_IF_LIMIT_EXCEEDED(state);
            RETURN_IF_ERROR(state->check_query_state());
        } while (!eos);
    }

    DCHECK_LE(_priority_queue->size(), _offset + _limit);
    prepare_for_output();

    // Unless we are inside a subplan expecting to call open()/get_next() on the child
    // again, the child can be closed at this point.
    // if (!is_in_subplan()) {
    child(0)->close(state);
    // }
    return Status::OK;
}

Status TopNNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
    SCOPED_TIMER(_runtime_profile->total_time_counter());
    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
    RETURN_IF_CANCELLED(state);
    // RETURN_IF_ERROR(QueryMaintenance(state));
    RETURN_IF_ERROR(state->check_query_state());
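
    // Emit rows from _sorted_top_n: skip the first _offset entries, then copy each
    // remaining tuple into the output batch. Each entry is a single materialized
    // tuple, so a pointer to the Tuple* can be reinterpreted as a one-tuple TupleRow.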
    while (!row_batch->at_capacity() && (_get_next_iter != _sorted_top_n.end())) {
        if (_num_rows_skipped < _offset) {
            ++_get_next_iter;
            _num_rows_skipped++;
            continue;
        }

        int row_idx = row_batch->add_row();
        TupleRow* dst_row = row_batch->get_row(row_idx);
        Tuple* src_tuple = *_get_next_iter;
        TupleRow* src_row = reinterpret_cast<TupleRow*>(&src_tuple);
        row_batch->copy_row(src_row, dst_row);
        ++_get_next_iter;
        row_batch->commit_last_row();
        ++_num_rows_returned;
        COUNTER_SET(_rows_returned_counter, _num_rows_returned);
    }
    if (VLOG_ROW_IS_ON) {
        VLOG_ROW << "TOPN-node output row: " << print_batch(row_batch);
    }

    *eos = _get_next_iter == _sorted_top_n.end();
    // Transfer ownership of tuple data to output batch.
    // TODO: To improve performance for small inputs when this node is run multiple times
    // inside a subplan, we might choose to only selectively transfer, e.g., when the
    // block(s) in the pool are all full or when the pool has reached a certain size.
    if (*eos) {
        row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), false);
        if (memory_used_counter() != NULL) {
            COUNTER_UPDATE(memory_used_counter(), _tuple_pool->peak_allocated_bytes());
        }
    }
    return Status::OK;
}

Status TopNNode::close(RuntimeState* state) {
    if (is_closed()) {
        return Status::OK;
    }
    if (_tuple_pool.get() != NULL) {
        _tuple_pool->free_all();
    }
    _sort_exec_exprs.close(state);

    return ExecNode::close(state);
}
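
// The priority queue is ordered so that top() is the "worst" row currently kept,
// i.e. the one that sorts last among the retained (_offset + _limit) rows. A new row
// therefore only displaces the top when it compares less under _tuple_row_less_than.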
// Insert if either not at the limit or it's a new TopN tuple_row
void TopNNode::insert_tuple_row(TupleRow* input_row) {
    Tuple* insert_tuple = NULL;

    if (_priority_queue->size() < _offset + _limit) {
        insert_tuple = reinterpret_cast<Tuple*>(
                _tuple_pool->allocate(_materialized_tuple_desc->byte_size()));
        insert_tuple->materialize_exprs<false>(input_row, *_materialized_tuple_desc,
                _sort_exec_exprs.sort_tuple_slot_expr_ctxs(), _tuple_pool.get(), NULL, NULL);
    } else {
        DCHECK(!_priority_queue->empty());
        Tuple* top_tuple = _priority_queue->top();
        _tmp_tuple->materialize_exprs<false>(input_row, *_materialized_tuple_desc,
                _sort_exec_exprs.sort_tuple_slot_expr_ctxs(), NULL, NULL, NULL);

        if ((*_tuple_row_less_than)(_tmp_tuple, top_tuple)) {
            // TODO: DeepCopy will allocate new buffers for the string data. This needs
            // to be fixed to use a freelist
            _tmp_tuple->deep_copy(top_tuple, *_materialized_tuple_desc, _tuple_pool.get());
            insert_tuple = top_tuple;
            _priority_queue->pop();
        }
    }

    if (insert_tuple != NULL) {
        _priority_queue->push(insert_tuple);
    }
}
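
// The queue yields its worst row first, so writing entries at decreasing indices
// leaves _sorted_top_n in final sort order, ready to be iterated by get_next().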
// Reverse the order of the tuples in the priority queue
void TopNNode::prepare_for_output() {
    _sorted_top_n.resize(_priority_queue->size());
    int index = _sorted_top_n.size() - 1;

    while (_priority_queue->size() > 0) {
        Tuple* tuple = _priority_queue->top();
        _priority_queue->pop();
        _sorted_top_n[index] = tuple;
        --index;
    }

    _get_next_iter = _sorted_top_n.begin();
}

void TopNNode::debug_string(int indentation_level, std::stringstream* out) const {
    *out << std::string(indentation_level * 2, ' ');
    *out << "TopNNode("
         // << " ordering_exprs=" << Expr::debug_string(_lhs_ordering_expr_ctxs)
         << Expr::debug_string(_sort_exec_exprs.lhs_ordering_expr_ctxs())
         << " sort_order=[";

    for (int i = 0; i < _is_asc_order.size(); ++i) {
        *out << (i > 0 ? " " : "")
             << (_is_asc_order[i] ? "asc" : "desc")
             << " nulls " << (_nulls_first[i] ? "first" : "last");
    }

    *out << "]";
    ExecNode::debug_string(indentation_level, out);
    *out << ")";
}
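
// Candidate predicates offered by the parent are adopted here when they are fully
// bound by this node's tuple ids: such conjuncts are prepared, opened, and moved into
// _conjunct_ctxs; anything else is left in the list for the caller to evaluate.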
void TopNNode::push_down_predicate(
        RuntimeState *state, std::list<ExprContext*> *expr_ctxs) {
    std::list<ExprContext*>::iterator iter = expr_ctxs->begin();
    while (iter != expr_ctxs->end()) {
        if ((*iter)->root()->is_bound(&_tuple_ids)) {
            // LOG(INFO) << "push down success expr is " << (*iter)->debug_string();
            // (*iter)->get_child(0)->prepare(state, row_desc());
            (*iter)->prepare(state, row_desc(), _expr_mem_tracker.get());
            (*iter)->open(state);
            _conjunct_ctxs.push_back(*iter);
            iter = expr_ctxs->erase(iter);
        } else {
            ++iter;
        }
    }
}

}