Files
doris/be/src/exec/intersect_node.cpp
sduzh 6fedf5881b [CodeFormat] Clang-format cpp sources (#4965)
Clang-format all c++ source files.
2020-11-28 18:36:49 +08:00

137 lines
5.8 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "exec/intersect_node.h"
#include "exec/hash_table.hpp"
#include "exprs/expr.h"
#include "runtime/row_batch.h"
#include "runtime/runtime_state.h"
namespace doris {
IntersectNode::IntersectNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
: SetOperationNode(pool, tnode, descs, tnode.intersect_node.tuple_id) {}
Status IntersectNode::init(const TPlanNode& tnode, RuntimeState* state) {
RETURN_IF_ERROR(SetOperationNode::init(tnode, state));
// Create result_expr_ctx_lists_ from thrift exprs.
auto& result_texpr_lists = tnode.intersect_node.result_expr_lists;
for (auto& texprs : result_texpr_lists) {
std::vector<ExprContext*> ctxs;
RETURN_IF_ERROR(Expr::create_expr_trees(_pool, texprs, &ctxs));
_child_expr_lists.push_back(ctxs);
}
return Status::OK();
}
// the actual intersect operation is in this function,
// 1 build a hash table from child(0)
// 2 probe with child(1), then filter the hash table and find the matched item, use them to rebuild a hash table
// repeat [2] this for all the rest child
Status IntersectNode::open(RuntimeState* state) {
RETURN_IF_ERROR(SetOperationNode::open(state));
// if a table is empty, the result must be empty
if (_hash_tbl->size() == 0) {
_hash_tbl_iterator = _hash_tbl->begin();
return Status::OK();
}
bool eos = false;
for (int i = 1; i < _children.size(); ++i) {
if (i > 1) {
SCOPED_TIMER(_build_timer);
std::unique_ptr<HashTable> temp_tbl(
new HashTable(_child_expr_lists[0], _child_expr_lists[i], _build_tuple_size,
true, _find_nulls, id(), mem_tracker(), 1024));
_hash_tbl_iterator = _hash_tbl->begin();
while (_hash_tbl_iterator.has_next()) {
if (_hash_tbl_iterator.matched()) {
VLOG_ROW << "rebuild row: "
<< get_row_output_string(_hash_tbl_iterator.get_row(),
child(0)->row_desc());
temp_tbl->insert(_hash_tbl_iterator.get_row());
}
_hash_tbl_iterator.next<false>();
}
_hash_tbl.swap(temp_tbl);
temp_tbl->close();
VLOG_ROW << "hash table content: "
<< _hash_tbl->debug_string(true, &child(0)->row_desc());
// if a table is empty, the result must be empty
if (_hash_tbl->size() == 0) {
break;
}
}
// probe
_probe_batch.reset(
new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker().get()));
ScopedTimer<MonotonicStopWatch> probe_timer(_probe_timer);
RETURN_IF_ERROR(child(i)->open(state));
eos = false;
while (!eos) {
RETURN_IF_CANCELLED(state);
RETURN_IF_ERROR(child(i)->get_next(state, _probe_batch.get(), &eos));
RETURN_IF_LIMIT_EXCEEDED(state, " Intersect , while probing the hash table.");
for (int j = 0; j < _probe_batch->num_rows(); ++j) {
VLOG_ROW << "probe row: "
<< get_row_output_string(_probe_batch->get_row(j), child(i)->row_desc());
_hash_tbl_iterator = _hash_tbl->find(_probe_batch->get_row(j));
if (_hash_tbl_iterator != _hash_tbl->end()) {
_hash_tbl_iterator.set_matched();
VLOG_ROW << "probe matched: "
<< get_row_output_string(_hash_tbl_iterator.get_row(),
child(0)->row_desc());
}
}
_probe_batch->reset();
}
}
_hash_tbl_iterator = _hash_tbl->begin();
return Status::OK();
}
Status IntersectNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eos) {
RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
RETURN_IF_CANCELLED(state);
SCOPED_TIMER(_runtime_profile->total_time_counter());
*eos = true;
if (reached_limit()) {
return Status::OK();
}
int64_t tuple_buf_size;
uint8_t* tuple_buf;
RETURN_IF_ERROR(
out_batch->resize_and_allocate_tuple_buffer(state, &tuple_buf_size, &tuple_buf));
memset(tuple_buf, 0, tuple_buf_size);
while (_hash_tbl_iterator.has_next()) {
VLOG_ROW << "find row: "
<< get_row_output_string(_hash_tbl_iterator.get_row(), child(0)->row_desc())
<< " matched: " << _hash_tbl_iterator.matched();
if (_hash_tbl_iterator.matched()) {
create_output_row(_hash_tbl_iterator.get_row(), out_batch, tuple_buf);
tuple_buf += _tuple_desc->byte_size();
++_num_rows_returned;
}
_hash_tbl_iterator.next<false>();
*eos = !_hash_tbl_iterator.has_next() || reached_limit();
if (out_batch->is_full() || out_batch->at_resource_limit() || *eos) {
return Status::OK();
}
}
return Status::OK();
}
} // namespace doris