Files
doris/be/src/exec/repeat_node.cpp
luozenglin 1cf57a985d [fix] Fix the query result error caused by the grouping sets statemen… (#11316)
* [fix] Fix the query result error caused by the grouping sets statement grouping as an expression
2022-08-01 13:52:18 +08:00

217 lines
8.3 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "exec/repeat_node.h"
#include "gutil/strings/join.h"
#include "runtime/raw_value.h"
#include "runtime/row_batch.h"
#include "runtime/runtime_state.h"
#include "runtime/tuple.h"
#include "runtime/tuple_row.h"
#include "util/runtime_profile.h"
namespace doris {
// Builds a RepeatNode from its thrift plan node.
// The repeat-specific settings are copied out of tnode.repeat_node; the runtime
// members (output tuple descriptor, buffered child batch, child eos flag, repeat
// cursor, runtime state) start out empty and are populated later by prepare()
// and get_next().
RepeatNode::RepeatNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
: ExecNode(pool, tnode, descs),
// One slot-id set per repetition: get_repeated_batch() keeps a slot of
// _all_slot_ids non-null only if it is in the set of the current repetition.
_slot_id_set_list(tnode.repeat_node.slot_id_set_list),
// Slot ids that are subject to being nulled out by a repetition.
_all_slot_ids(tnode.repeat_node.all_slot_ids),
// Its size determines how many times each child batch is emitted by get_next().
_repeat_id_list(tnode.repeat_node.repeat_id_list),
// _grouping_list[i][repeat_idx] is the value written into the i-th trailing output slot.
_grouping_list(tnode.repeat_node.grouping_list),
_output_tuple_id(tnode.repeat_node.output_tuple_id),
// Resolved from _output_tuple_id in prepare().
_output_tuple_desc(nullptr),
// Child batch currently being repeated; null means "fetch a new one".
_child_row_batch(nullptr),
_child_eos(false),
// Index of the repetition that the next get_next() call will emit.
_repeat_id_idx(0),
_runtime_state(nullptr) {}
// Nothing to release by hand: members clean up after themselves.
RepeatNode::~RepeatNode() = default;
// Initializes the base node, then builds the expression trees listed in
// tnode.repeat_node.exprs against the row layout of the (single) child.
// Returns the first error reported by ExecNode::init() or Expr::create().
Status RepeatNode::init(const TPlanNode& tnode, RuntimeState* state) {
    RETURN_IF_ERROR(ExecNode::init(tnode, state));
    RETURN_IF_ERROR(Expr::create(tnode.repeat_node.exprs, child(0)->row_desc(), state, &_exprs));
    // get_repeated_batch() fills the leading output slots from these expressions.
    DCHECK(!_exprs.empty());
    return Status::OK();
}
// Prepares the node for execution: resolves the output tuple descriptor and
// creates one prepared ExprContext (owned by _pool) per expression in _exprs.
// Returns InternalError when the output tuple id is unknown to the descriptor
// table, or the first error raised while preparing an expression context.
Status RepeatNode::prepare(RuntimeState* state) {
    SCOPED_TIMER(_runtime_profile->total_time_counter());
    RETURN_IF_ERROR(ExecNode::prepare(state));
    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());

    _runtime_state = state;
    _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id);
    if (nullptr == _output_tuple_desc) {
        return Status::InternalError("Failed to get tuple descriptor.");
    }

    for (auto* root_expr : _exprs) {
        ExprContext* eval = _pool->add(new ExprContext(root_expr));
        RETURN_IF_ERROR(eval->prepare(state, child(0)->row_desc()));
        _expr_evals.push_back(eval);
    }
    DCHECK_EQ(_exprs.size(), _expr_evals.size());
    return Status::OK();
}
// Opens the node: first the base class, then every expression context, and
// finally the child. Stops at the first failure or if the query was cancelled.
Status RepeatNode::open(RuntimeState* state) {
    SCOPED_TIMER(_runtime_profile->total_time_counter());
    RETURN_IF_ERROR(ExecNode::open(state));
    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());

    for (auto* eval : _expr_evals) {
        RETURN_IF_ERROR(eval->open(state));
    }

    RETURN_IF_CANCELLED(state);
    return child(0)->open(state);
}
/**
 * Materializes one repetition of child_row_batch into row_batch.
 *
 * For every child row a new output tuple (laid out by _output_tuple_desc) is
 * written and installed as tuple 0 of a freshly added row of row_batch:
 *  - the first _expr_evals.size() slots take the value of the matching
 *    expression evaluated on the child row, except that a slot whose id is in
 *    _all_slot_ids but not in _slot_id_set_list[repeat_id_idx] is set to NULL;
 *  - the i-th remaining slot takes _grouping_list[i][repeat_id_idx].
 *
 * e.g. with _grouping_list = [[0, 3, 1, 2], [0, 1, 1, 0]] and repeat_id_idx = 2
 * the two trailing slots of every output tuple are written as 1 and 1.
 *
 * row_batch must be empty on entry. Returns InternalError when the tuple
 * buffer cannot be allocated from row_batch's tuple data pool.
 */
Status RepeatNode::get_repeated_batch(RowBatch* child_row_batch, int repeat_id_idx,
                                      RowBatch* row_batch) {
    DCHECK(child_row_batch != nullptr);
    DCHECK_EQ(row_batch->num_rows(), 0);

    MemPool* tuple_pool = row_batch->tuple_data_pool();
    // Start address of the tuple for the current row; null until the first row
    // triggers the one-off buffer allocation.
    char* tuple_mem = nullptr;

    const int child_rows = child_row_batch->num_rows();
    for (int child_idx = 0; child_idx < child_rows; ++child_idx) {
        const int out_idx = row_batch->add_row();
        TupleRow* dst_row = row_batch->get_row(out_idx);
        TupleRow* src_row = child_row_batch->get_row(child_idx);

        if (UNLIKELY(tuple_mem == nullptr)) {
            // One contiguous buffer holding a tuple for every row the output batch can take.
            int size = row_batch->capacity() * _output_tuple_desc->byte_size();
            void* tuple_buffer = tuple_pool->allocate(size);
            if (tuple_buffer == nullptr) {
                return Status::InternalError("Allocate memory for row batch failed.");
            }
            tuple_mem = static_cast<char*>(tuple_buffer);
        } else {
            // Step to the next tuple inside the buffer.
            tuple_mem += _output_tuple_desc->byte_size();
        }
        Tuple* tuple = reinterpret_cast<Tuple*>(tuple_mem);

        dst_row->set_tuple(0, tuple);
        // Clear the null-indicator bytes so every slot starts out as non-null.
        memset(tuple, 0, _output_tuple_desc->num_null_bytes());

        // Leading slots: one per expression.
        int slot_index = 0;
        for (; slot_index < _expr_evals.size(); ++slot_index) {
            const SlotDescriptor* slot_desc = _output_tuple_desc->slots()[slot_index];

            // A repeatable slot that is not part of the current repetition is nulled out.
            bool nulled_by_repeat = false;
            if (_all_slot_ids.find(slot_desc->id()) != _all_slot_ids.end()) {
                std::set<SlotId>& kept_ids = _slot_id_set_list[repeat_id_idx];
                nulled_by_repeat = (kept_ids.find(slot_desc->id()) == kept_ids.end());
            }

            if (nulled_by_repeat) {
                tuple->set_null(slot_desc->null_indicator_offset());
            } else {
                void* val = _expr_evals[slot_index]->get_value(src_row);
                tuple->set_not_null(slot_desc->null_indicator_offset());
                RawValue::write(val, tuple, slot_desc, tuple_pool);
            }
        }

        // Trailing slots: one per entry of _grouping_list.
        DCHECK_EQ(slot_index + _grouping_list.size(), _output_tuple_desc->slots().size());
        for (int grouping_idx = 0; slot_index < _output_tuple_desc->slots().size();
             ++grouping_idx, ++slot_index) {
            const SlotDescriptor* slot_desc = _output_tuple_desc->slots()[slot_index];
            tuple->set_not_null(slot_desc->null_indicator_offset());
            int64_t val = _grouping_list[grouping_idx][repeat_id_idx];
            RawValue::write(&val, tuple, slot_desc, tuple_pool);
        }

        row_batch->commit_last_row();
    }
    return Status::OK();
}
/**
 * Produces the next output batch.
 *
 * Each batch pulled from the child is emitted _repeat_id_list.size() times, once
 * per repetition, by successive calls; _repeat_id_idx remembers which repetition
 * comes next. When all repetitions of the buffered child batch have been emitted
 * the batch is dropped and the following call pulls a new one from the child.
 *
 * @param state     runtime state, used for cancellation checks and the batch size
 * @param row_batch output batch; must be empty on entry
 * @param eos       set to true only once the child is exhausted and nothing is
 *                  left to repeat, false otherwise
 * @return the first error reported by the child or by get_repeated_batch()
 */
Status RepeatNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
    SCOPED_TIMER(_runtime_profile->total_time_counter());
    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
    RETURN_IF_CANCELLED(state);
    DCHECK(_repeat_id_idx >= 0);
    for (const std::vector<int64_t>& v : _grouping_list) {
        // _repeat_id_idx is used to subscript v in get_repeated_batch(), so it
        // has to be strictly inside the vector.
        DCHECK(_repeat_id_idx < (int)v.size());
    }

    // Do not rely on the caller having initialized the flag.
    *eos = false;

    // The previous child batch has finished its repeats: get the child's next
    // batch. An empty batch does not by itself mean that the child is done, so
    // keep pulling until rows arrive or the child reports end of stream.
    while (_child_row_batch.get() == nullptr) {
        if (_child_eos) {
            *eos = true;
            return Status::OK();
        }
        _child_row_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size()));
        RETURN_IF_ERROR(child(0)->get_next(state, _child_row_batch.get(), &_child_eos));
        if (_child_row_batch->num_rows() <= 0) {
            _child_row_batch.reset(nullptr);
            RETURN_IF_CANCELLED(state);
        }
    }

    DCHECK_EQ(row_batch->num_rows(), 0);
    RETURN_IF_ERROR(get_repeated_batch(_child_row_batch.get(), _repeat_id_idx, row_batch));

    // Advance to the next repetition; once all are done, release the child batch.
    _repeat_id_idx++;
    int size = _repeat_id_list.size();
    if (_repeat_id_idx >= size) {
        _child_row_batch.reset(nullptr);
        _repeat_id_idx = 0;
    }
    return Status::OK();
}
// Releases everything the node holds: the buffered child batch, the expression
// contexts and expressions, then the child and the base node. Calling it again
// after the node is closed is a no-op.
Status RepeatNode::close(RuntimeState* state) {
    if (is_closed()) {
        return Status::OK();
    }

    _child_row_batch.reset(nullptr);

    for (auto* eval : _expr_evals) {
        eval->close(state);
    }
    _expr_evals.clear();
    Expr::close(_exprs);

    RETURN_IF_ERROR(child(0)->close(state));
    return ExecNode::close(state);
}
// Appends a human-readable description of this node to *out: the repeat
// pattern, the number and values of the added grouping columns, the
// expressions, and finally the base-class description.
void RepeatNode::debug_string(int indentation_level, std::stringstream* out) const {
    std::stringstream& os = *out;

    os << std::string(indentation_level * 2, ' ') << "RepeatNode("
       << "repeat pattern: [" << JoinElements(_repeat_id_list, ",") << "]\n"
       << "add " << _grouping_list.size() << " columns. \n"
       << "_exprs: " << Expr::debug_string(_exprs) << "added column values: ";

    for (const std::vector<int64_t>& values : _grouping_list) {
        os << "[" << JoinElements(values, ",") << "] ";
    }
    os << "\n";

    ExecNode::debug_string(indentation_level, out);
    os << ")";
}
} // namespace doris