[feature](expr) add type check when expr prepare (#33330)

This commit is contained in:
Mryange
2024-04-09 15:57:49 +08:00
committed by yiguolei
parent bc929686e3
commit f7d52b5b1c
8 changed files with 69 additions and 16 deletions

View File

@ -170,6 +170,11 @@ Status ExecNode::prepare(RuntimeState* state) {
RETURN_IF_ERROR(vectorized::VExpr::prepare(_projections, state, projections_row_desc()));
if (has_output_row_descriptor()) {
RETURN_IF_ERROR(
vectorized::VExpr::check_expr_output_type(_projections, *_output_row_descriptor));
}
for (auto& i : _children) {
RETURN_IF_ERROR(i->prepare(state));
}
@ -582,12 +587,7 @@ Status ExecNode::do_projections(vectorized::Block* origin_block, vectorized::Blo
auto& mutable_columns = mutable_block.mutable_columns();
if (mutable_columns.size() != _projections.size()) {
return Status::InternalError(
"Logical error during processing {}, output of projections {} mismatches with "
"exec node output {}",
this->get_name(), _projections.size(), mutable_columns.size());
}
DCHECK_EQ(mutable_columns.size(), _projections.size());
for (int i = 0; i < mutable_columns.size(); ++i) {
auto result_column_id = -1;

View File

@ -152,6 +152,11 @@ Status OperatorXBase::prepare(RuntimeState* state) {
}
RETURN_IF_ERROR(vectorized::VExpr::prepare(_projections, state, projections_row_desc()));
if (has_output_row_desc()) {
RETURN_IF_ERROR(
vectorized::VExpr::check_expr_output_type(_projections, *_output_row_descriptor));
}
if (_child_x && !is_source()) {
RETURN_IF_ERROR(_child_x->prepare(state));
}

View File

@ -327,6 +327,8 @@ public:
return _output_row_descriptor.get();
}
bool has_output_row_desc() const { return _output_row_descriptor != nullptr; }
[[nodiscard]] bool is_source() const override { return false; }
[[nodiscard]] virtual Status get_block_after_projects(RuntimeState* state,

View File

@ -31,5 +31,5 @@ namespace doris::vectorized {
using ColumnsWithTypeAndName = std::vector<ColumnWithTypeAndName>;
using NameAndTypePair = std::pair<std::string, DataTypePtr>;
using NameAndTypePairs = std::vector<NameAndTypePair>;
} // namespace doris::vectorized

View File

@ -212,12 +212,7 @@ Status VScanner::_do_projections(vectorized::Block* origin_block, vectorized::Bl
auto& mutable_columns = mutable_block.mutable_columns();
if (mutable_columns.size() != _projections.size()) {
return Status::InternalError(
"Logical error in scanner, output of projections {} mismatches with "
"scanner output {}",
_projections.size(), mutable_columns.size());
}
DCHECK_EQ(mutable_columns.size(), _projections.size());
for (int i = 0; i < mutable_columns.size(); ++i) {
auto result_column_id = -1;

View File

@ -19,6 +19,7 @@
#include <fmt/format.h>
#include <gen_cpp/Exprs_types.h>
#include <gen_cpp/FrontendService_types.h>
#include <thrift/protocol/TDebugProtocol.h>
#include <algorithm>
@ -32,6 +33,7 @@
#include "vec/columns/column_vector.h"
#include "vec/columns/columns_number.h"
#include "vec/data_types/data_type_factory.hpp"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/exprs/varray_literal.h"
#include "vec/exprs/vcase_expr.h"
@ -407,6 +409,39 @@ Status VExpr::create_expr_trees(const std::vector<TExpr>& texprs, VExprContextSP
return Status::OK();
}
Status VExpr::check_expr_output_type(const VExprContextSPtrs& ctxs,
const RowDescriptor& output_row_desc) {
if (ctxs.empty()) {
return Status::OK();
}
auto name_and_types = VectorizedUtils::create_name_and_data_types(output_row_desc);
if (ctxs.size() != name_and_types.size()) {
return Status::InternalError(
"output type size not match expr size {} , expected output size {} ", ctxs.size(),
name_and_types.size());
}
auto check_type_can_be_converted = [](DataTypePtr& from, DataTypePtr& to) -> bool {
if (to->equals(*from)) {
return true;
}
if (to->is_nullable() && !from->is_nullable()) {
return remove_nullable(to)->equals(*from);
}
return false;
};
for (int i = 0; i < ctxs.size(); i++) {
auto real_expr_type = ctxs[i]->root()->data_type();
auto&& [name, expected_type] = name_and_types[i];
if (!check_type_can_be_converted(real_expr_type, expected_type)) {
return Status::InternalError(
"output type not match expr type , col name {} , expected type {} , real type "
"{}",
name, expected_type->get_name(), real_expr_type->get_name());
}
}
return Status::OK();
}
Status VExpr::prepare(const VExprContextSPtrs& ctxs, RuntimeState* state,
const RowDescriptor& row_desc) {
for (auto ctx : ctxs) {

View File

@ -162,6 +162,9 @@ public:
static Status create_tree_from_thrift(const std::vector<TExprNode>& nodes, int* node_idx,
VExprSPtr& root_expr, VExprContextSPtr& ctx);
static Status check_expr_output_type(const VExprContextSPtrs& ctxs,
const RowDescriptor& output_row_desc);
virtual const VExprSPtrs& children() const { return _children; }
void set_children(const VExprSPtrs& children) { _children = children; }
void set_children(VExprSPtrs&& children) { _children = std::move(children); }

View File

@ -66,12 +66,12 @@ public:
}
return MutableBlock(block);
}
static ColumnsWithTypeAndName create_columns_with_type_and_name(
const RowDescriptor& row_desc, bool ignore_trivial_slot = true) {
static ColumnsWithTypeAndName create_columns_with_type_and_name(const RowDescriptor& row_desc) {
ColumnsWithTypeAndName columns_with_type_and_name;
for (const auto& tuple_desc : row_desc.tuple_descriptors()) {
for (const auto& slot_desc : tuple_desc->slots()) {
if (ignore_trivial_slot && !slot_desc->need_materialize()) {
if (!slot_desc->need_materialize()) {
continue;
}
columns_with_type_and_name.emplace_back(nullptr, slot_desc->get_data_type_ptr(),
@ -81,6 +81,19 @@ public:
return columns_with_type_and_name;
}
static NameAndTypePairs create_name_and_data_types(const RowDescriptor& row_desc) {
NameAndTypePairs name_with_types;
for (const auto& tuple_desc : row_desc.tuple_descriptors()) {
for (const auto& slot_desc : tuple_desc->slots()) {
if (!slot_desc->need_materialize()) {
continue;
}
name_with_types.emplace_back(slot_desc->col_name(), slot_desc->get_data_type_ptr());
}
}
return name_with_types;
}
static ColumnsWithTypeAndName create_empty_block(const RowDescriptor& row_desc,
bool ignore_trivial_slot = true) {
ColumnsWithTypeAndName columns_with_type_and_name;