From f7d52b5b1cc2e3d5fe483c060412bcea87feb599 Mon Sep 17 00:00:00 2001 From: Mryange <59914473+Mryange@users.noreply.github.com> Date: Tue, 9 Apr 2024 15:57:49 +0800 Subject: [PATCH] [feature](expr) add type check when expr prepare (#33330) --- be/src/exec/exec_node.cpp | 12 +++---- be/src/pipeline/pipeline_x/operator.cpp | 5 +++ be/src/pipeline/pipeline_x/operator.h | 2 ++ be/src/vec/core/columns_with_type_and_name.h | 2 +- be/src/vec/exec/scan/vscanner.cpp | 7 +--- be/src/vec/exprs/vexpr.cpp | 35 ++++++++++++++++++++ be/src/vec/exprs/vexpr.h | 3 ++ be/src/vec/utils/util.hpp | 19 +++++++++-- 8 files changed, 69 insertions(+), 16 deletions(-) diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 63b88aa9de..7fdda0c5c8 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -170,6 +170,11 @@ Status ExecNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(vectorized::VExpr::prepare(_projections, state, projections_row_desc())); + if (has_output_row_descriptor()) { + RETURN_IF_ERROR( + vectorized::VExpr::check_expr_output_type(_projections, *_output_row_descriptor)); + } + for (auto& i : _children) { RETURN_IF_ERROR(i->prepare(state)); } @@ -582,12 +587,7 @@ Status ExecNode::do_projections(vectorized::Block* origin_block, vectorized::Blo auto& mutable_columns = mutable_block.mutable_columns(); - if (mutable_columns.size() != _projections.size()) { - return Status::InternalError( - "Logical error during processing {}, output of projections {} mismatches with " - "exec node output {}", - this->get_name(), _projections.size(), mutable_columns.size()); - } + DCHECK_EQ(mutable_columns.size(), _projections.size()); for (int i = 0; i < mutable_columns.size(); ++i) { auto result_column_id = -1; diff --git a/be/src/pipeline/pipeline_x/operator.cpp b/be/src/pipeline/pipeline_x/operator.cpp index 4a16cb65a0..08f0c4b73c 100644 --- a/be/src/pipeline/pipeline_x/operator.cpp +++ b/be/src/pipeline/pipeline_x/operator.cpp @@ -152,6 +152,11 @@ Status OperatorXBase::prepare(RuntimeState* state) { } RETURN_IF_ERROR(vectorized::VExpr::prepare(_projections, state, projections_row_desc())); + if (has_output_row_desc()) { + RETURN_IF_ERROR( + vectorized::VExpr::check_expr_output_type(_projections, *_output_row_descriptor)); + } + if (_child_x && !is_source()) { RETURN_IF_ERROR(_child_x->prepare(state)); } diff --git a/be/src/pipeline/pipeline_x/operator.h b/be/src/pipeline/pipeline_x/operator.h index 45e42390bc..7a0a5d1217 100644 --- a/be/src/pipeline/pipeline_x/operator.h +++ b/be/src/pipeline/pipeline_x/operator.h @@ -327,6 +327,8 @@ public: return _output_row_descriptor.get(); } + bool has_output_row_desc() const { return _output_row_descriptor != nullptr; } + [[nodiscard]] bool is_source() const override { return false; } [[nodiscard]] virtual Status get_block_after_projects(RuntimeState* state, diff --git a/be/src/vec/core/columns_with_type_and_name.h b/be/src/vec/core/columns_with_type_and_name.h index c70775fcae..82eae3158a 100644 --- a/be/src/vec/core/columns_with_type_and_name.h +++ b/be/src/vec/core/columns_with_type_and_name.h @@ -31,5 +31,5 @@ namespace doris::vectorized { using ColumnsWithTypeAndName = std::vector; using NameAndTypePair = std::pair; - +using NameAndTypePairs = std::vector; } // namespace doris::vectorized diff --git a/be/src/vec/exec/scan/vscanner.cpp b/be/src/vec/exec/scan/vscanner.cpp index de0b6b4569..ed972badb8 100644 --- a/be/src/vec/exec/scan/vscanner.cpp +++ b/be/src/vec/exec/scan/vscanner.cpp @@ -212,12 +212,7 @@ Status VScanner::_do_projections(vectorized::Block* origin_block, vectorized::Bl auto& mutable_columns = mutable_block.mutable_columns(); - if (mutable_columns.size() != _projections.size()) { - return Status::InternalError( - "Logical error in scanner, output of projections {} mismatches with " - "scanner output {}", - _projections.size(), mutable_columns.size()); - } + DCHECK_EQ(mutable_columns.size(), _projections.size()); for (int i = 0; i < mutable_columns.size(); ++i) { auto result_column_id = -1; diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index ee811c65ab..1fc74deb8c 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -32,6 +33,7 @@ #include "vec/columns/column_vector.h" #include "vec/columns/columns_number.h" #include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/exprs/varray_literal.h" #include "vec/exprs/vcase_expr.h" @@ -407,6 +409,39 @@ Status VExpr::create_expr_trees(const std::vector& texprs, VExprContextSP return Status::OK(); } +Status VExpr::check_expr_output_type(const VExprContextSPtrs& ctxs, + const RowDescriptor& output_row_desc) { + if (ctxs.empty()) { + return Status::OK(); + } + auto name_and_types = VectorizedUtils::create_name_and_data_types(output_row_desc); + if (ctxs.size() != name_and_types.size()) { + return Status::InternalError( + "output type size not match expr size {} , expected output size {} ", ctxs.size(), + name_and_types.size()); + } + auto check_type_can_be_converted = [](DataTypePtr& from, DataTypePtr& to) -> bool { + if (to->equals(*from)) { + return true; + } + if (to->is_nullable() && !from->is_nullable()) { + return remove_nullable(to)->equals(*from); + } + return false; + }; + for (int i = 0; i < ctxs.size(); i++) { + auto real_expr_type = ctxs[i]->root()->data_type(); + auto&& [name, expected_type] = name_and_types[i]; + if (!check_type_can_be_converted(real_expr_type, expected_type)) { + return Status::InternalError( + "output type not match expr type , col name {} , expected type {} , real type " + "{}", + name, expected_type->get_name(), real_expr_type->get_name()); + } + } + return Status::OK(); +} + Status VExpr::prepare(const VExprContextSPtrs& ctxs, RuntimeState* state, const RowDescriptor& row_desc) { for (auto ctx : ctxs) { diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 42a46d8a8f..57bb4a1cf6 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -162,6 +162,9 @@ public: static Status create_tree_from_thrift(const std::vector& nodes, int* node_idx, VExprSPtr& root_expr, VExprContextSPtr& ctx); + + static Status check_expr_output_type(const VExprContextSPtrs& ctxs, + const RowDescriptor& output_row_desc); virtual const VExprSPtrs& children() const { return _children; } void set_children(const VExprSPtrs& children) { _children = children; } void set_children(VExprSPtrs&& children) { _children = std::move(children); } diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp index 440bbff153..30609799e7 100644 --- a/be/src/vec/utils/util.hpp +++ b/be/src/vec/utils/util.hpp @@ -66,12 +66,12 @@ public: } return MutableBlock(block); } - static ColumnsWithTypeAndName create_columns_with_type_and_name( - const RowDescriptor& row_desc, bool ignore_trivial_slot = true) { + + static ColumnsWithTypeAndName create_columns_with_type_and_name(const RowDescriptor& row_desc) { ColumnsWithTypeAndName columns_with_type_and_name; for (const auto& tuple_desc : row_desc.tuple_descriptors()) { for (const auto& slot_desc : tuple_desc->slots()) { - if (ignore_trivial_slot && !slot_desc->need_materialize()) { + if (!slot_desc->need_materialize()) { continue; } columns_with_type_and_name.emplace_back(nullptr, slot_desc->get_data_type_ptr(), @@ -81,6 +81,19 @@ public: return columns_with_type_and_name; } + static NameAndTypePairs create_name_and_data_types(const RowDescriptor& row_desc) { + NameAndTypePairs name_with_types; + for (const auto& tuple_desc : row_desc.tuple_descriptors()) { + for (const auto& slot_desc : tuple_desc->slots()) { + if (!slot_desc->need_materialize()) { + continue; + } + name_with_types.emplace_back(slot_desc->col_name(), slot_desc->get_data_type_ptr()); + } + } + return name_with_types; + } + static ColumnsWithTypeAndName create_empty_block(const RowDescriptor& row_desc, bool ignore_trivial_slot = true) { ColumnsWithTypeAndName columns_with_type_and_name;