From 9f8de896597087281b2e75e5f757579c7dfd1ab9 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Mon, 29 May 2023 11:47:31 +0800 Subject: [PATCH] [refactor](exec) replace the single pointer with an array of 'conjuncts' in ExecNode (#19758) Refactoring the filtering conditions in the current ExecNode from an expression tree to an array can simplify the process of adding runtime filters. It eliminates the need for complex merge operations and removes the requirement for the frontend to combine expressions into a single entity. By representing the filtering conditions as an array, each condition can be treated individually, making it easier to add runtime filters without the need for complex merging logic. The array can store the individual conditions, and the runtime filter logic can iterate through the array to apply the filters as needed. This refactoring simplifies the codebase, improves readability, and reduces the complexity associated with handling filtering conditions and adding runtime filters. It separates the conditions into discrete entities, enabling more straightforward manipulation and management within the execution node. --- be/src/exec/data_sink.cpp | 6 +- be/src/exec/exec_node.cpp | 27 +- be/src/exec/exec_node.h | 9 +- be/src/exec/scan_node.cpp | 24 -- be/src/exec/scan_node.h | 4 - be/src/exec/table_connector.cpp | 18 +- be/src/exec/table_connector.h | 11 +- be/src/exec/tablet_info.cpp | 4 +- be/src/exec/tablet_info.h | 7 +- be/src/exprs/runtime_filter.cpp | 113 ++++----- be/src/exprs/runtime_filter.h | 10 +- be/src/exprs/runtime_filter_slots.h | 8 +- be/src/exprs/runtime_filter_slots_cross.h | 6 +- be/src/olap/iterators.h | 3 +- be/src/olap/push_handler.cpp | 30 ++- be/src/olap/push_handler.h | 10 +- be/src/olap/reader.cpp | 6 +- be/src/olap/reader.h | 5 +- be/src/olap/rowset/beta_rowset_reader.cpp | 4 +- be/src/olap/rowset/rowset_reader_context.h | 6 +- .../rowset/segment_v2/segment_iterator.cpp | 107 +++----- .../olap/rowset/segment_v2/segment_iterator.h | 11 +- be/src/olap/schema_change.cpp | 11 +- be/src/runtime/fold_constant_executor.cpp | 6 +- be/src/service/point_query_executor.cpp | 5 +- be/src/service/point_query_executor.h | 9 +- be/src/vec/common/sort/vsort_exec_exprs.cpp | 15 +- be/src/vec/common/sort/vsort_exec_exprs.h | 22 +- be/src/vec/core/sort_cursor.h | 4 +- be/src/vec/exec/format/generic_reader.h | 2 +- be/src/vec/exec/format/orc/vorc_reader.cpp | 53 ++-- be/src/vec/exec/format/orc/vorc_reader.h | 12 +- .../format/parquet/vparquet_group_reader.cpp | 63 ++--- .../format/parquet/vparquet_group_reader.h | 29 ++- .../exec/format/parquet/vparquet_reader.cpp | 34 +-- .../vec/exec/format/parquet/vparquet_reader.h | 12 +- .../vec/exec/format/table/iceberg_reader.cpp | 14 +- be/src/vec/exec/format/table/iceberg_reader.h | 8 +- .../exec/join/process_hash_table_probe_impl.h | 15 +- be/src/vec/exec/join/vhash_join_node.cpp | 40 +-- be/src/vec/exec/join/vhash_join_node.h | 10 +- be/src/vec/exec/join/vjoin_node_base.cpp | 11 +- be/src/vec/exec/join/vjoin_node_base.h | 6 +- .../vec/exec/join/vnested_loop_join_node.cpp | 97 ++------ be/src/vec/exec/join/vnested_loop_join_node.h | 4 +- be/src/vec/exec/scan/new_es_scan_node.cpp | 2 +- be/src/vec/exec/scan/new_es_scanner.cpp | 4 +- be/src/vec/exec/scan/new_es_scanner.h | 2 +- be/src/vec/exec/scan/new_file_scan_node.cpp | 4 +- be/src/vec/exec/scan/new_jdbc_scan_node.cpp | 2 +- be/src/vec/exec/scan/new_jdbc_scanner.cpp | 7 +- be/src/vec/exec/scan/new_jdbc_scanner.h | 2 +- be/src/vec/exec/scan/new_odbc_scan_node.cpp | 2 +- be/src/vec/exec/scan/new_odbc_scanner.cpp | 7 +- be/src/vec/exec/scan/new_odbc_scanner.h | 2 +- be/src/vec/exec/scan/new_olap_scan_node.cpp | 14 +- be/src/vec/exec/scan/new_olap_scanner.cpp | 34 +-- be/src/vec/exec/scan/vfile_scanner.cpp | 128 +++++----- be/src/vec/exec/scan/vfile_scanner.h | 18 +- be/src/vec/exec/scan/vmeta_scan_node.cpp | 2 +- be/src/vec/exec/scan/vmeta_scanner.cpp | 4 +- be/src/vec/exec/scan/vmeta_scanner.h | 2 +- be/src/vec/exec/scan/vscan_node.cpp | 234 ++++++++---------- be/src/vec/exec/scan/vscan_node.h | 28 ++- be/src/vec/exec/scan/vscanner.cpp | 31 ++- be/src/vec/exec/scan/vscanner.h | 23 +- be/src/vec/exec/vaggregation_node.cpp | 17 +- be/src/vec/exec/vaggregation_node.h | 6 +- be/src/vec/exec/vanalytic_eval_node.cpp | 22 +- be/src/vec/exec/vanalytic_eval_node.h | 14 +- be/src/vec/exec/vdata_gen_scan_node.cpp | 2 +- be/src/vec/exec/vjdbc_connector.cpp | 3 +- be/src/vec/exec/vjdbc_connector.h | 3 +- be/src/vec/exec/vpartition_sort_node.cpp | 4 +- be/src/vec/exec/vpartition_sort_node.h | 2 +- be/src/vec/exec/vrepeat_node.cpp | 4 +- be/src/vec/exec/vrepeat_node.h | 4 +- be/src/vec/exec/vschema_scan_node.cpp | 2 +- be/src/vec/exec/vselect_node.cpp | 3 +- be/src/vec/exec/vset_operation_node.cpp | 9 +- be/src/vec/exec/vset_operation_node.h | 2 +- be/src/vec/exec/vtable_function_node.cpp | 11 +- be/src/vec/exec/vtable_function_node.h | 2 +- be/src/vec/exec/vunion_node.cpp | 18 +- be/src/vec/exec/vunion_node.h | 8 +- .../exprs/lambda_function/lambda_function.h | 2 +- .../varray_filter_function.cpp | 2 +- .../lambda_function/varray_map_function.cpp | 2 +- .../vec/exprs/table_function/table_function.h | 4 +- be/src/vec/exprs/table_function/vexplode.cpp | 8 +- .../exprs/table_function/vexplode_bitmap.cpp | 8 +- .../table_function/vexplode_json_array.cpp | 8 +- .../exprs/table_function/vexplode_numbers.cpp | 8 +- .../exprs/table_function/vexplode_split.cpp | 12 +- be/src/vec/exprs/varray_literal.cpp | 4 +- be/src/vec/exprs/vbitmap_predicate.h | 4 +- be/src/vec/exprs/vbloom_predicate.h | 4 +- be/src/vec/exprs/vcase_expr.cpp | 2 +- be/src/vec/exprs/vcase_expr.h | 5 +- be/src/vec/exprs/vcast_expr.cpp | 2 +- be/src/vec/exprs/vcast_expr.h | 4 +- be/src/vec/exprs/vcolumn_ref.h | 4 +- be/src/vec/exprs/vcompound_pred.h | 10 +- be/src/vec/exprs/vdirect_in_predicate.h | 4 +- be/src/vec/exprs/vectorized_agg_fn.cpp | 6 +- be/src/vec/exprs/vectorized_agg_fn.h | 6 +- be/src/vec/exprs/vectorized_fn_call.cpp | 2 +- be/src/vec/exprs/vectorized_fn_call.h | 4 +- be/src/vec/exprs/vexpr.cpp | 140 +++++------ be/src/vec/exprs/vexpr.h | 49 ++-- be/src/vec/exprs/vexpr_context.cpp | 86 +++++-- be/src/vec/exprs/vexpr_context.h | 69 +++++- be/src/vec/exprs/vexpr_fwd.h | 33 +++ be/src/vec/exprs/vin_predicate.h | 4 +- be/src/vec/exprs/vinfo_func.h | 4 +- be/src/vec/exprs/vlambda_function_call_expr.h | 6 +- be/src/vec/exprs/vlambda_function_expr.h | 4 +- be/src/vec/exprs/vliteral.h | 4 +- be/src/vec/exprs/vmap_literal.cpp | 6 +- be/src/vec/exprs/vmap_literal.h | 1 - be/src/vec/exprs/vruntimefilter_wrapper.cpp | 2 +- be/src/vec/exprs/vruntimefilter_wrapper.h | 12 +- be/src/vec/exprs/vschema_change_expr.h | 5 +- be/src/vec/exprs/vslot_ref.h | 4 +- be/src/vec/exprs/vstruct_literal.cpp | 4 +- be/src/vec/exprs/vtuple_is_null_predicate.h | 4 +- be/src/vec/olap/vcollect_iterator.cpp | 11 +- be/src/vec/runtime/vdata_stream_recvr.cpp | 2 +- be/src/vec/runtime/vdata_stream_recvr.h | 4 +- be/src/vec/runtime/vfile_result_writer.cpp | 13 +- be/src/vec/runtime/vfile_result_writer.h | 7 +- be/src/vec/runtime/vorc_writer.cpp | 2 +- be/src/vec/runtime/vorc_writer.h | 4 +- be/src/vec/runtime/vparquet_writer.cpp | 2 +- be/src/vec/runtime/vparquet_writer.h | 10 +- be/src/vec/runtime/vsorted_run_merger.cpp | 2 +- be/src/vec/runtime/vsorted_run_merger.h | 11 +- be/src/vec/sink/vdata_stream_sender.cpp | 4 +- be/src/vec/sink/vdata_stream_sender.h | 2 +- be/src/vec/sink/vmemory_scratch_sink.cpp | 7 +- be/src/vec/sink/vmemory_scratch_sink.h | 9 +- be/src/vec/sink/vmysql_result_writer.cpp | 6 +- be/src/vec/sink/vmysql_result_writer.h | 7 +- be/src/vec/sink/vmysql_table_writer.cpp | 2 +- be/src/vec/sink/vmysql_table_writer.h | 6 +- be/src/vec/sink/vresult_file_sink.cpp | 3 +- be/src/vec/sink/vresult_file_sink.h | 2 +- be/src/vec/sink/vresult_sink.cpp | 5 +- be/src/vec/sink/vresult_sink.h | 4 +- be/src/vec/sink/vtable_sink.cpp | 2 +- be/src/vec/sink/vtable_sink.h | 4 +- be/src/vec/sink/vtablet_sink.cpp | 2 +- be/src/vec/sink/vtablet_sink.h | 14 +- be/src/vec/utils/util.hpp | 29 --- be/test/exprs/mock_vexpr.h | 2 +- .../serde/data_type_serde_mysql_test.cpp | 6 +- .../vec/exec/parquet/parquet_reader_test.cpp | 4 +- be/test/vec/exprs/vexpr_test.cpp | 8 +- be/test/vec/function/table_function_test.cpp | 22 +- .../doris/analysis/CompoundPredicate.java | 2 +- .../java/org/apache/doris/analysis/Expr.java | 2 +- .../doris/load/loadv2/LoadingTaskPlanner.java | 1 - .../translator/PhysicalPlanTranslator.java | 1 - .../doris/planner/FileLoadScanNode.java | 6 +- .../apache/doris/planner/HashJoinNode.java | 23 +- .../apache/doris/planner/JoinNodeBase.java | 4 - .../doris/planner/NestedLoopJoinNode.java | 20 +- .../apache/doris/planner/OlapScanNode.java | 12 +- .../apache/doris/planner/OriginalPlanner.java | 5 - .../org/apache/doris/planner/PlanNode.java | 41 +-- .../doris/planner/SingleNodePlanner.java | 14 +- .../doris/planner/StreamLoadPlanner.java | 1 - gensrc/thrift/PlanNodes.thrift | 3 + .../suites/query_p0/join/test_join.groovy | 7 +- 174 files changed, 1179 insertions(+), 1332 deletions(-) create mode 100644 be/src/vec/exprs/vexpr_fwd.h diff --git a/be/src/exec/data_sink.cpp b/be/src/exec/data_sink.cpp index a8e88a38c3..b0d75067e0 100644 --- a/be/src/exec/data_sink.cpp +++ b/be/src/exec/data_sink.cpp @@ -110,8 +110,7 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink return Status::InternalError("Missing data buffer sink."); } - tmp_sink = new vectorized::MemoryScratchSink(row_desc, output_exprs, - thrift_sink.memory_scratch_sink, pool); + tmp_sink = new vectorized::MemoryScratchSink(row_desc, output_exprs); sink->reset(tmp_sink); break; } @@ -254,8 +253,7 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink return Status::InternalError("Missing data buffer sink."); } - tmp_sink = new vectorized::MemoryScratchSink(row_desc, output_exprs, - thrift_sink.memory_scratch_sink, pool); + tmp_sink = new vectorized::MemoryScratchSink(row_desc, output_exprs); sink->reset(tmp_sink); break; } diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index b74fbd23b1..ae6e2944a7 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -105,15 +105,21 @@ Status ExecNode::init(const TPlanNode& tnode, RuntimeState* state) { init_runtime_profile(get_name()); if (tnode.__isset.vconjunct) { - RETURN_IF_ERROR(doris::vectorized::VExpr::create_expr_tree(_pool, tnode.vconjunct, - &_vconjunct_ctx_ptr)); + vectorized::VExprContextSPtr context; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(tnode.vconjunct, context)); + _conjuncts.emplace_back(context); + } else if (tnode.__isset.conjuncts) { + for (auto& conjunct : tnode.conjuncts) { + vectorized::VExprContextSPtr context; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(conjunct, context)); + _conjuncts.emplace_back(context); + } } // create the projections expr if (tnode.__isset.projections) { DCHECK(tnode.__isset.output_tuple_id); - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_trees(_pool, tnode.projections, &_projections)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_trees(tnode.projections, _projections)); } return Status::OK(); @@ -133,8 +139,8 @@ Status ExecNode::prepare(RuntimeState* state) { _mem_tracker = std::make_unique("ExecNode:" + _runtime_profile->name(), _runtime_profile.get(), nullptr, "PeakMemoryUsage"); - if (_vconjunct_ctx_ptr != nullptr) { - RETURN_IF_ERROR(_vconjunct_ctx_ptr->prepare(state, intermediate_row_desc())); + for (auto& conjunct : _conjuncts) { + RETURN_IF_ERROR(conjunct->prepare(state, intermediate_row_desc())); } RETURN_IF_ERROR(vectorized::VExpr::prepare(_projections, state, intermediate_row_desc())); @@ -147,8 +153,8 @@ Status ExecNode::prepare(RuntimeState* state) { } Status ExecNode::alloc_resource(doris::RuntimeState* state) { - if (_vconjunct_ctx_ptr != nullptr) { - RETURN_IF_ERROR(_vconjunct_ctx_ptr->open(state)); + for (auto& conjunct : _conjuncts) { + RETURN_IF_ERROR(conjunct->open(state)); } RETURN_IF_ERROR(vectorized::VExpr::open(_projections, state)); return Status::OK(); @@ -180,9 +186,10 @@ void ExecNode::release_resource(doris::RuntimeState* state) { COUNTER_SET(_rows_returned_counter, _num_rows_returned); } - if (_vconjunct_ctx_ptr != nullptr) { - _vconjunct_ctx_ptr->close(state); + for (auto& conjunct : _conjuncts) { + conjunct->close(state); } + vectorized::VExpr::close(_projections, state); runtime_profile()->add_to_span(_span); diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h index d119609ab5..7f709de0e6 100644 --- a/be/src/exec/exec_node.h +++ b/be/src/exec/exec_node.h @@ -38,6 +38,7 @@ #include "util/runtime_profile.h" #include "util/telemetry/telemetry.h" #include "vec/core/block.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class ObjectPool; @@ -45,10 +46,6 @@ class RuntimeState; class MemTracker; class QueryStatistics; -namespace vectorized { -class VExprContext; -} // namespace vectorized - namespace pipeline { class OperatorBase; } // namespace pipeline @@ -262,14 +259,14 @@ protected: ObjectPool* _pool; std::vector _tuple_ids; - doris::vectorized::VExprContext* _vconjunct_ctx_ptr = nullptr; + vectorized::VExprContextSPtrs _conjuncts; std::vector _children; RowDescriptor _row_descriptor; vectorized::Block _origin_block; std::unique_ptr _output_row_descriptor; - std::vector _projections; + vectorized::VExprContextSPtrs _projections; /// Resource information sent from the frontend. const TBackendResourceProfile _resource_profile; diff --git a/be/src/exec/scan_node.cpp b/be/src/exec/scan_node.cpp index 00496b306f..9271c44e00 100644 --- a/be/src/exec/scan_node.cpp +++ b/be/src/exec/scan_node.cpp @@ -54,28 +54,4 @@ Status ScanNode::prepare(RuntimeState* state) { return Status::OK(); } -// This function is used to remove pushed expr in expr tree. -// It relies on the logic of function convertConjunctsToAndCompoundPredicate() of FE splicing expr. -// It requires FE to satisfy each splicing with 'and' expr, and spliced from left to right, in order. -// Expr tree specific forms do not require requirements. -void ScanNode::_peel_pushed_vconjunct(RuntimeState* state, - const std::function& checker) { - if (_vconjunct_ctx_ptr == nullptr) { - return; - } - - int leaf_index = 0; - vectorized::VExpr* conjunct_expr_root = _vconjunct_ctx_ptr->root(); - - if (conjunct_expr_root != nullptr) { - vectorized::VExpr* new_conjunct_expr_root = vectorized::VectorizedUtils::dfs_peel_conjunct( - state, _vconjunct_ctx_ptr, conjunct_expr_root, leaf_index, checker); - if (new_conjunct_expr_root == nullptr) { - _vconjunct_ctx_ptr->close(state); - } else { - _vconjunct_ctx_ptr->set_root(new_conjunct_expr_root); - } - } -} - } // namespace doris diff --git a/be/src/exec/scan_node.h b/be/src/exec/scan_node.h index 6df382a975..ddad887b66 100644 --- a/be/src/exec/scan_node.h +++ b/be/src/exec/scan_node.h @@ -100,10 +100,6 @@ public: static const std::string _s_num_disks_accessed_counter; protected: - void _peel_pushed_vconjunct( - RuntimeState* state, - const std::function& checker); // remove pushed expr from conjunct tree - RuntimeProfile::Counter* _bytes_read_counter; // # bytes read from the scanner RuntimeProfile::Counter* _rows_read_counter; // Wall based aggregate read throughput [bytes/sec] diff --git a/be/src/exec/table_connector.cpp b/be/src/exec/table_connector.cpp index 82b82e077c..667618df82 100644 --- a/be/src/exec/table_connector.cpp +++ b/be/src/exec/table_connector.cpp @@ -97,7 +97,7 @@ std::u16string TableConnector::utf8_to_u16string(const char* first, const char* } Status TableConnector::append(const std::string& table_name, vectorized::Block* block, - const std::vector& output_vexpr_ctxs, + const vectorized::VExprContextSPtrs& output_vexpr_ctxs, uint32_t start_send_row, uint32_t* num_rows_sent, TOdbcTableType::type table_type) { _insert_stmt_buffer.clear(); @@ -153,10 +153,10 @@ Status TableConnector::append(const std::string& table_name, vectorized::Block* return Status::OK(); } -Status TableConnector::oracle_type_append( - const std::string& table_name, vectorized::Block* block, - const std::vector& output_vexpr_ctxs, uint32_t start_send_row, - uint32_t* num_rows_sent, TOdbcTableType::type table_type) { +Status TableConnector::oracle_type_append(const std::string& table_name, vectorized::Block* block, + const vectorized::VExprContextSPtrs& output_vexpr_ctxs, + uint32_t start_send_row, uint32_t* num_rows_sent, + TOdbcTableType::type table_type) { fmt::format_to(_insert_stmt_buffer, "INSERT ALL "); int num_rows = block->rows(); int num_columns = block->columns(); @@ -185,10 +185,10 @@ Status TableConnector::oracle_type_append( return Status::OK(); } -Status TableConnector::sap_hana_type_append( - const std::string& table_name, vectorized::Block* block, - const std::vector& output_vexpr_ctxs, uint32_t start_send_row, - uint32_t* num_rows_sent, TOdbcTableType::type table_type) { +Status TableConnector::sap_hana_type_append(const std::string& table_name, vectorized::Block* block, + const vectorized::VExprContextSPtrs& output_vexpr_ctxs, + uint32_t start_send_row, uint32_t* num_rows_sent, + TOdbcTableType::type table_type) { fmt::format_to(_insert_stmt_buffer, "INSERT INTO {} ", table_name); int num_rows = block->rows(); int num_columns = block->columns(); diff --git a/be/src/exec/table_connector.h b/be/src/exec/table_connector.h index 8a1cdff7e3..d8c6e01075 100644 --- a/be/src/exec/table_connector.h +++ b/be/src/exec/table_connector.h @@ -29,6 +29,7 @@ #include "util/runtime_profile.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class RuntimeState; @@ -36,7 +37,6 @@ class TupleDescriptor; namespace vectorized { class Block; -class VExprContext; } // namespace vectorized // Table Connector for scan data from ODBC/JDBC @@ -59,9 +59,8 @@ public: //write data into table vectorized Status append(const std::string& table_name, vectorized::Block* block, - const std::vector& _output_vexpr_ctxs, - uint32_t start_send_row, uint32_t* num_rows_sent, - TOdbcTableType::type table_type = TOdbcTableType::MYSQL); + const vectorized::VExprContextSPtrs& _output_vexpr_ctxs, uint32_t start_send_row, + uint32_t* num_rows_sent, TOdbcTableType::type table_type = TOdbcTableType::MYSQL); void init_profile(RuntimeProfile*); @@ -95,11 +94,11 @@ private: // insert into tables values (...),(...); // Here we do something special for Oracle and SAP Hana. Status oracle_type_append(const std::string& table_name, vectorized::Block* block, - const std::vector& output_vexpr_ctxs, + const vectorized::VExprContextSPtrs& output_vexpr_ctxs, uint32_t start_send_row, uint32_t* num_rows_sent, TOdbcTableType::type table_type); Status sap_hana_type_append(const std::string& table_name, vectorized::Block* block, - const std::vector& output_vexpr_ctxs, + const vectorized::VExprContextSPtrs& output_vexpr_ctxs, uint32_t start_send_row, uint32_t* num_rows_sent, TOdbcTableType::type table_type); }; diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 4191865f2a..a4829a0d05 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -162,8 +162,8 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { } } if (t_index.__isset.where_clause) { - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(&_obj_pool, t_index.where_clause, - &index->where_clause)); + RETURN_IF_ERROR( + vectorized::VExpr::create_expr_tree(t_index.where_clause, index->where_clause)); } _indexes.emplace_back(index); } diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h index bcfc5541eb..4b79ef6482 100644 --- a/be/src/exec/tablet_info.h +++ b/be/src/exec/tablet_info.h @@ -35,6 +35,7 @@ #include "vec/columns/column.h" #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class MemTracker; @@ -44,17 +45,13 @@ class TabletColumn; class TabletIndex; class TupleDescriptor; -namespace vectorized { -class VExprContext; -} // namespace vectorized - struct OlapTableIndexSchema { int64_t index_id; std::vector slots; int32_t schema_hash; std::vector columns; std::vector indexes; - vectorized::VExprContext* where_clause = nullptr; + vectorized::VExprContextSPtr where_clause; void to_protobuf(POlapTableIndexSchema* pindex) const; }; diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index 13bc456a29..f8a3ffc8d4 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -192,7 +192,7 @@ PFilterType get_type(RuntimeFilterType type) { } } -Status create_literal(ObjectPool* pool, const TypeDescriptor& type, const void* data, void** expr) { +Status create_literal(const TypeDescriptor& type, const void* data, vectorized::VExprSPtr& expr) { TExprNode node; switch (type.type) { @@ -278,8 +278,7 @@ Status create_literal(ObjectPool* pool, const TypeDescriptor& type, const void* } try { - *reinterpret_cast(expr) = - pool->add(vectorized::VLiteral::create_unique(node).release()); + expr = vectorized::VLiteral::create_shared(node); } catch (const Exception& e) { return Status::Error(e.code(), e.to_string()); } @@ -287,8 +286,8 @@ Status create_literal(ObjectPool* pool, const TypeDescriptor& type, const void* return Status::OK(); } -Status create_vbin_predicate(ObjectPool* pool, const TypeDescriptor& type, TExprOpcode::type opcode, - vectorized::VExpr** expr, TExprNode* tnode) { +Status create_vbin_predicate(const TypeDescriptor& type, TExprOpcode::type opcode, + vectorized::VExprSPtr& expr, TExprNode* tnode) { TExprNode node; TScalarType tscalar_type; tscalar_type.__set_type(TPrimitiveType::BOOLEAN); @@ -344,7 +343,7 @@ Status create_vbin_predicate(ObjectPool* pool, const TypeDescriptor& type, TExpr fn.__set_has_var_args(false); node.__set_fn(fn); *tnode = node; - return vectorized::VExpr::create_expr(pool, node, expr); + return vectorized::VExpr::create_expr(node, expr); } // This class is a wrapper of runtime predicate function class RuntimePredicateWrapper { @@ -605,8 +604,8 @@ public: return 0; } - Status get_push_vexprs(std::vector* container, - vectorized::VExprContext* prob_expr); + Status get_push_exprs(std::vector* container, + const vectorized::VExprContextSPtr& prob_expr); Status merge(const RuntimePredicateWrapper* wrapper) { bool can_not_merge_in_or_bloom = _filter_type == RuntimeFilterType::IN_OR_BLOOM_FILTER && @@ -1165,20 +1164,20 @@ void IRuntimeFilter::publish_finally() { join_rpc(); } -Status IRuntimeFilter::get_push_expr_ctxs(std::vector* push_vexprs) { +Status IRuntimeFilter::get_push_expr_ctxs(std::vector* push_exprs) { DCHECK(is_consumer()); if (!_is_ignored) { _set_push_down(); _profile->add_info_string("Info", _format_status()); - return _wrapper->get_push_vexprs(push_vexprs, _vprobe_ctx); + return _wrapper->get_push_exprs(push_exprs, _vprobe_ctx); } else { _profile->add_info_string("Info", _format_status()); return Status::OK(); } } -Status IRuntimeFilter::get_prepared_vexprs(std::vector* vexprs, - const RowDescriptor& desc, RuntimeState* state) { +Status IRuntimeFilter::get_prepared_exprs(std::vector* vexprs, + const RowDescriptor& desc, RuntimeState* state) { _profile->add_info_string("Info", _format_status()); if (_is_ignored) { return Status::OK(); @@ -1190,7 +1189,7 @@ Status IRuntimeFilter::get_prepared_vexprs(std::vector* vexp std::lock_guard guard(_inner_mutex); if (_push_down_vexprs.empty()) { - RETURN_IF_ERROR(_wrapper->get_push_vexprs(&_push_down_vexprs, _vprobe_ctx)); + RETURN_IF_ERROR(_wrapper->get_push_exprs(&_push_down_vexprs, _vprobe_ctx)); } vexprs->insert(vexprs->end(), _push_down_vexprs.begin(), _push_down_vexprs.end()); return Status::OK(); @@ -1348,8 +1347,8 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue _expr_order = desc->expr_order; _filter_id = desc->filter_id; _opt_remote_rf = desc->__isset.opt_remote_rf && desc->opt_remote_rf; - vectorized::VExprContext* build_ctx = nullptr; - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(_pool, desc->src_expr, &build_ctx)); + vectorized::VExprContextSPtr build_ctx; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(desc->src_expr, build_ctx)); RuntimeFilterParams params; params.filter_id = _filter_id; @@ -1373,9 +1372,9 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue if (!desc->__isset.bitmap_target_expr) { return Status::InvalidArgument("Unknown bitmap filter target expr."); } - vectorized::VExprContext* bitmap_target_ctx = nullptr; - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(_pool, desc->bitmap_target_expr, - &bitmap_target_ctx)); + vectorized::VExprContextSPtr bitmap_target_ctx; + RETURN_IF_ERROR( + vectorized::VExpr::create_expr_tree(desc->bitmap_target_expr, bitmap_target_ctx)); params.column_return_type = bitmap_target_ctx->root()->type().type; if (desc->__isset.bitmap_filter_not_in) { @@ -1390,7 +1389,7 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue DCHECK(false) << "runtime filter not found node_id:" << node_id; return Status::InternalError("not found a node id"); } - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(_pool, iter->second, &_vprobe_ctx)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(iter->second, _vprobe_ctx)); } if (_state) { @@ -1878,15 +1877,15 @@ Status IRuntimeFilter::consumer_close() { return Status::OK(); } -Status RuntimePredicateWrapper::get_push_vexprs(std::vector* container, - vectorized::VExprContext* vprob_expr) { +Status RuntimePredicateWrapper::get_push_exprs(std::vector* container, + const vectorized::VExprContextSPtr& prob_expr) { DCHECK(container != nullptr); DCHECK(_pool != nullptr); - DCHECK(vprob_expr->root()->type().type == _column_return_type || - (is_string_type(vprob_expr->root()->type().type) && + DCHECK(prob_expr->root()->type().type == _column_return_type || + (is_string_type(prob_expr->root()->type().type) && is_string_type(_column_return_type)) || _filter_type == RuntimeFilterType::BITMAP_FILTER) - << " vprob_expr->root()->type().type: " << vprob_expr->root()->type().type + << " prob_expr->root()->type().type: " << prob_expr->root()->type().type << " _column_return_type: " << _column_return_type << " _filter_type: " << ::doris::to_string(_filter_type); @@ -1905,47 +1904,43 @@ Status RuntimePredicateWrapper::get_push_vexprs(std::vector* node.__set_vector_opcode(to_in_opcode(_column_return_type)); node.__set_is_nullable(false); - auto in_pred = - _pool->add(vectorized::VDirectInPredicate::create_unique(node).release()); + auto in_pred = vectorized::VDirectInPredicate::create_shared(node); in_pred->set_filter(_context.hybrid_set); - auto cloned_vexpr = vprob_expr->root()->clone(_pool); - in_pred->add_child(cloned_vexpr); - auto wrapper = _pool->add( - vectorized::VRuntimeFilterWrapper::create_unique(node, in_pred).release()); + auto cloned_expr = prob_expr->root()->clone(); + in_pred->add_child(cloned_expr); + auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared(node, in_pred); container->push_back(wrapper); } break; } case RuntimeFilterType::MINMAX_FILTER: { - vectorized::VExpr* max_pred = nullptr; + vectorized::VExprSPtr max_pred; // create max filter TExprNode max_pred_node; - RETURN_IF_ERROR(create_vbin_predicate(_pool, vprob_expr->root()->type(), TExprOpcode::LE, - &max_pred, &max_pred_node)); - vectorized::VExpr* max_literal = nullptr; - RETURN_IF_ERROR(create_literal(_pool, vprob_expr->root()->type(), - _context.minmax_func->get_max(), (void**)&max_literal)); - auto cloned_vexpr = vprob_expr->root()->clone(_pool); - max_pred->add_child(cloned_vexpr); + RETURN_IF_ERROR(create_vbin_predicate(prob_expr->root()->type(), TExprOpcode::LE, max_pred, + &max_pred_node)); + vectorized::VExprSPtr max_literal; + RETURN_IF_ERROR(create_literal(prob_expr->root()->type(), _context.minmax_func->get_max(), + max_literal)); + auto cloned_expr = prob_expr->root()->clone(); + max_pred->add_child(cloned_expr); max_pred->add_child(max_literal); container->push_back( - _pool->add(vectorized::VRuntimeFilterWrapper::create_unique(max_pred_node, max_pred) - .release())); + vectorized::VRuntimeFilterWrapper::create_shared(max_pred_node, max_pred)); // create min filter - vectorized::VExpr* min_pred = nullptr; + vectorized::VExprSPtr min_pred; TExprNode min_pred_node; - RETURN_IF_ERROR(create_vbin_predicate(_pool, vprob_expr->root()->type(), TExprOpcode::GE, - &min_pred, &min_pred_node)); - vectorized::VExpr* min_literal = nullptr; - RETURN_IF_ERROR(create_literal(_pool, vprob_expr->root()->type(), - _context.minmax_func->get_min(), (void**)&min_literal)); - cloned_vexpr = vprob_expr->root()->clone(_pool); - min_pred->add_child(cloned_vexpr); + RETURN_IF_ERROR(create_vbin_predicate(prob_expr->root()->type(), TExprOpcode::GE, min_pred, + &min_pred_node)); + vectorized::VExprSPtr min_literal; + RETURN_IF_ERROR(create_literal(prob_expr->root()->type(), _context.minmax_func->get_min(), + min_literal)); + cloned_expr = prob_expr->root()->clone(); + min_pred->add_child(cloned_expr); min_pred->add_child(min_literal); container->push_back( - _pool->add(vectorized::VRuntimeFilterWrapper::create_unique(min_pred_node, min_pred) - .release())); + vectorized::VRuntimeFilterWrapper::create_shared(min_pred_node, min_pred)); break; } case RuntimeFilterType::BLOOM_FILTER: { @@ -1959,12 +1954,11 @@ Status RuntimePredicateWrapper::get_push_vexprs(std::vector* node.__isset.vector_opcode = true; node.__set_vector_opcode(to_in_opcode(_column_return_type)); node.__set_is_nullable(false); - auto bloom_pred = _pool->add(vectorized::VBloomPredicate::create_unique(node).release()); + auto bloom_pred = vectorized::VBloomPredicate::create_shared(node); bloom_pred->set_filter(_context.bloom_filter_func); - auto cloned_vexpr = vprob_expr->root()->clone(_pool); - bloom_pred->add_child(cloned_vexpr); - auto wrapper = _pool->add( - vectorized::VRuntimeFilterWrapper::create_unique(node, bloom_pred).release()); + auto cloned_expr = prob_expr->root()->clone(); + bloom_pred->add_child(cloned_expr); + auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared(node, bloom_pred); container->push_back(wrapper); break; } @@ -1979,12 +1973,11 @@ Status RuntimePredicateWrapper::get_push_vexprs(std::vector* node.__isset.vector_opcode = true; node.__set_vector_opcode(to_in_opcode(_column_return_type)); node.__set_is_nullable(false); - auto bitmap_pred = _pool->add(vectorized::VBitmapPredicate::create_unique(node).release()); + auto bitmap_pred = vectorized::VBitmapPredicate::create_shared(node); bitmap_pred->set_filter(_context.bitmap_filter_func); - auto cloned_vexpr = vprob_expr->root()->clone(_pool); - bitmap_pred->add_child(cloned_vexpr); - auto wrapper = _pool->add( - vectorized::VRuntimeFilterWrapper::create_unique(node, bitmap_pred).release()); + auto cloned_expr = prob_expr->root()->clone(); + bitmap_pred->add_child(cloned_expr); + auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared(node, bitmap_pred); container->push_back(wrapper); break; } diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h index b1dbbfa4b2..8389e8416b 100644 --- a/be/src/exprs/runtime_filter.h +++ b/be/src/exprs/runtime_filter.h @@ -221,10 +221,10 @@ public: RuntimeFilterType type() const { return _runtime_filter_type; } - Status get_push_expr_ctxs(std::vector* push_vexprs); + Status get_push_expr_ctxs(std::vector* push_exprs); - Status get_prepared_vexprs(std::vector* push_vexprs, - const RowDescriptor& desc, RuntimeState* state); + Status get_prepared_exprs(std::vector* push_exprs, + const RowDescriptor& desc, RuntimeState* state); bool is_broadcast_join() const { return _is_broadcast_join; } @@ -381,13 +381,13 @@ protected: // this filter won't filter any data bool _always_true; - doris::vectorized::VExprContext* _vprobe_ctx = nullptr; + doris::vectorized::VExprContextSPtr _vprobe_ctx; // Indicate whether runtime filter expr has been ignored bool _is_ignored; std::string _ignored_msg; - std::vector _push_down_vexprs; + std::vector _push_down_vexprs; struct rpc_context; diff --git a/be/src/exprs/runtime_filter_slots.h b/be/src/exprs/runtime_filter_slots.h index 88fcccc47b..3367e8d44a 100644 --- a/be/src/exprs/runtime_filter_slots.h +++ b/be/src/exprs/runtime_filter_slots.h @@ -32,8 +32,8 @@ namespace doris { template class RuntimeFilterSlotsBase { public: - RuntimeFilterSlotsBase(const std::vector& prob_expr_ctxs, - const std::vector& build_expr_ctxs, + RuntimeFilterSlotsBase(const std::vector>& prob_expr_ctxs, + const std::vector>& build_expr_ctxs, const std::vector& runtime_filter_descs) : _probe_expr_context(prob_expr_ctxs), _build_expr_context(build_expr_ctxs), @@ -247,8 +247,8 @@ public: bool empty() { return !_runtime_filters.size(); } private: - const std::vector& _probe_expr_context; - const std::vector& _build_expr_context; + const std::vector>& _probe_expr_context; + const std::vector>& _build_expr_context; const std::vector& _runtime_filter_descs; // prob_contition index -> [IRuntimeFilter] std::map> _runtime_filters; diff --git a/be/src/exprs/runtime_filter_slots_cross.h b/be/src/exprs/runtime_filter_slots_cross.h index adc9fc1188..1e8c15e713 100644 --- a/be/src/exprs/runtime_filter_slots_cross.h +++ b/be/src/exprs/runtime_filter_slots_cross.h @@ -35,7 +35,7 @@ template class RuntimeFilterSlotsCross { public: RuntimeFilterSlotsCross(const std::vector& runtime_filter_descs, - const std::vector& src_expr_ctxs) + const vectorized::VExprContextSPtrs& src_expr_ctxs) : _runtime_filter_descs(runtime_filter_descs), filter_src_expr_ctxs(src_expr_ctxs) {} ~RuntimeFilterSlotsCross() = default; @@ -56,7 +56,7 @@ public: Status insert(vectorized::Block* block) { for (int i = 0; i < _runtime_filters.size(); ++i) { auto* filter = _runtime_filters[i]; - auto* vexpr_ctx = filter_src_expr_ctxs[i]; + auto& vexpr_ctx = filter_src_expr_ctxs[i]; int result_column_id = -1; RETURN_IF_ERROR(vexpr_ctx->execute(block, &result_column_id)); @@ -102,7 +102,7 @@ public: private: const std::vector& _runtime_filter_descs; - const std::vector filter_src_expr_ctxs; + const vectorized::VExprContextSPtrs filter_src_expr_ctxs; std::vector _runtime_filters; }; diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h index adb1a5feb0..f1b195f8f9 100644 --- a/be/src/olap/iterators.h +++ b/be/src/olap/iterators.h @@ -101,7 +101,8 @@ public: std::vector* read_orderby_key_columns = nullptr; io::IOContext io_ctx; vectorized::VExpr* remaining_vconjunct_root = nullptr; - vectorized::VExprContext* common_vexpr_ctxs_pushdown = nullptr; + std::vector remaining_conjunct_roots; + vectorized::VExprContextSPtrs common_expr_ctxs_push_down; const std::set* output_columns = nullptr; // runtime state RuntimeState* runtime_state = nullptr; diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 0379e33616..2a7f1d2d5d 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -417,14 +417,14 @@ Status PushBrokerReader::next(vectorized::Block* block) { Status PushBrokerReader::close() { _ready = false; - for (auto ctx : _dest_vexpr_ctx) { + for (auto ctx : _dest_expr_ctxs) { if (ctx != nullptr) { ctx->close(_runtime_state.get()); } } - if (_push_down_expr) { - _push_down_expr->close(_runtime_state.get()); + for (auto& expr : _push_down_exprs) { + expr->close(_runtime_state.get()); } for (auto& [k, v] : _slot_id_to_filter_conjuncts) { @@ -435,7 +435,7 @@ Status PushBrokerReader::close() { } } - for (auto* ctx : _not_single_slot_filter_conjuncts) { + for (auto& ctx : _not_single_slot_filter_conjuncts) { if (ctx != nullptr) { ctx->close(_runtime_state.get()); } @@ -515,7 +515,7 @@ Status PushBrokerReader::_convert_to_output_block(vectorized::Block* block) { int dest_index = ctx_idx++; vectorized::ColumnPtr column_ptr; - auto* ctx = _dest_vexpr_ctx[dest_index]; + auto& ctx = _dest_expr_ctxs[dest_index]; int result_column_id = -1; // PT1 => dest primitive type RETURN_IF_ERROR(ctx->execute(&_src_block, &result_column_id)); @@ -587,11 +587,10 @@ Status PushBrokerReader::_init_expr_ctxes() { if (!_pre_filter_texprs.empty()) { DCHECK(_pre_filter_texprs.size() == 1); - _vpre_filter_ctx_ptr.reset(new doris::vectorized::VExprContext*); - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree( - _runtime_state->obj_pool(), _pre_filter_texprs[0], _vpre_filter_ctx_ptr.get())); - RETURN_IF_ERROR((*_vpre_filter_ctx_ptr)->prepare(_runtime_state.get(), *_row_desc)); - RETURN_IF_ERROR((*_vpre_filter_ctx_ptr)->open(_runtime_state.get())); + RETURN_IF_ERROR( + vectorized::VExpr::create_expr_tree(_pre_filter_texprs[0], _pre_filter_ctx_ptr)); + RETURN_IF_ERROR(_pre_filter_ctx_ptr->prepare(_runtime_state.get(), *_row_desc)); + RETURN_IF_ERROR(_pre_filter_ctx_ptr->open(_runtime_state.get())); } _dest_tuple_desc = _runtime_state->desc_tbl().get_tuple_descriptor(_params.dest_tuple_id); @@ -610,12 +609,11 @@ Status PushBrokerReader::_init_expr_ctxes() { slot_desc->col_name()); } - vectorized::VExprContext* ctx = nullptr; - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_tree(_runtime_state->obj_pool(), it->second, &ctx)); + vectorized::VExprContextSPtr ctx; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(it->second, ctx)); RETURN_IF_ERROR(ctx->prepare(_runtime_state.get(), *_row_desc.get())); RETURN_IF_ERROR(ctx->open(_runtime_state.get())); - _dest_vexpr_ctx.emplace_back(ctx); + _dest_expr_ctxs.emplace_back(ctx); if (has_slot_id_map) { auto it1 = _params.dest_sid_to_src_sid_without_trans.find(slot_desc->id()); if (it1 == std::end(_params.dest_sid_to_src_sid_without_trans)) { @@ -654,7 +652,7 @@ Status PushBrokerReader::_get_next_reader() { RETURN_IF_ERROR(parquet_reader->open()); std::vector place_holder; init_status = parquet_reader->init_reader( - _all_col_names, place_holder, _colname_to_value_range, _push_down_expr, + _all_col_names, place_holder, _colname_to_value_range, _push_down_exprs, _real_tuple_desc, _default_val_row_desc.get(), _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts, false); _cur_reader = std::move(parquet_reader); @@ -664,7 +662,7 @@ Status PushBrokerReader::_get_next_reader() { } std::unordered_map> partition_columns; - std::unordered_map missing_columns; + std::unordered_map missing_columns; _cur_reader->get_columns(&_name_to_col_type, &_missing_cols); _cur_reader->set_fill_columns(partition_columns, missing_columns); break; diff --git a/be/src/olap/push_handler.h b/be/src/olap/push_handler.h index de6dd857e9..204d0e73e2 100644 --- a/be/src/olap/push_handler.h +++ b/be/src/olap/push_handler.h @@ -117,8 +117,8 @@ private: std::unordered_map _name_to_col_type; std::unordered_set _missing_cols; std::unordered_map _src_block_name_to_idx; - std::vector _dest_vexpr_ctx; - std::unique_ptr _vpre_filter_ctx_ptr; + vectorized::VExprContextSPtrs _dest_expr_ctxs; + vectorized::VExprContextSPtr _pre_filter_ctx_ptr; bool _is_dynamic_schema = false; std::vector _src_slot_descs_order_by_dest; std::unordered_map _dest_slot_to_src_slot_index; @@ -142,12 +142,12 @@ private: // col names from _slot_descs std::vector _all_col_names; std::unordered_map* _colname_to_value_range; - vectorized::VExprContext* _push_down_expr = nullptr; + vectorized::VExprContextSPtrs _push_down_exprs; const std::unordered_map* _col_name_to_slot_id; // single slot filter conjuncts - std::unordered_map> _slot_id_to_filter_conjuncts; + std::unordered_map _slot_id_to_filter_conjuncts; // not single(zero or multi) slot filter conjuncts - std::vector _not_single_slot_filter_conjuncts; + vectorized::VExprContextSPtrs _not_single_slot_filter_conjuncts; // File source slot descriptors std::vector _file_slot_descs; // row desc for default exprs diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp index f3e9e07769..a0e9b989c1 100644 --- a/be/src/olap/reader.cpp +++ b/be/src/olap/reader.cpp @@ -222,7 +222,7 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { _reader_context.use_topn_opt = read_params.use_topn_opt; _reader_context.read_orderby_key_reverse = read_params.read_orderby_key_reverse; _reader_context.read_orderby_key_limit = read_params.read_orderby_key_limit; - _reader_context.filter_block_vconjunct_ctx_ptr = read_params.filter_block_vconjunct_ctx_ptr; + _reader_context.filter_block_conjuncts = read_params.filter_block_conjuncts; _reader_context.return_columns = &_return_columns; _reader_context.read_orderby_key_columns = _orderby_key_columns.size() > 0 ? &_orderby_key_columns : nullptr; @@ -243,8 +243,8 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { _reader_context.enable_unique_key_merge_on_write = tablet()->enable_unique_key_merge_on_write(); _reader_context.record_rowids = read_params.record_rowids; _reader_context.is_key_column_group = read_params.is_key_column_group; - _reader_context.remaining_vconjunct_root = read_params.remaining_vconjunct_root; - _reader_context.common_vexpr_ctxs_pushdown = read_params.common_vexpr_ctxs_pushdown; + _reader_context.remaining_conjunct_roots = read_params.remaining_conjunct_roots; + _reader_context.common_expr_ctxs_push_down = read_params.common_expr_ctxs_push_down; _reader_context.output_columns = &read_params.output_columns; return Status::OK(); diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h index 41fff12be1..95bc9d1c0d 100644 --- a/be/src/olap/reader.h +++ b/be/src/olap/reader.h @@ -138,7 +138,8 @@ public: std::unordered_set* tablet_columns_convert_to_null_set = nullptr; TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE; vectorized::VExpr* remaining_vconjunct_root = nullptr; - vectorized::VExprContext* common_vexpr_ctxs_pushdown = nullptr; + std::vector remaining_conjunct_roots; + vectorized::VExprContextSPtrs common_expr_ctxs_push_down; // used for compaction to record row ids bool record_rowids = false; @@ -153,7 +154,7 @@ public: // limit of rows for read_orderby_key size_t read_orderby_key_limit = 0; // filter_block arguments - vectorized::VExprContext** filter_block_vconjunct_ctx_ptr = nullptr; + vectorized::VExprContextSPtrs filter_block_conjuncts; // for vertical compaction bool is_key_column_group = false; diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 670f48d3d3..82a8b761c6 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -88,8 +88,8 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.block_row_max = read_context->batch_size; _read_options.stats = _stats; _read_options.push_down_agg_type_opt = _context->push_down_agg_type_opt; - _read_options.remaining_vconjunct_root = _context->remaining_vconjunct_root; - _read_options.common_vexpr_ctxs_pushdown = _context->common_vexpr_ctxs_pushdown; + _read_options.remaining_conjunct_roots = _context->remaining_conjunct_roots; + _read_options.common_expr_ctxs_push_down = _context->common_expr_ctxs_push_down; _read_options.rowset_id = _rowset->rowset_id(); _read_options.version = _rowset->version(); _read_options.tablet_id = _rowset->rowset_meta()->tablet_id(); diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h index 1adc63122b..a1c8650011 100644 --- a/be/src/olap/rowset/rowset_reader_context.h +++ b/be/src/olap/rowset/rowset_reader_context.h @@ -47,7 +47,7 @@ struct RowsetReaderContext { // limit of rows for read_orderby_key size_t read_orderby_key_limit = 0; // filter_block arguments - vectorized::VExprContext** filter_block_vconjunct_ctx_ptr = nullptr; + vectorized::VExprContextSPtrs filter_block_conjuncts; // projection columns: the set of columns rowset reader should return const std::vector* return_columns = nullptr; TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE; @@ -64,8 +64,8 @@ struct RowsetReaderContext { const DeleteHandler* delete_handler = nullptr; OlapReaderStatistics* stats = nullptr; RuntimeState* runtime_state = nullptr; - vectorized::VExpr* remaining_vconjunct_root = nullptr; - vectorized::VExprContext* common_vexpr_ctxs_pushdown = nullptr; + std::vector remaining_conjunct_roots; + vectorized::VExprContextSPtrs common_expr_ctxs_push_down; bool use_page_cache = false; int sequence_id_idx = -1; int batch_size = 1024; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 4ddc567d6e..297ef34d87 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -236,11 +236,14 @@ Status SegmentIterator::init(const StorageReadOptions& opts) { _output_columns = *(opts.output_columns); } - _remaining_vconjunct_root = opts.remaining_vconjunct_root; - _common_vexpr_ctxs_pushdown = opts.common_vexpr_ctxs_pushdown; - _enable_common_expr_pushdown = _common_vexpr_ctxs_pushdown ? true : false; + _remaining_conjunct_roots = opts.remaining_conjunct_roots; + _common_expr_ctxs_push_down = opts.common_expr_ctxs_push_down; + _enable_common_expr_pushdown = !_common_expr_ctxs_push_down.empty(); _column_predicate_info.reset(new ColumnPredicateInfo()); - _calculate_pred_in_remaining_vconjunct_root(_remaining_vconjunct_root); + + for (auto& expr : _remaining_conjunct_roots) { + _calculate_pred_in_remaining_conjunct_root(expr); + } _column_predicate_info.reset(new ColumnPredicateInfo()); if (_schema.rowid_col_idx() > 0) { @@ -375,9 +378,12 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() { if (config::enable_index_apply_preds_except_leafnode_of_andnode) { RETURN_IF_ERROR(_apply_index_except_leafnode_of_andnode()); if (_can_filter_by_preds_except_leafnode_of_andnode()) { - auto res = _execute_predicates_except_leafnode_of_andnode(_remaining_vconjunct_root); - if (res.ok() && _pred_except_leafnode_of_andnode_evaluate_result.size() == 1) { - _row_bitmap &= _pred_except_leafnode_of_andnode_evaluate_result[0]; + for (auto& expr : _remaining_conjunct_roots) { + _pred_except_leafnode_of_andnode_evaluate_result.clear(); + auto res = _execute_predicates_except_leafnode_of_andnode(expr); + if (res.ok() && _pred_except_leafnode_of_andnode_evaluate_result.size() == 1) { + _row_bitmap &= _pred_except_leafnode_of_andnode_evaluate_result[0]; + } } } } @@ -521,15 +527,15 @@ bool SegmentIterator::_is_literal_node(const TExprNodeType::type& node_type) { } } -Status SegmentIterator::_extract_common_expr_columns(vectorized::VExpr* expr) { - auto children = expr->children(); +Status SegmentIterator::_extract_common_expr_columns(const vectorized::VExprSPtr& expr) { + auto& children = expr->children(); for (int i = 0; i < children.size(); ++i) { RETURN_IF_ERROR(_extract_common_expr_columns(children[i])); } auto node_type = expr->node_type(); if (node_type == TExprNodeType::SLOT_REF) { - auto slot_expr = dynamic_cast(expr); + auto slot_expr = std::dynamic_pointer_cast(expr); _is_common_expr_column[_schema.column_id(slot_expr->column_id())] = true; _common_expr_columns.insert(_schema.column_id(slot_expr->column_id())); } @@ -537,12 +543,13 @@ Status SegmentIterator::_extract_common_expr_columns(vectorized::VExpr* expr) { return Status::OK(); } -Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode(vectorized::VExpr* expr) { +Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode( + const vectorized::VExprSPtr& expr) { if (expr == nullptr) { return Status::OK(); } - auto children = expr->children(); + auto& children = expr->children(); for (int i = 0; i < children.size(); ++i) { RETURN_IF_ERROR(_execute_predicates_except_leafnode_of_andnode(children[i])); } @@ -551,7 +558,7 @@ Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode(vectorize if (node_type == TExprNodeType::SLOT_REF) { _column_predicate_info->column_name = expr->expr_name(); } else if (_is_literal_node(node_type)) { - auto v_literal_expr = dynamic_cast(expr); + auto v_literal_expr = std::dynamic_pointer_cast(expr); _column_predicate_info->query_value = v_literal_expr->value(); } else if (node_type == TExprNodeType::BINARY_PRED || node_type == TExprNodeType::MATCH_PRED) { if (node_type == TExprNodeType::MATCH_PRED) { @@ -720,7 +727,7 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() { for (auto pred : _col_preds_except_leafnode_of_andnode) { auto column_name = _schema.column(pred->column_id())->name(); - if (_remaining_vconjunct_root != nullptr && + if (!_remaining_conjunct_roots.empty() && _check_column_pred_all_push_down(column_name, true) && !pred->predicate_params()->marked_by_runtime_filter) { int32_t unique_id = _schema.unique_id(pred->column_id()); @@ -1272,8 +1279,10 @@ Status SegmentIterator::_vec_init_lazy_materialization() { // Step2: extract columns that can execute expr context _is_common_expr_column.resize(_schema.columns().size(), false); - if (_enable_common_expr_pushdown && _remaining_vconjunct_root != nullptr) { - RETURN_IF_ERROR(_extract_common_expr_columns(_remaining_vconjunct_root)); + if (_enable_common_expr_pushdown && !_remaining_conjunct_roots.empty()) { + for (auto expr : _remaining_conjunct_roots) { + RETURN_IF_ERROR(_extract_common_expr_columns(expr)); + } if (!_common_expr_columns.empty()) { _is_need_expr_eval = true; for (auto cid : _schema.column_ids()) { @@ -1888,60 +1897,15 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, vectorized::Block* block) { SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns); - DCHECK(_remaining_vconjunct_root != nullptr); + DCHECK(!_remaining_conjunct_roots.empty()); DCHECK(block->rows() != 0); size_t prev_columns = block->columns(); - Defer defer {[&]() { vectorized::Block::erase_useless_column(block, prev_columns); }}; - int result_column_id = -1; - RETURN_IF_ERROR(_common_vexpr_ctxs_pushdown->execute(block, &result_column_id)); - vectorized::ColumnPtr filter_column = block->get_by_position(result_column_id).column; - if (auto* nullable_column = - vectorized::check_and_get_column(*filter_column)) { - vectorized::ColumnPtr nested_column = nullable_column->get_nested_column_ptr(); + vectorized::IColumn::Filter filter; + RETURN_IF_ERROR(vectorized::VExprContext::execute_conjuncts_and_filter_block( + _common_expr_ctxs_push_down, block, _columns_to_filter, prev_columns, filter)); - vectorized::MutableColumnPtr mutable_holder = - nested_column->use_count() == 1 - ? nested_column->assume_mutable() - : nested_column->clone_resized(nested_column->size()); - - vectorized::ColumnUInt8* concrete_column = - typeid_cast(mutable_holder.get()); - if (!concrete_column) { - return Status::InvalidArgument( - "Illegal type {} of column for filter. Must be UInt8 or Nullable(UInt8).", - filter_column->get_name()); - } - auto* __restrict null_map = nullable_column->get_null_map_data().data(); - vectorized::IColumn::Filter& filter = concrete_column->get_data(); - auto* __restrict filter_data = filter.data(); - - const size_t size = filter.size(); - for (size_t i = 0; i < size; ++i) { - filter_data[i] &= !null_map[i]; - } - - selected_size = _evaluate_common_expr_filter(sel_rowid_idx, selected_size, filter); - RETURN_IF_CATCH_EXCEPTION( - vectorized::Block::filter_block_internal(block, _columns_to_filter, filter)); - } else if (auto* const_column = - vectorized::check_and_get_column(*filter_column)) { - bool ret = const_column->get_bool(0); - if (!ret) { - for (auto& col : _columns_to_filter) { - std::move(*block->get_by_position(col).column).assume_mutable()->clear(); - } - selected_size = 0; - } - } else { - const vectorized::IColumn::Filter& filter = - assert_cast&>( - *filter_column) - .get_data(); - selected_size = _evaluate_common_expr_filter(sel_rowid_idx, selected_size, filter); - RETURN_IF_CATCH_EXCEPTION( - vectorized::Block::filter_block_internal(block, _columns_to_filter, filter)); - } + selected_size = _evaluate_common_expr_filter(sel_rowid_idx, selected_size, filter); return Status::OK(); } @@ -2119,7 +2083,7 @@ Status SegmentIterator::current_block_row_locations(std::vector* bl */ bool SegmentIterator::_check_column_pred_all_push_down(const std::string& column_name, bool in_compound) { - if (_remaining_vconjunct_root == nullptr) { + if (_remaining_conjunct_roots.empty()) { return true; } @@ -2139,21 +2103,22 @@ bool SegmentIterator::_check_column_pred_all_push_down(const std::string& column return true; } -void SegmentIterator::_calculate_pred_in_remaining_vconjunct_root(const vectorized::VExpr* expr) { +void SegmentIterator::_calculate_pred_in_remaining_conjunct_root( + const vectorized::VExprSPtr& expr) { if (expr == nullptr) { return; } - auto children = expr->children(); + auto& children = expr->children(); for (int i = 0; i < children.size(); ++i) { - _calculate_pred_in_remaining_vconjunct_root(children[i]); + _calculate_pred_in_remaining_conjunct_root(children[i]); } auto node_type = expr->node_type(); if (node_type == TExprNodeType::SLOT_REF) { _column_predicate_info->column_name = expr->expr_name(); } else if (_is_literal_node(node_type)) { - auto v_literal_expr = static_cast(expr); + auto v_literal_expr = static_cast(expr.get()); _column_predicate_info->query_value = v_literal_expr->value(); } else { if (node_type == TExprNodeType::MATCH_PRED) { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index a339929100..6280dad7ef 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -187,7 +187,8 @@ private: bool _column_has_fulltext_index(int32_t unique_id); inline bool _inverted_index_not_support_pred_type(const PredicateType& type); bool _can_filter_by_preds_except_leafnode_of_andnode(); - [[nodiscard]] Status _execute_predicates_except_leafnode_of_andnode(vectorized::VExpr* expr); + [[nodiscard]] Status _execute_predicates_except_leafnode_of_andnode( + const vectorized::VExprSPtr& expr); [[nodiscard]] Status _execute_compound_fn(const std::string& function_name); bool _is_literal_node(const TExprNodeType::type& node_type); @@ -234,7 +235,7 @@ private: bool _can_evaluated_by_vectorized(ColumnPredicate* predicate); - [[nodiscard]] Status _extract_common_expr_columns(vectorized::VExpr* expr); + [[nodiscard]] Status _extract_common_expr_columns(const vectorized::VExprSPtr& expr); [[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, vectorized::Block* block); uint16_t _evaluate_common_expr_filter(uint16_t* sel_rowid_idx, uint16_t selected_size, @@ -265,7 +266,7 @@ private: // return true means one column's predicates all pushed down bool _check_column_pred_all_push_down(const std::string& column_name, bool in_compound = false); - void _calculate_pred_in_remaining_vconjunct_root(const vectorized::VExpr* expr); + void _calculate_pred_in_remaining_conjunct_root(const vectorized::VExprSPtr& expr); // todo(wb) remove this method after RowCursor is removed void _convert_rowcursor_to_short_key(const RowCursor& key, size_t num_keys) { @@ -386,9 +387,9 @@ private: // make a copy of `_opts.column_predicates` in order to make local changes std::vector _col_predicates; std::vector _col_preds_except_leafnode_of_andnode; - doris::vectorized::VExprContext* _common_vexpr_ctxs_pushdown; + vectorized::VExprContextSPtrs _common_expr_ctxs_push_down; bool _enable_common_expr_pushdown = false; - doris::vectorized::VExpr* _remaining_vconjunct_root; + std::vector _remaining_conjunct_roots; std::vector _pred_except_leafnode_of_andnode_evaluate_result; std::unique_ptr _column_predicate_info; std::unordered_map> diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 5baedefa42..48fbe5bca1 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -270,14 +270,14 @@ Status BlockChanger::change_block(vectorized::Block* ref_block, RowDescriptor(_desc_tbl.get_tuple_descriptor(_desc_tbl.get_row_tuples()[0]), false); if (_where_expr != nullptr) { - vectorized::VExprContext* ctx = nullptr; - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(&pool, *_where_expr, &ctx)); + vectorized::VExprContextSPtr ctx = nullptr; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(*_where_expr, ctx)); Defer defer {[&]() { ctx->close(state); }}; RETURN_IF_ERROR(ctx->prepare(state, row_desc)); RETURN_IF_ERROR(ctx->open(state)); RETURN_IF_ERROR( - vectorized::VExprContext::filter_block(ctx, ref_block, ref_block->columns())); + vectorized::VExprContext::filter_block(ctx.get(), ref_block, ref_block->columns())); } const int row_size = ref_block->rows(); @@ -302,9 +302,8 @@ Status BlockChanger::change_block(vectorized::Block* ref_block, value->ptr(), column, row_size); } } else if (_schema_mapping[idx].expr != nullptr) { - vectorized::VExprContext* ctx = nullptr; - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_tree(&pool, *_schema_mapping[idx].expr, &ctx)); + vectorized::VExprContextSPtr ctx; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(*_schema_mapping[idx].expr, ctx)); Defer defer {[&]() { ctx->close(state); }}; RETURN_IF_ERROR(ctx->prepare(state, row_desc)); RETURN_IF_ERROR(ctx->open(state)); diff --git a/be/src/runtime/fold_constant_executor.cpp b/be/src/runtime/fold_constant_executor.cpp index 36d63a12a1..5e8932ff01 100644 --- a/be/src/runtime/fold_constant_executor.cpp +++ b/be/src/runtime/fold_constant_executor.cpp @@ -79,15 +79,15 @@ Status FoldConstantExecutor::fold_constant_vexpr(const TFoldConstantParams& para for (const auto& m : expr_map) { PExprResultMap pexpr_result_map; for (const auto& n : m.second) { - vectorized::VExprContext* ctx = nullptr; + vectorized::VExprContextSPtr ctx; const TExpr& texpr = n.second; // create expr tree from TExpr - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(&_pool, texpr, &ctx)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(texpr, ctx)); // close context expr Defer defer {[&]() { ctx->close(_runtime_state.get()); }}; // prepare and open context - RETURN_IF_ERROR(_prepare_and_open(ctx)); + RETURN_IF_ERROR(_prepare_and_open(ctx.get())); vectorized::Block tmp_block; tmp_block.insert({vectorized::ColumnUInt8::create(1), diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index 45e155b622..4e31442896 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -47,7 +47,7 @@ namespace doris { Reusable::~Reusable() { - for (vectorized::VExprContext* ctx : _output_exprs_ctxs) { + for (auto& ctx : _output_exprs_ctxs) { ctx->close(_runtime_state.get()); } } @@ -62,8 +62,7 @@ Status Reusable::init(const TDescriptorTable& t_desc_tbl, const std::vectorslots(), 10); } - RETURN_IF_ERROR(vectorized::VExpr::create_expr_trees(_runtime_state->obj_pool(), output_exprs, - &_output_exprs_ctxs)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_trees(output_exprs, _output_exprs_ctxs)); RowDescriptor row_desc(tuple_desc(), false); // Prepare the exprs to run. RETURN_IF_ERROR(vectorized::VExpr::prepare(_output_exprs_ctxs, _runtime_state.get(), row_desc)); diff --git a/be/src/service/point_query_executor.h b/be/src/service/point_query_executor.h index 3b1214d3e6..1f24177dcc 100644 --- a/be/src/service/point_query_executor.h +++ b/be/src/service/point_query_executor.h @@ -51,6 +51,7 @@ #include "util/slice.h" #include "vec/core/block.h" #include "vec/data_types/serde/data_type_serde.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { @@ -60,10 +61,6 @@ class RuntimeState; class TDescriptorTable; class TExpr; -namespace vectorized { -class VExprContext; -} // namespace vectorized - // For caching point lookup pre allocted blocks and exprs class Reusable { public: @@ -89,7 +86,7 @@ public: TupleDescriptor* tuple_desc() { return _desc_tbl->get_tuple_descriptor(0); } - const std::vector& output_exprs() { return _output_exprs_ctxs; } + const vectorized::VExprContextSPtrs& output_exprs() { return _output_exprs_ctxs; } private: // caching TupleDescriptor, output_expr, etc... @@ -98,7 +95,7 @@ private: std::mutex _block_mutex; // prevent from allocte too many tmp blocks std::vector> _block_pool; - std::vector _output_exprs_ctxs; + vectorized::VExprContextSPtrs _output_exprs_ctxs; int64_t _create_timestamp = 0; vectorized::DataTypeSerDeSPtrs _data_type_serdes; std::unordered_map _col_uid_to_idx; diff --git a/be/src/vec/common/sort/vsort_exec_exprs.cpp b/be/src/vec/common/sort/vsort_exec_exprs.cpp index 536914770d..a8dd70ae1c 100644 --- a/be/src/vec/common/sort/vsort_exec_exprs.cpp +++ b/be/src/vec/common/sort/vsort_exec_exprs.cpp @@ -27,9 +27,6 @@ namespace doris { class ObjectPool; class RowDescriptor; class RuntimeState; -namespace vectorized { -class VExprContext; -} // namespace vectorized } // namespace doris namespace doris::vectorized { @@ -50,19 +47,19 @@ Status VSortExecExprs::init(const TSortInfo& sort_info, ObjectPool* pool) { Status VSortExecExprs::init(const std::vector& ordering_exprs, const std::vector* sort_tuple_slot_exprs, ObjectPool* pool) { - RETURN_IF_ERROR(VExpr::create_expr_trees(pool, ordering_exprs, &_lhs_ordering_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(ordering_exprs, _lhs_ordering_expr_ctxs)); if (sort_tuple_slot_exprs != NULL) { _materialize_tuple = true; - RETURN_IF_ERROR(VExpr::create_expr_trees(pool, *sort_tuple_slot_exprs, - &_sort_tuple_slot_expr_ctxs)); + RETURN_IF_ERROR( + VExpr::create_expr_trees(*sort_tuple_slot_exprs, _sort_tuple_slot_expr_ctxs)); } else { _materialize_tuple = false; } return Status::OK(); } -Status VSortExecExprs::init(const std::vector& lhs_ordering_expr_ctxs, - const std::vector& rhs_ordering_expr_ctxs) { +Status VSortExecExprs::init(const VExprContextSPtrs& lhs_ordering_expr_ctxs, + const VExprContextSPtrs& rhs_ordering_expr_ctxs) { _lhs_ordering_expr_ctxs = lhs_ordering_expr_ctxs; _rhs_ordering_expr_ctxs = rhs_ordering_expr_ctxs; return Status::OK(); @@ -83,7 +80,7 @@ Status VSortExecExprs::open(RuntimeState* state) { } RETURN_IF_ERROR(VExpr::open(_lhs_ordering_expr_ctxs, state)); RETURN_IF_ERROR( - VExpr::clone_if_not_exists(_lhs_ordering_expr_ctxs, state, &_rhs_ordering_expr_ctxs)); + VExpr::clone_if_not_exists(_lhs_ordering_expr_ctxs, state, _rhs_ordering_expr_ctxs)); return Status::OK(); } diff --git a/be/src/vec/common/sort/vsort_exec_exprs.h b/be/src/vec/common/sort/vsort_exec_exprs.h index 14ce9ffbcb..8179c93488 100644 --- a/be/src/vec/common/sort/vsort_exec_exprs.h +++ b/be/src/vec/common/sort/vsort_exec_exprs.h @@ -20,6 +20,7 @@ #include #include "common/status.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { @@ -36,7 +37,6 @@ class TSortInfo; // If _materialize_tuple is true, SortExecExprs also stores the slot expressions used to // materialize the sort tuples. namespace vectorized { -class VExprContext; class VSortExecExprs { public: @@ -53,19 +53,15 @@ public: // close all expressions used for sorting and tuple materialization. void close(RuntimeState* state); - const std::vector& sort_tuple_slot_expr_ctxs() const { + const VExprContextSPtrs& sort_tuple_slot_expr_ctxs() const { return _sort_tuple_slot_expr_ctxs; } // Can only be used after calling prepare() - const std::vector& lhs_ordering_expr_ctxs() const { - return _lhs_ordering_expr_ctxs; - } + const VExprContextSPtrs& lhs_ordering_expr_ctxs() const { return _lhs_ordering_expr_ctxs; } // Can only be used after calling open() - const std::vector& rhs_ordering_expr_ctxs() const { - return _rhs_ordering_expr_ctxs; - } + const VExprContextSPtrs& rhs_ordering_expr_ctxs() const { return _rhs_ordering_expr_ctxs; } bool need_materialize_tuple() const { return _materialize_tuple; } @@ -75,8 +71,8 @@ public: private: // Create two VExprContexts for evaluating over the TupleRows. - std::vector _lhs_ordering_expr_ctxs; - std::vector _rhs_ordering_expr_ctxs; + VExprContextSPtrs _lhs_ordering_expr_ctxs; + VExprContextSPtrs _rhs_ordering_expr_ctxs; // If true, the tuples to be sorted are materialized by // _sort_tuple_slot_exprs before the actual sort is performed. @@ -85,7 +81,7 @@ private: // Expressions used to materialize slots in the tuples to be sorted. // One expr per slot in the materialized tuple. Valid only if // _materialize_tuple is true. - std::vector _sort_tuple_slot_expr_ctxs; + VExprContextSPtrs _sort_tuple_slot_expr_ctxs; // for some reason, _sort_tuple_slot_expr_ctxs is not-null but _lhs_ordering_expr_ctxs is nullable // this flag list would be used to convert column to nullable. @@ -94,8 +90,8 @@ private: // Initialize directly from already-created VExprContexts. Callers should manually call // Prepare(), Open(), and Close() on input VExprContexts (instead of calling the // analogous functions in this class). Used for testing. - Status init(const std::vector& lhs_ordering_expr_ctxs, - const std::vector& rhs_ordering_expr_ctxs); + Status init(const VExprContextSPtrs& lhs_ordering_expr_ctxs, + const VExprContextSPtrs& rhs_ordering_expr_ctxs); // Initialize the ordering and (optionally) materialization expressions from the thrift // TExprs into the specified pool. sort_tuple_slot_exprs is NULL if the tuple is not diff --git a/be/src/vec/core/sort_cursor.h b/be/src/vec/core/sort_cursor.h index 8c6accaba8..e1efe7cd01 100644 --- a/be/src/vec/core/sort_cursor.h +++ b/be/src/vec/core/sort_cursor.h @@ -208,7 +208,7 @@ using BlockSupplier = std::function; struct BlockSupplierSortCursorImpl : public MergeSortCursorImpl { BlockSupplierSortCursorImpl(const BlockSupplier& block_supplier, - const std::vector& ordering_expr, + const VExprContextSPtrs& ordering_expr, const std::vector& is_asc_order, const std::vector& nulls_first) : _ordering_expr(ordering_expr), _block_supplier(block_supplier) { @@ -266,7 +266,7 @@ struct BlockSupplierSortCursorImpl : public MergeSortCursorImpl { return _block.clone_with_columns(std::move(columns)); } - std::vector _ordering_expr; + VExprContextSPtrs _ordering_expr; Block _block; BlockSupplier _block_supplier {}; bool _is_eof = false; diff --git a/be/src/vec/exec/format/generic_reader.h b/be/src/vec/exec/format/generic_reader.h index fc09a23710..d83cc1d2ce 100644 --- a/be/src/vec/exec/format/generic_reader.h +++ b/be/src/vec/exec/format/generic_reader.h @@ -56,7 +56,7 @@ public: virtual Status set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) { + const std::unordered_map& missing_columns) { return Status::OK(); } diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index f6f9d136e2..b414247c7c 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -241,10 +241,10 @@ Status OrcReader::_create_file_reader() { Status OrcReader::init_reader( std::unordered_map* colname_to_value_range, - VExprContext* vconjunct_ctx) { + VExprContextSPtrs& conjuncts) { _colname_to_value_range = colname_to_value_range; - _lazy_read_ctx.vconjunct_ctx = vconjunct_ctx; _text_converter.reset(new TextConverter('\\')); + _lazy_read_ctx.conjuncts = conjuncts; SCOPED_RAW_TIMER(&_statistics.parse_meta_time); RETURN_IF_ERROR(_create_file_reader()); RETURN_IF_ERROR(_init_read_columns()); @@ -622,7 +622,7 @@ bool OrcReader::_init_search_argument( Status OrcReader::set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) { + const std::unordered_map& missing_columns) { SCOPED_RAW_TIMER(&_statistics.parse_meta_time); // std::unordered_map> @@ -642,28 +642,29 @@ Status OrcReader::set_fill_columns( return; } else if (VRuntimeFilterWrapper* runtime_filter = typeid_cast(expr)) { - VExpr* filter_impl = const_cast(runtime_filter->get_impl()); + auto filter_impl = const_cast(runtime_filter->get_impl().get()); if (VBloomPredicate* bloom_predicate = typeid_cast(filter_impl)) { - for (VExpr* child : bloom_predicate->children()) { - visit_slot(child); + for (auto& child : bloom_predicate->children()) { + visit_slot(child.get()); } } else if (VInPredicate* in_predicate = typeid_cast(filter_impl)) { if (in_predicate->children().size() > 0) { - visit_slot(in_predicate->children()[0]); + visit_slot(in_predicate->children()[0].get()); } } else { - for (VExpr* child : filter_impl->children()) { - visit_slot(child); + for (auto& child : filter_impl->children()) { + visit_slot(child.get()); } } } else { - for (VExpr* child : expr->children()) { - visit_slot(child); + for (auto& child : expr->children()) { + visit_slot(child.get()); } } }; - if (_lazy_read_ctx.vconjunct_ctx != nullptr) { - visit_slot(_lazy_read_ctx.vconjunct_ctx->root()); + + for (auto& conjunct : _lazy_read_ctx.conjuncts) { + visit_slot(conjunct->root().get()); } for (auto& read_col : _read_cols_lower_case) { @@ -778,7 +779,7 @@ Status OrcReader::_fill_partition_columns( Status OrcReader::_fill_missing_columns( Block* block, size_t rows, - const std::unordered_map& missing_columns) { + const std::unordered_map& missing_columns) { for (auto& kv : missing_columns) { if (kv.second == nullptr) { // no default column, fill with null @@ -787,7 +788,7 @@ Status OrcReader::_fill_missing_columns( nullable_column->insert_many_defaults(rows); } else { // fill with default value - auto* ctx = kv.second; + auto& ctx = kv.second; auto origin_column_num = block->columns(); int result_column_id = -1; // PT1 => dest primitive type @@ -1237,18 +1238,14 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { _fill_partition_columns(block, *read_rows, _lazy_read_ctx.partition_columns)); RETURN_IF_ERROR(_fill_missing_columns(block, *read_rows, _lazy_read_ctx.missing_columns)); - if (_lazy_read_ctx.vconjunct_ctx != nullptr) { - std::vector columns_to_filter; + if (!_lazy_read_ctx.conjuncts.empty()) { int column_to_keep = block->columns(); - columns_to_filter.resize(column_to_keep); - for (uint32_t i = 0; i < column_to_keep; ++i) { - columns_to_filter[i] = i; + VExprContextSPtrs filter_conjuncts; + for (auto& conjunct : _lazy_read_ctx.conjuncts) { + filter_conjuncts.push_back(conjunct); } - std::vector filter_conjuncts; - filter_conjuncts.push_back(_lazy_read_ctx.vconjunct_ctx); - RETURN_IF_CATCH_EXCEPTION( - RETURN_IF_ERROR(VExprContext::execute_conjuncts_and_filter_block( - filter_conjuncts, nullptr, block, columns_to_filter, column_to_keep))); + RETURN_IF_ERROR( + VExprContext::filter_block(_lazy_read_ctx.conjuncts, block, column_to_keep)); } } return Status::OK(); @@ -1291,8 +1288,10 @@ Status OrcReader::filter(orc::ColumnVectorBatch& data, uint16_t* sel, uint16_t s _filter.reset(new IColumn::Filter(size, 1)); auto* __restrict result_filter_data = _filter->data(); bool can_filter_all = false; - std::vector filter_conjuncts; - filter_conjuncts.push_back(_lazy_read_ctx.vconjunct_ctx); + VExprContextSPtrs filter_conjuncts; + for (auto& conjunct : _lazy_read_ctx.conjuncts) { + filter_conjuncts.push_back(conjunct); + } RETURN_IF_CATCH_EXCEPTION(RETURN_IF_ERROR(VExprContext::execute_conjuncts( filter_conjuncts, nullptr, block, _filter.get(), &can_filter_all))); diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 4af091981e..52a230147a 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -90,7 +90,7 @@ struct OrcPredicate { }; struct LazyReadContext { - VExprContext* vconjunct_ctx = nullptr; + VExprContextSPtrs conjuncts; bool can_lazy_read = false; // block->rows() returns the number of rows of the first column, // so we should check and resize the first column @@ -109,9 +109,9 @@ struct LazyReadContext { // lazy read partition columns or all partition columns std::unordered_map> partition_columns; - std::unordered_map predicate_missing_columns; + std::unordered_map predicate_missing_columns; // lazy read missing columns or all missing columns - std::unordered_map missing_columns; + std::unordered_map missing_columns; }; class OrcReader : public GenericReader { @@ -142,12 +142,12 @@ public: Status init_reader( std::unordered_map* colname_to_value_range, - VExprContext* vconjunct_ctx); + VExprContextSPtrs& conjuncts); Status set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) override; + const std::unordered_map& missing_columns) override; Status _init_select_types(const orc::Type& type, int idx); @@ -157,7 +157,7 @@ public: partition_columns); Status _fill_missing_columns( Block* block, size_t rows, - const std::unordered_map& missing_columns); + const std::unordered_map& missing_columns); Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index abda93afde..f04f46c9a2 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -97,7 +97,7 @@ RowGroupReader::RowGroupReader(io::FileReaderSPtr file_reader, RowGroupReader::~RowGroupReader() { _column_readers.clear(); - for (auto* ctx : _dict_filter_conjuncts) { + for (auto& ctx : _dict_filter_conjuncts) { if (ctx) { ctx->close(_state); } @@ -110,8 +110,8 @@ Status RowGroupReader::init( std::unordered_map& col_offsets, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, - const std::vector* not_single_slot_filter_conjuncts, - const std::unordered_map>* slot_id_to_filter_conjuncts) { + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts) { _tuple_descriptor = tuple_descriptor; _row_descriptor = row_descriptor; _col_name_to_slot_id = colname_to_slot_id; @@ -162,7 +162,7 @@ Status RowGroupReader::init( } else { if (_slot_id_to_filter_conjuncts->find(slot_id) != _slot_id_to_filter_conjuncts->end()) { - for (VExprContext* ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { + for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { _filter_conjuncts.push_back(ctx); } } @@ -174,7 +174,7 @@ Status RowGroupReader::init( auto& [value, slot_desc] = kv.second; auto iter = _slot_id_to_filter_conjuncts->find(slot_desc->id()); if (iter != _slot_id_to_filter_conjuncts->end()) { - for (VExprContext* ctx : iter->second) { + for (auto& ctx : iter->second) { _filter_conjuncts.push_back(ctx); } } @@ -206,8 +206,8 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id, } // TODO:check expr like 'a > 10 is null', 'a > 10' should can be filter by dict. - for (VExprContext* ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { - const VExpr* root_expr = ctx->root(); + for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { + const auto& root_expr = ctx->root(); if (root_expr->node_type() == TExprNodeType::FUNCTION_CALL) { std::string is_null_str; std::string function_name = root_expr->fn().name.function_name; @@ -296,8 +296,7 @@ Status RowGroupReader::next_batch(Block* block, size_t batch_size, size_t* read_ _fill_partition_columns(block, *read_rows, _lazy_read_ctx.partition_columns)); RETURN_IF_ERROR(_fill_missing_columns(block, *read_rows, _lazy_read_ctx.missing_columns)); - Status st = - VExprContext::filter_block(_lazy_read_ctx.vconjunct_ctx, block, block->columns()); + Status st = VExprContext::filter_block(_lazy_read_ctx.conjuncts, block, block->columns()); *read_rows = block->rows(); return st; } @@ -326,11 +325,12 @@ Status RowGroupReader::next_batch(Block* block, size_t batch_size, size_t* read_ for (uint32_t i = 0; i < column_to_keep; ++i) { columns_to_filter[i] = i; } - if (_lazy_read_ctx.vconjunct_ctx != nullptr) { + if (!_lazy_read_ctx.conjuncts.empty()) { std::vector filters; if (_position_delete_ctx.has_filter) { filters.push_back(_pos_delete_filter_ptr.get()); } + RETURN_IF_CATCH_EXCEPTION( RETURN_IF_ERROR(VExprContext::execute_conjuncts_and_filter_block( _filter_conjuncts, &filters, block, columns_to_filter, @@ -441,7 +441,12 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re if (_position_delete_ctx.has_filter) { filters.push_back(_pos_delete_filter_ptr.get()); } - RETURN_IF_ERROR(VExprContext::execute_conjuncts(_filter_conjuncts, &filters, block, + + VExprContextSPtrs filter_contexts; + for (auto& conjunct : _filter_conjuncts) { + filter_contexts.emplace_back(conjunct.get()); + } + RETURN_IF_ERROR(VExprContext::execute_conjuncts(filter_contexts, &filters, block, &result_filter, &can_filter_all)); if (_lazy_read_ctx.resize_first_column) { @@ -589,7 +594,7 @@ Status RowGroupReader::_fill_partition_columns( Status RowGroupReader::_fill_missing_columns( Block* block, size_t rows, - const std::unordered_map& missing_columns) { + const std::unordered_map& missing_columns) { for (auto& kv : missing_columns) { if (kv.second == nullptr) { // no default column, fill with null @@ -598,7 +603,7 @@ Status RowGroupReader::_fill_missing_columns( nullable_column->insert_many_defaults(rows); } else { // fill with default value - auto* ctx = kv.second; + auto& ctx = kv.second; auto origin_column_num = block->columns(); int result_column_id = -1; // PT1 => dest primitive type @@ -761,10 +766,13 @@ Status RowGroupReader::_rewrite_dict_predicates() { } // 2.2 Execute conjuncts and filter block. - const std::vector* ctxs = nullptr; + VExprContextSPtrs ctxs; auto iter = _slot_id_to_filter_conjuncts->find(slot_id); if (iter != _slot_id_to_filter_conjuncts->end()) { - ctxs = &(iter->second); + for (auto& ctx : iter->second) { + ctxs.emplace_back(ctx.get()); + _filter_conjuncts.push_back(ctx); + } } else { std::stringstream msg; msg << "_slot_id_to_filter_conjuncts: slot_id [" << slot_id << "] not found"; @@ -779,7 +787,7 @@ Status RowGroupReader::_rewrite_dict_predicates() { temp_block.get_by_position(0).column->assume_mutable()->resize(dict_value_column_size); } RETURN_IF_CATCH_EXCEPTION(RETURN_IF_ERROR(VExprContext::execute_conjuncts_and_filter_block( - *ctxs, nullptr, &temp_block, columns_to_filter, column_to_keep))); + ctxs, nullptr, &temp_block, columns_to_filter, column_to_keep))); if (dict_pos != 0) { // We have to clean the first column to insert right data. temp_block.get_by_position(0).column->assume_mutable()->clear(); @@ -795,9 +803,6 @@ Status RowGroupReader::_rewrite_dict_predicates() { // About Performance: if dict_column size is too large, it will generate a large IN filter. if (dict_column->size() > MAX_DICT_CODE_PREDICATE_TO_REWRITE) { - for (auto& ctx : (*ctxs)) { - _filter_conjuncts.push_back(ctx); - } it = _dict_filter_cols.erase(it); continue; } @@ -825,7 +830,7 @@ Status RowGroupReader::_rewrite_dict_predicates() { Status RowGroupReader::_rewrite_dict_conjuncts(std::vector& dict_codes, int slot_id, bool is_nullable) { - VExpr* root; + VExprSPtr root; if (dict_codes.size() == 1) { { TFunction fn; @@ -850,7 +855,7 @@ Status RowGroupReader::_rewrite_dict_conjuncts(std::vector& dict_codes, texpr_node.__set_child_type(TPrimitiveType::INT); texpr_node.__set_num_children(2); texpr_node.__set_is_nullable(is_nullable); - root = _obj_pool->add(VectorizedFnCall::create_unique(texpr_node).release()); + root = VectorizedFnCall::create_shared(texpr_node); } { SlotDescriptor* slot = nullptr; @@ -861,8 +866,7 @@ Status RowGroupReader::_rewrite_dict_conjuncts(std::vector& dict_codes, break; } } - VExpr* slot_ref_expr = _obj_pool->add(VSlotRef::create_unique(slot).release()); - root->add_child(slot_ref_expr); + root->add_child(VSlotRef::create_shared(slot)); } { TExprNode texpr_node; @@ -872,8 +876,7 @@ Status RowGroupReader::_rewrite_dict_conjuncts(std::vector& dict_codes, int_literal.__set_value(dict_codes[0]); texpr_node.__set_int_literal(int_literal); texpr_node.__set_is_nullable(is_nullable); - VExpr* literal_expr = _obj_pool->add(VLiteral::create_unique(texpr_node).release()); - root->add_child(literal_expr); + root->add_child(VLiteral::create_shared(texpr_node)); } } else { { @@ -888,13 +891,13 @@ Status RowGroupReader::_rewrite_dict_conjuncts(std::vector& dict_codes, // VdirectInPredicate assume is_nullable = false. node.__set_is_nullable(false); - root = _obj_pool->add(vectorized::VDirectInPredicate::create_unique(node).release()); + root = vectorized::VDirectInPredicate::create_shared(node); std::shared_ptr hybrid_set( create_set(PrimitiveType::TYPE_INT, dict_codes.size())); for (int j = 0; j < dict_codes.size(); ++j) { hybrid_set->insert(&dict_codes[j]); } - static_cast(root)->set_filter(hybrid_set); + static_cast(root.get())->set_filter(hybrid_set); } { SlotDescriptor* slot = nullptr; @@ -905,12 +908,10 @@ Status RowGroupReader::_rewrite_dict_conjuncts(std::vector& dict_codes, break; } } - VExpr* slot_ref_expr = _obj_pool->add(VSlotRef::create_unique(slot).release()); - root->add_child(slot_ref_expr); + root->add_child(VSlotRef::create_shared(slot)); } } - VExprContext* rewritten_conjunct_ctx = - _obj_pool->add(VExprContext::create_unique(root).release()); + VExprContextSPtr rewritten_conjunct_ctx = VExprContext::create_shared(root); RETURN_IF_ERROR(rewritten_conjunct_ctx->prepare(_state, *_row_descriptor)); RETURN_IF_ERROR(rewritten_conjunct_ctx->open(_state)); _dict_filter_conjuncts.push_back(rewritten_conjunct_ctx); diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.h b/be/src/vec/exec/format/parquet/vparquet_group_reader.h index a6af66d7a3..393f738857 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.h @@ -32,6 +32,7 @@ #include "vec/columns/column.h" #include "vec/common/allocator.h" #include "vec/exec/format/parquet/parquet_common.h" +#include "vec/exprs/vexpr_fwd.h" #include "vparquet_column_reader.h" namespace cctz { @@ -50,7 +51,6 @@ class IOContext; namespace vectorized { class Block; class FieldDescriptor; -class VExprContext; } // namespace vectorized } // namespace doris namespace tparquet { @@ -77,7 +77,7 @@ public: }; struct LazyReadContext { - VExprContext* vconjunct_ctx = nullptr; + VExprContextSPtrs conjuncts; bool can_lazy_read = false; // block->rows() returns the number of rows of the first column, // so we should check and resize the first column @@ -95,9 +95,9 @@ public: // lazy read partition columns or all partition columns std::unordered_map> partition_columns; - std::unordered_map predicate_missing_columns; + std::unordered_map predicate_missing_columns; // lazy read missing columns or all missing columns - std::unordered_map missing_columns; + std::unordered_map missing_columns; }; /** @@ -148,13 +148,12 @@ public: const LazyReadContext& lazy_read_ctx, RuntimeState* state); ~RowGroupReader(); - Status init( - const FieldDescriptor& schema, std::vector& row_ranges, - std::unordered_map& col_offsets, - const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, - const std::unordered_map* colname_to_slot_id, - const std::vector* not_single_slot_filter_conjuncts, - const std::unordered_map>* slot_id_to_filter_conjuncts); + Status init(const FieldDescriptor& schema, std::vector& row_ranges, + std::unordered_map& col_offsets, + const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, + const std::unordered_map* colname_to_slot_id, + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts); Status next_batch(Block* block, size_t batch_size, size_t* read_rows, bool* batch_eof); int64_t lazy_read_filtered_rows() const { return _lazy_read_filtered_rows; } @@ -175,7 +174,7 @@ private: partition_columns); Status _fill_missing_columns( Block* block, size_t rows, - const std::unordered_map& missing_columns); + const std::unordered_map& missing_columns); Status _build_pos_delete_filter(size_t read_rows); Status _filter_block(Block* block, int column_to_keep, const std::vector& columns_to_filter); @@ -210,9 +209,9 @@ private: const TupleDescriptor* _tuple_descriptor; const RowDescriptor* _row_descriptor; const std::unordered_map* _col_name_to_slot_id; - const std::unordered_map>* _slot_id_to_filter_conjuncts; - std::vector _dict_filter_conjuncts; - std::vector _filter_conjuncts; + const std::unordered_map* _slot_id_to_filter_conjuncts; + VExprContextSPtrs _dict_filter_conjuncts; + VExprContextSPtrs _filter_conjuncts; // std::pair std::vector> _dict_filter_cols; RuntimeState* _state; diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index a9bf8dc6c8..23e1fcf7d9 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -283,11 +283,11 @@ Status ParquetReader::init_reader( const std::vector& all_column_names, const std::vector& missing_column_names, std::unordered_map* colname_to_value_range, - VExprContext* vconjunct_ctx, const TupleDescriptor* tuple_descriptor, + const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, - const std::vector* not_single_slot_filter_conjuncts, - const std::unordered_map>* slot_id_to_filter_conjuncts, + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts, bool filter_groups) { _tuple_descriptor = tuple_descriptor; _row_descriptor = row_descriptor; @@ -325,7 +325,7 @@ Status ParquetReader::init_reader( _colname_to_value_range = colname_to_value_range; RETURN_IF_ERROR(_init_read_columns()); // build column predicates for column lazy read - _lazy_read_ctx.vconjunct_ctx = vconjunct_ctx; + _lazy_read_ctx.conjuncts = conjuncts; RETURN_IF_ERROR(_init_row_groups(filter_groups)); return Status::OK(); } @@ -333,7 +333,7 @@ Status ParquetReader::init_reader( Status ParquetReader::set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) { + const std::unordered_map& missing_columns) { SCOPED_RAW_TIMER(&_statistics.parse_meta_time); // std::unordered_map> std::unordered_map> predicate_columns; @@ -352,28 +352,30 @@ Status ParquetReader::set_fill_columns( return; } else if (VRuntimeFilterWrapper* runtime_filter = typeid_cast(expr)) { - VExpr* filter_impl = const_cast(runtime_filter->get_impl()); + VExpr* filter_impl = const_cast(runtime_filter->get_impl().get()); if (VBloomPredicate* bloom_predicate = typeid_cast(filter_impl)) { - for (VExpr* child : bloom_predicate->children()) { - visit_slot(child); + for (auto& child : bloom_predicate->children()) { + visit_slot(child.get()); } } else if (VInPredicate* in_predicate = typeid_cast(filter_impl)) { if (in_predicate->children().size() > 0) { - visit_slot(in_predicate->children()[0]); + visit_slot(in_predicate->children()[0].get()); } } else { - for (VExpr* child : filter_impl->children()) { - visit_slot(child); + for (auto& child : filter_impl->children()) { + visit_slot(child.get()); } } } else { - for (VExpr* child : expr->children()) { - visit_slot(child); + for (auto& child : expr->children()) { + visit_slot(child.get()); } } }; - if (_lazy_read_ctx.vconjunct_ctx != nullptr) { - visit_slot(_lazy_read_ctx.vconjunct_ctx->root()); + if (!_lazy_read_ctx.conjuncts.empty()) { + for (auto& conjunct : _lazy_read_ctx.conjuncts) { + visit_slot(conjunct->root().get()); + } } const FieldDescriptor& schema = _file_metadata->schema(); @@ -700,7 +702,7 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group, _statistics.read_rows += row_group.num_rows; }; - if (_has_complex_type || _lazy_read_ctx.vconjunct_ctx == nullptr || + if (_has_complex_type || _lazy_read_ctx.conjuncts.empty() || _colname_to_value_range == nullptr || _colname_to_value_range->empty()) { read_whole_row_group(); return Status::OK(); diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h b/be/src/vec/exec/format/parquet/vparquet_reader.h index 4f45e6979f..9ae83845cf 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_reader.h @@ -108,11 +108,11 @@ public: const std::vector& all_column_names, const std::vector& missing_column_names, std::unordered_map* colname_to_value_range, - VExprContext* vconjunct_ctx, const TupleDescriptor* tuple_descriptor, + const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, - const std::vector* not_single_slot_filter_conjuncts, - const std::unordered_map>* slot_id_to_filter_conjuncts, + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts, bool filter_groups = true); Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; @@ -140,7 +140,7 @@ public: Status set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) override; + const std::unordered_map& missing_columns) override; std::vector get_metadata_key_values(); void set_table_to_file_col_map(std::unordered_map& map) { @@ -261,7 +261,7 @@ private: const TupleDescriptor* _tuple_descriptor; const RowDescriptor* _row_descriptor; const std::unordered_map* _colname_to_slot_id; - const std::vector* _not_single_slot_filter_conjuncts; - const std::unordered_map>* _slot_id_to_filter_conjuncts; + const VExprContextSPtrs* _not_single_slot_filter_conjuncts; + const std::unordered_map* _slot_id_to_filter_conjuncts; }; } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/iceberg_reader.cpp b/be/src/vec/exec/format/table/iceberg_reader.cpp index 6144863bfa..6d2f572586 100644 --- a/be/src/vec/exec/format/table/iceberg_reader.cpp +++ b/be/src/vec/exec/format/table/iceberg_reader.cpp @@ -114,11 +114,11 @@ Status IcebergTableReader::init_reader( const std::vector& file_col_names, const std::unordered_map& col_id_name_map, std::unordered_map* colname_to_value_range, - VExprContext* vconjunct_ctx, const TupleDescriptor* tuple_descriptor, + const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, - const std::vector* not_single_slot_filter_conjuncts, - const std::unordered_map>* slot_id_to_filter_conjuncts) { + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts) { ParquetReader* parquet_reader = static_cast(_file_format_reader.get()); _col_id_name_map = col_id_name_map; _file_col_names = file_col_names; @@ -130,7 +130,7 @@ Status IcebergTableReader::init_reader( parquet_reader->set_table_to_file_col_map(_table_col_to_file_col); Status status = parquet_reader->init_reader( _all_required_col_names, _not_in_file_col_names, &_new_colname_to_value_range, - vconjunct_ctx, tuple_descriptor, row_descriptor, colname_to_slot_id, + conjuncts, tuple_descriptor, row_descriptor, colname_to_slot_id, not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts); return status; } @@ -167,7 +167,7 @@ Status IcebergTableReader::get_next_block(Block* block, size_t* read_rows, bool* Status IcebergTableReader::set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) { + const std::unordered_map& missing_columns) { return _file_format_reader->set_fill_columns(partition_columns, missing_columns); } @@ -248,7 +248,7 @@ Status IcebergTableReader::_position_delete( return nullptr; } create_status = delete_reader.init_reader(delete_file_col_names, _not_in_file_col_names, - nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, {}, nullptr, nullptr, nullptr, nullptr, nullptr, false); if (!create_status.ok()) { return nullptr; @@ -256,7 +256,7 @@ Status IcebergTableReader::_position_delete( std::unordered_map> partition_columns; - std::unordered_map missing_columns; + std::unordered_map missing_columns; delete_reader.set_fill_columns(partition_columns, missing_columns); bool dictionary_coded = true; diff --git a/be/src/vec/exec/format/table/iceberg_reader.h b/be/src/vec/exec/format/table/iceberg_reader.h index e469b54ae7..451c51445e 100644 --- a/be/src/vec/exec/format/table/iceberg_reader.h +++ b/be/src/vec/exec/format/table/iceberg_reader.h @@ -80,7 +80,7 @@ public: Status set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) override; + const std::unordered_map& missing_columns) override; bool fill_all_columns() const override; @@ -91,11 +91,11 @@ public: const std::vector& file_col_names, const std::unordered_map& col_id_name_map, std::unordered_map* colname_to_value_range, - VExprContext* vconjunct_ctx, const TupleDescriptor* tuple_descriptor, + const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, - const std::vector* not_single_slot_filter_conjuncts, - const std::unordered_map>* slot_id_to_filter_conjuncts); + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts); enum { DATA, POSITION_DELETE, EQUALITY_DELETE }; diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h index e1ce4c1c91..5ef335ea52 100644 --- a/be/src/vec/exec/join/process_hash_table_probe_impl.h +++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h @@ -677,11 +677,20 @@ Status ProcessHashTableProbe::do_process_with_other_join_conjuncts( // dispose the other join conjunct exec auto row_count = output_block->rows(); if (row_count) { - int result_column_id = -1; int orig_columns = output_block->columns(); - RETURN_IF_ERROR((*_join_node->_vother_join_conjunct_ptr) - ->execute(output_block, &result_column_id)); + IColumn::Filter other_conjunct_filter(row_count, 1); + bool can_be_filter_all; + RETURN_IF_ERROR(VExprContext::execute_conjuncts( + _join_node->_other_join_conjuncts, nullptr, output_block, + &other_conjunct_filter, &can_be_filter_all)); + auto result_column_id = output_block->columns(); + auto filter_column = ColumnVector::create(); + if (can_be_filter_all) { + memset(other_conjunct_filter.data(), 0, row_count); + } + filter_column->get_data() = std::move(other_conjunct_filter); + output_block->insert({std::move(filter_column), std::make_shared(), ""}); auto column = output_block->get_by_position(result_column_id).column; if constexpr (JoinOpType == TJoinOp::LEFT_OUTER_JOIN || JoinOpType == TJoinOp::FULL_OUTER_JOIN) { diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index dedcfdf9d2..2e91f199c2 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -346,10 +346,10 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { std::vector probe_not_ignore_null(eq_join_conjuncts.size()); size_t conjuncts_index = 0; for (const auto& eq_join_conjunct : eq_join_conjuncts) { - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, eq_join_conjunct.left, &ctx)); + VExprContextSPtr ctx; + RETURN_IF_ERROR(VExpr::create_expr_tree(eq_join_conjunct.left, ctx)); _probe_expr_ctxs.push_back(ctx); - RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, eq_join_conjunct.right, &ctx)); + RETURN_IF_ERROR(VExpr::create_expr_tree(eq_join_conjunct.right, ctx)); _build_expr_ctxs.push_back(ctx); bool null_aware = eq_join_conjunct.__isset.opcode && @@ -371,10 +371,17 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { _probe_column_disguise_null.reserve(eq_join_conjuncts.size()); - if (tnode.hash_join_node.__isset.vother_join_conjunct) { - _vother_join_conjunct_ptr.reset(new VExprContext*); - RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, tnode.hash_join_node.vother_join_conjunct, - _vother_join_conjunct_ptr.get())); + if (tnode.hash_join_node.__isset.other_join_conjuncts && + !tnode.hash_join_node.other_join_conjuncts.empty()) { + RETURN_IF_ERROR(VExpr::create_expr_trees(tnode.hash_join_node.other_join_conjuncts, + _other_join_conjuncts)); + + DCHECK(!_build_unique); + DCHECK(_have_other_join_conjunct); + } else if (tnode.hash_join_node.__isset.vother_join_conjunct) { + _other_join_conjuncts.resize(1); + RETURN_IF_ERROR(VExpr::create_expr_tree(tnode.hash_join_node.vother_join_conjunct, + _other_join_conjuncts[0])); // If LEFT SEMI JOIN/LEFT ANTI JOIN with not equal predicate, // build table should not be deduplicated. @@ -474,9 +481,9 @@ Status HashJoinNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(VExpr::prepare(_build_expr_ctxs, state, child(1)->row_desc())); RETURN_IF_ERROR(VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc())); - // _vother_join_conjuncts are evaluated in the context of the rows produced by this node - if (_vother_join_conjunct_ptr) { - RETURN_IF_ERROR((*_vother_join_conjunct_ptr)->prepare(state, *_intermediate_row_desc)); + // _other_join_conjuncts are evaluated in the context of the rows produced by this node + for (auto& conjunct : _other_join_conjuncts) { + RETURN_IF_ERROR(conjunct->prepare(state, *_intermediate_row_desc)); } RETURN_IF_ERROR(VExpr::prepare(_output_expr_ctxs, state, *_intermediate_row_desc)); @@ -605,8 +612,7 @@ Status HashJoinNode::pull(doris::RuntimeState* state, vectorized::Block* output_ DCHECK(output_rows <= state->batch_size()); { SCOPED_TIMER(_join_filter_timer); - RETURN_IF_ERROR( - VExprContext::filter_block(_vconjunct_ctx_ptr, &temp_block, temp_block.columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, &temp_block, temp_block.columns())); } RETURN_IF_ERROR(_build_output_block(&temp_block, output_block)); _reset_tuple_is_null_column(); @@ -753,8 +759,8 @@ Status HashJoinNode::alloc_resource(doris::RuntimeState* state) { } RETURN_IF_ERROR(VExpr::open(_build_expr_ctxs, state)); RETURN_IF_ERROR(VExpr::open(_probe_expr_ctxs, state)); - if (_vother_join_conjunct_ptr) { - RETURN_IF_ERROR((*_vother_join_conjunct_ptr)->open(state)); + for (auto& conjunct : _other_join_conjuncts) { + RETURN_IF_ERROR(conjunct->open(state)); } return Status::OK(); } @@ -763,8 +769,8 @@ void HashJoinNode::release_resource(RuntimeState* state) { VExpr::close(_build_expr_ctxs, state); VExpr::close(_probe_expr_ctxs, state); - if (_vother_join_conjunct_ptr) { - (*_vother_join_conjunct_ptr)->close(state); + for (auto& conjunct : _other_join_conjuncts) { + conjunct->close(state); } _release_mem(); VJoinNodeBase::release_resource(state); @@ -985,7 +991,7 @@ Status HashJoinNode::_extract_join_column(Block& block, ColumnUInt8::MutablePtr& return Status::OK(); } -Status HashJoinNode::_do_evaluate(Block& block, std::vector& exprs, +Status HashJoinNode::_do_evaluate(Block& block, VExprContextSPtrs& exprs, RuntimeProfile::Counter& expr_call_timer, std::vector& res_col_ids) { for (size_t i = 0; i < exprs.size(); ++i) { diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index eef8b30b90..33ae53f923 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -43,6 +43,7 @@ #include "vec/core/block.h" #include "vec/core/types.h" #include "vec/exec/join/join_op.h" // IWYU pragma: keep +#include "vec/exprs/vexpr_fwd.h" #include "vec/runtime/shared_hash_table_controller.h" #include "vjoin_node_base.h" @@ -252,13 +253,12 @@ public: bool should_build_hash_table() const { return _should_build_hash_table; } private: - using VExprContexts = std::vector; // probe expr - VExprContexts _probe_expr_ctxs; + VExprContextSPtrs _probe_expr_ctxs; // build expr - VExprContexts _build_expr_ctxs; + VExprContextSPtrs _build_expr_ctxs; // other expr - std::unique_ptr _vother_join_conjunct_ptr; + VExprContextSPtrs _other_join_conjuncts; // mark the join column whether support null eq std::vector _is_null_safe_eq_join; @@ -346,7 +346,7 @@ private: Status _process_build_block(RuntimeState* state, Block& block, uint8_t offset); - Status _do_evaluate(Block& block, std::vector& exprs, + Status _do_evaluate(Block& block, VExprContextSPtrs& exprs, RuntimeProfile::Counter& expr_call_timer, std::vector& res_col_ids); template diff --git a/be/src/vec/exec/join/vjoin_node_base.cpp b/be/src/vec/exec/join/vjoin_node_base.cpp index f6f84b2b91..3bc1d93c8b 100644 --- a/be/src/vec/exec/join/vjoin_node_base.cpp +++ b/be/src/vec/exec/join/vjoin_node_base.cpp @@ -54,9 +54,10 @@ VJoinNodeBase::VJoinNodeBase(ObjectPool* pool, const TPlanNode& tnode, const Des : (tnode.__isset.nested_loop_join_node ? tnode.nested_loop_join_node.join_op : TJoinOp::CROSS_JOIN)), - _have_other_join_conjunct(tnode.__isset.hash_join_node - ? tnode.hash_join_node.__isset.vother_join_conjunct - : false), + _have_other_join_conjunct(tnode.__isset.hash_join_node && + ((tnode.hash_join_node.__isset.other_join_conjuncts && + !tnode.hash_join_node.other_join_conjuncts.empty()) || + tnode.hash_join_node.__isset.vother_join_conjunct)), _match_all_probe(_join_op == TJoinOp::LEFT_OUTER_JOIN || _join_op == TJoinOp::FULL_OUTER_JOIN), _match_all_build(_join_op == TJoinOp::RIGHT_OUTER_JOIN || @@ -183,8 +184,8 @@ Status VJoinNodeBase::init(const TPlanNode& tnode, RuntimeState* state) { ? tnode.hash_join_node.srcExprList : tnode.nested_loop_join_node.srcExprList; for (const auto& expr : output_exprs) { - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, expr, &ctx)); + VExprContextSPtr ctx; + RETURN_IF_ERROR(VExpr::create_expr_tree(expr, ctx)); _output_expr_ctxs.push_back(ctx); } } diff --git a/be/src/vec/exec/join/vjoin_node_base.h b/be/src/vec/exec/join/vjoin_node_base.h index 9a33f41579..6972357083 100644 --- a/be/src/vec/exec/join/vjoin_node_base.h +++ b/be/src/vec/exec/join/vjoin_node_base.h @@ -31,14 +31,12 @@ #include "util/runtime_profile.h" #include "vec/core/block.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class ObjectPool; class RuntimeState; -namespace vectorized { -class VExprContext; -} // namespace vectorized } // namespace doris namespace doris::vectorized { @@ -118,7 +116,7 @@ protected: std::unique_ptr _output_row_desc; std::unique_ptr _intermediate_row_desc; // output expr - std::vector _output_expr_ctxs; + VExprContextSPtrs _output_expr_ctxs; Block _join_block; diff --git a/be/src/vec/exec/join/vnested_loop_join_node.cpp b/be/src/vec/exec/join/vnested_loop_join_node.cpp index 9d314f4160..b8160b2ade 100644 --- a/be/src/vec/exec/join/vnested_loop_join_node.cpp +++ b/be/src/vec/exec/join/vnested_loop_join_node.cpp @@ -111,10 +111,15 @@ Status VNestedLoopJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { _is_output_left_side_only = tnode.nested_loop_join_node.is_output_left_side_only; } - if (tnode.nested_loop_join_node.__isset.vjoin_conjunct) { - _vjoin_conjunct_ptr.reset(new VExprContext*); - RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, tnode.nested_loop_join_node.vjoin_conjunct, - _vjoin_conjunct_ptr.get())); + if (tnode.nested_loop_join_node.__isset.join_conjuncts && + !tnode.nested_loop_join_node.join_conjuncts.empty()) { + RETURN_IF_ERROR(VExpr::create_expr_trees(tnode.nested_loop_join_node.join_conjuncts, + _join_conjuncts)); + } else if (tnode.nested_loop_join_node.__isset.vjoin_conjunct) { + VExprContextSPtr context; + RETURN_IF_ERROR( + VExpr::create_expr_tree(tnode.nested_loop_join_node.vjoin_conjunct, context)); + _join_conjuncts.emplace_back(context); } std::vector filter_src_exprs; @@ -123,8 +128,7 @@ Status VNestedLoopJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(state->runtime_filter_mgr()->register_filter( RuntimeFilterRole::PRODUCER, _runtime_filter_descs[i], state->query_options())); } - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_trees(_pool, filter_src_exprs, &_filter_src_expr_ctxs)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_trees(filter_src_exprs, _filter_src_expr_ctxs)); return Status::OK(); } @@ -149,8 +153,8 @@ Status VNestedLoopJoinNode::prepare(RuntimeState* state) { RETURN_IF_INVALID_TUPLE_IDX(build_tuple_desc->id(), tuple_idx); } - if (_vjoin_conjunct_ptr) { - RETURN_IF_ERROR((*_vjoin_conjunct_ptr)->prepare(state, *_intermediate_row_desc)); + for (auto& conjunct : _join_conjuncts) { + RETURN_IF_ERROR(conjunct->prepare(state, *_intermediate_row_desc)); } _num_probe_side_columns = child(0)->row_desc().num_materialized_slots(); _num_build_side_columns = child(1)->row_desc().num_materialized_slots(); @@ -551,66 +555,15 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(Block* block, size_t build_block_idx = _current_build_pos == 0 ? _build_blocks.size() - 1 : _current_build_pos - 1; size_t processed_blocks_num = _offset_stack.size(); - if (LIKELY(_vjoin_conjunct_ptr != nullptr && block->rows() > 0)) { - DCHECK((*_vjoin_conjunct_ptr) != nullptr); - int result_column_id = -1; - RETURN_IF_ERROR((*_vjoin_conjunct_ptr)->execute(block, &result_column_id)); - const auto& filter_column = block->get_by_position(result_column_id).column; - if (auto* nullable_column = check_and_get_column(*filter_column)) { - const auto& nested_column = nullable_column->get_nested_column_ptr(); + if (LIKELY(!_join_conjuncts.empty() && block->rows() > 0)) { + IColumn::Filter filter(block->rows(), 1); + bool can_filter_all = false; + RETURN_IF_ERROR(VExprContext::execute_conjuncts(_join_conjuncts, nullptr, IgnoreNull, block, + &filter, &can_filter_all)); - MutableColumnPtr mutable_holder = - nested_column->use_count() == 1 - ? nested_column->assume_mutable() - : nested_column->clone_resized(nested_column->size()); - - ColumnUInt8* concrete_column = assert_cast(mutable_holder.get()); - auto* __restrict null_map = nullable_column->get_null_map_data().data(); - IColumn::Filter& filter = concrete_column->get_data(); - auto* __restrict filter_data = filter.data(); - - const size_t size = filter.size(); - if constexpr (IgnoreNull) { - for (size_t i = 0; i < size; ++i) { - filter_data[i] |= null_map[i]; - } - } else { - for (size_t i = 0; i < size; ++i) { - filter_data[i] &= !null_map[i]; - } - } - _do_filtering_and_update_visited_flags_impl( - block, column_to_keep, build_block_idx, processed_blocks_num, materialize, - filter); - } else if (auto* const_column = check_and_get_column(*filter_column)) { - bool ret = const_column->get_bool(0); - if (ret) { - if constexpr (SetBuildSideFlag) { - for (size_t i = 0; i < processed_blocks_num; i++) { - auto& build_side_flag = - assert_cast( - _build_side_visited_flags[build_block_idx].get()) - ->get_data(); - auto* __restrict build_side_flag_data = build_side_flag.data(); - auto cur_sz = build_side_flag.size(); - _offset_stack.pop(); - memset(reinterpret_cast(build_side_flag_data), 1, cur_sz); - build_block_idx = build_block_idx == 0 ? _build_blocks.size() - 1 - : build_block_idx - 1; - } - } - if constexpr (SetProbeSideFlag) { - _cur_probe_row_visited_flags |= ret; - } - } - if (!materialize || !ret) { - CLEAR_BLOCK - } + if (can_filter_all) { + CLEAR_BLOCK } else { - const IColumn::Filter& filter = - assert_cast&>(*filter_column) - .get_data(); _do_filtering_and_update_visited_flags_impl( block, column_to_keep, build_block_idx, processed_blocks_num, materialize, @@ -643,8 +596,8 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(Block* block, Status VNestedLoopJoinNode::alloc_resource(doris::RuntimeState* state) { RETURN_IF_ERROR(VJoinNodeBase::alloc_resource(state)); - if (_vjoin_conjunct_ptr) { - RETURN_IF_ERROR((*_vjoin_conjunct_ptr)->open(state)); + for (auto& conjunct : _join_conjuncts) { + RETURN_IF_ERROR(conjunct->open(state)); } return VExpr::open(_filter_src_expr_ctxs, state); } @@ -697,8 +650,8 @@ Status VNestedLoopJoinNode::pull(RuntimeState* state, vectorized::Block* block, _add_tuple_is_null_column(&tmp_block); { SCOPED_TIMER(_join_filter_timer); - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, &tmp_block, - tmp_block.columns())); + RETURN_IF_ERROR( + VExprContext::filter_block(_conjuncts, &tmp_block, tmp_block.columns())); } RETURN_IF_ERROR(_build_output_block(&tmp_block, block)); _reset_tuple_is_null_column(); @@ -729,7 +682,9 @@ bool VNestedLoopJoinNode::need_more_input_data() const { void VNestedLoopJoinNode::release_resource(doris::RuntimeState* state) { VJoinNodeBase::release_resource(state); VExpr::close(_filter_src_expr_ctxs, state); - if (_vjoin_conjunct_ptr) (*_vjoin_conjunct_ptr)->close(state); + for (auto& conjunct : _join_conjuncts) { + conjunct->close(state); + } } } // namespace doris::vectorized diff --git a/be/src/vec/exec/join/vnested_loop_join_node.h b/be/src/vec/exec/join/vnested_loop_join_node.h index 4249af9266..4bd66798d9 100644 --- a/be/src/vec/exec/join/vnested_loop_join_node.h +++ b/be/src/vec/exec/join/vnested_loop_join_node.h @@ -246,11 +246,11 @@ private: MutableColumns _dst_columns; std::vector _runtime_filter_descs; - std::vector _filter_src_expr_ctxs; + VExprContextSPtrs _filter_src_expr_ctxs; bool _is_output_left_side_only = false; bool _need_more_input_data = true; std::stack _offset_stack; - std::unique_ptr _vjoin_conjunct_ptr; + VExprContextSPtrs _join_conjuncts; friend struct RuntimeFilterBuild; }; diff --git a/be/src/vec/exec/scan/new_es_scan_node.cpp b/be/src/vec/exec/scan/new_es_scan_node.cpp index a037761278..088784b330 100644 --- a/be/src/vec/exec/scan/new_es_scan_node.cpp +++ b/be/src/vec/exec/scan/new_es_scan_node.cpp @@ -167,7 +167,7 @@ Status NewEsScanNode::_init_scanners(std::list* scanners) { _state, this, _limit_per_scanner, _tuple_id, properties, _docvalue_context, doc_value_mode, _state->runtime_profile()); - RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr)); + RETURN_IF_ERROR(scanner->prepare(_state, _conjuncts)); scanners->push_back(scanner); } return Status::OK(); diff --git a/be/src/vec/exec/scan/new_es_scanner.cpp b/be/src/vec/exec/scan/new_es_scanner.cpp index d65ad50615..0bd492b79c 100644 --- a/be/src/vec/exec/scan/new_es_scanner.cpp +++ b/be/src/vec/exec/scan/new_es_scanner.cpp @@ -58,9 +58,9 @@ NewEsScanner::NewEsScanner(RuntimeState* state, NewEsScanNode* parent, int64_t l _docvalue_context(docvalue_context), _doc_value_mode(doc_value_mode) {} -Status NewEsScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { +Status NewEsScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) { VLOG_CRITICAL << NEW_SCANNER_TYPE << "::prepare"; - RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); + RETURN_IF_ERROR(VScanner::prepare(_state, conjuncts)); if (_is_init) { return Status::OK(); diff --git a/be/src/vec/exec/scan/new_es_scanner.h b/be/src/vec/exec/scan/new_es_scanner.h index 28a9872cd5..90b61344de 100644 --- a/be/src/vec/exec/scan/new_es_scanner.h +++ b/be/src/vec/exec/scan/new_es_scanner.h @@ -60,7 +60,7 @@ public: Status close(RuntimeState* state) override; public: - Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eof) override; diff --git a/be/src/vec/exec/scan/new_file_scan_node.cpp b/be/src/vec/exec/scan/new_file_scan_node.cpp index ed8b80ab57..127539d26e 100644 --- a/be/src/vec/exec/scan/new_file_scan_node.cpp +++ b/be/src/vec/exec/scan/new_file_scan_node.cpp @@ -111,8 +111,8 @@ Status NewFileScanNode::_init_scanners(std::list* scanners) { VFileScanner::create_unique(_state, this, _limit_per_scanner, scan_range.scan_range.ext_scan_range.file_scan_range, runtime_profile(), _kv_cache.get()); - RETURN_IF_ERROR(scanner->prepare(_vconjunct_ctx_ptr, &_colname_to_value_range, - &_colname_to_slot_id)); + RETURN_IF_ERROR( + scanner->prepare(_conjuncts, &_colname_to_value_range, &_colname_to_slot_id)); scanners->push_back(std::move(scanner)); } diff --git a/be/src/vec/exec/scan/new_jdbc_scan_node.cpp b/be/src/vec/exec/scan/new_jdbc_scan_node.cpp index 8b240ef9c6..f8219b4337 100644 --- a/be/src/vec/exec/scan/new_jdbc_scan_node.cpp +++ b/be/src/vec/exec/scan/new_jdbc_scan_node.cpp @@ -68,7 +68,7 @@ Status NewJdbcScanNode::_init_scanners(std::list* scanners) { std::unique_ptr scanner = NewJdbcScanner::create_unique(_state, this, _limit_per_scanner, _tuple_id, _query_string, _table_type, _state->runtime_profile()); - RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr)); + RETURN_IF_ERROR(scanner->prepare(_state, _conjuncts)); scanners->push_back(std::move(scanner)); return Status::OK(); } diff --git a/be/src/vec/exec/scan/new_jdbc_scanner.cpp b/be/src/vec/exec/scan/new_jdbc_scanner.cpp index 150203035b..bc8bf6e044 100644 --- a/be/src/vec/exec/scan/new_jdbc_scanner.cpp +++ b/be/src/vec/exec/scan/new_jdbc_scanner.cpp @@ -54,12 +54,9 @@ NewJdbcScanner::NewJdbcScanner(RuntimeState* state, NewJdbcScanNode* parent, int _connector_close_timer = ADD_TIMER(get_parent()->_scanner_profile, "ConnectorCloseTime"); } -Status NewJdbcScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { +Status NewJdbcScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) { VLOG_CRITICAL << "NewJdbcScanner::Prepare"; - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR(vconjunct_ctx_ptr->clone(state, &_vconjunct_ctx)); - } + RETURN_IF_ERROR(VScanner::prepare(state, conjuncts)); if (_is_init) { return Status::OK(); diff --git a/be/src/vec/exec/scan/new_jdbc_scanner.h b/be/src/vec/exec/scan/new_jdbc_scanner.h index 8db47fb95f..1fb1b64874 100644 --- a/be/src/vec/exec/scan/new_jdbc_scanner.h +++ b/be/src/vec/exec/scan/new_jdbc_scanner.h @@ -52,7 +52,7 @@ public: Status open(RuntimeState* state) override; Status close(RuntimeState* state) override; - Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eos) override; diff --git a/be/src/vec/exec/scan/new_odbc_scan_node.cpp b/be/src/vec/exec/scan/new_odbc_scan_node.cpp index 856a5f8de5..eafad16596 100644 --- a/be/src/vec/exec/scan/new_odbc_scan_node.cpp +++ b/be/src/vec/exec/scan/new_odbc_scan_node.cpp @@ -67,7 +67,7 @@ Status NewOdbcScanNode::_init_scanners(std::list* scanners) { } std::shared_ptr scanner = NewOdbcScanner::create_shared( _state, this, _limit_per_scanner, _odbc_scan_node, _state->runtime_profile()); - RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr)); + RETURN_IF_ERROR(scanner->prepare(_state, _conjuncts)); scanners->push_back(scanner); return Status::OK(); } diff --git a/be/src/vec/exec/scan/new_odbc_scanner.cpp b/be/src/vec/exec/scan/new_odbc_scanner.cpp index 1022be3c83..2b371bbea7 100644 --- a/be/src/vec/exec/scan/new_odbc_scanner.cpp +++ b/be/src/vec/exec/scan/new_odbc_scanner.cpp @@ -60,12 +60,9 @@ NewOdbcScanner::NewOdbcScanner(RuntimeState* state, NewOdbcScanNode* parent, int _tuple_id(odbc_scan_node.tuple_id), _tuple_desc(nullptr) {} -Status NewOdbcScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { +Status NewOdbcScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) { VLOG_CRITICAL << NEW_SCANNER_TYPE << "::prepare"; - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR(vconjunct_ctx_ptr->clone(state, &_vconjunct_ctx)); - } + RETURN_IF_ERROR(VScanner::prepare(state, conjuncts)); if (_is_init) { return Status::OK(); diff --git a/be/src/vec/exec/scan/new_odbc_scanner.h b/be/src/vec/exec/scan/new_odbc_scanner.h index 0fbafd0c5c..e75b6465a0 100644 --- a/be/src/vec/exec/scan/new_odbc_scanner.h +++ b/be/src/vec/exec/scan/new_odbc_scanner.h @@ -56,7 +56,7 @@ public: Status close(RuntimeState* state) override; public: - Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eos) override; diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp index e713dde642..b656444329 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.cpp +++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp @@ -431,9 +431,17 @@ Status NewOlapScanNode::_init_scanners(std::list* scanners) { SCOPED_TIMER(_scanner_init_timer); auto span = opentelemetry::trace::Tracer::GetCurrentSpan(); - if (_vconjunct_ctx_ptr && _vconjunct_ctx_ptr->root()) { - _runtime_profile->add_info_string("RemainedDownPredicates", - _vconjunct_ctx_ptr->root()->debug_string()); + if (!_conjuncts.empty()) { + std::string message; + for (auto& conjunct : _conjuncts) { + if (conjunct->root()) { + if (!message.empty()) { + message += ", "; + } + message += conjunct->root()->debug_string(); + } + } + _runtime_profile->add_info_string("RemainedDownPredicates", message); } if (!_olap_scan_node.output_column_unique_ids.empty()) { diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 450753b2a5..605688bd0d 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -103,11 +103,12 @@ static std::string read_columns_to_string(TabletSchemaSPtr tablet_schema, Status NewOlapScanner::init() { _is_init = true; auto parent = static_cast(_parent); - RETURN_IF_ERROR(VScanner::prepare(_state, parent->_vconjunct_ctx_ptr)); - if (parent->_common_vexpr_ctxs_pushdown != nullptr) { - // Copy common_vexpr_ctxs_pushdown from scan node to this scanner's _common_vexpr_ctxs_pushdown, just necessary. - RETURN_IF_ERROR( - parent->_common_vexpr_ctxs_pushdown->clone(_state, &_common_vexpr_ctxs_pushdown)); + RETURN_IF_ERROR(VScanner::prepare(_state, parent->_conjuncts)); + + for (auto& ctx : parent->_common_expr_ctxs_push_down) { + VExprContextSPtr context; + RETURN_IF_ERROR(ctx->clone(_state, context)); + _common_expr_ctxs_push_down.emplace_back(context); } // set limit to reduce end of rowset and segment mem use @@ -264,14 +265,19 @@ Status NewOlapScanner::_init_tablet_reader_params( real_parent->_olap_scan_node.push_down_agg_type_opt; } _tablet_reader_params.version = Version(0, _version); - // TODO: If a new runtime filter arrives after `_vconjunct_ctx` move to `_common_vexpr_ctxs_pushdown`, - // `_vconjunct_ctx` and `_common_vexpr_ctxs_pushdown` will have values at the same time, - // and the root() of `_vconjunct_ctx` and `_common_vexpr_ctxs_pushdown` should be merged as `remaining_vconjunct_root` - _tablet_reader_params.remaining_vconjunct_root = - (_common_vexpr_ctxs_pushdown == nullptr) - ? (_vconjunct_ctx == nullptr ? nullptr : _vconjunct_ctx->root()) - : _common_vexpr_ctxs_pushdown->root(); - _tablet_reader_params.common_vexpr_ctxs_pushdown = _common_vexpr_ctxs_pushdown; + + // TODO: If a new runtime filter arrives after `_conjuncts` move to `_common_expr_ctxs_push_down`, + if (_common_expr_ctxs_push_down.empty()) { + for (auto& conjunct : _conjuncts) { + _tablet_reader_params.remaining_conjunct_roots.emplace_back(conjunct->root()); + } + } else { + for (auto& ctx : _common_expr_ctxs_push_down) { + _tablet_reader_params.remaining_conjunct_roots.emplace_back(ctx->root()); + } + } + + _tablet_reader_params.common_expr_ctxs_push_down = _common_expr_ctxs_push_down; _tablet_reader_params.output_columns = ((NewOlapScanNode*)_parent)->_maybe_read_column_ids; // Condition @@ -403,7 +409,7 @@ Status NewOlapScanner::_init_tablet_reader_params( _tablet_reader_params.read_orderby_key_num_prefix_columns = olap_scan_node.sort_info.is_asc_order.size(); _tablet_reader_params.read_orderby_key_limit = _limit; - _tablet_reader_params.filter_block_vconjunct_ctx_ptr = &_vconjunct_ctx; + _tablet_reader_params.filter_block_conjuncts = _conjuncts; } // runtime predicate push down optimization for topn diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index f4a9265d62..767106b936 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -97,10 +97,10 @@ VFileScanner::VFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t } Status VFileScanner::prepare( - VExprContext* vconjunct_ctx_ptr, + const VExprContextSPtrs& conjuncts, std::unordered_map* colname_to_value_range, const std::unordered_map* colname_to_slot_id) { - RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); + RETURN_IF_ERROR(VScanner::prepare(_state, conjuncts)); _colname_to_value_range = colname_to_value_range; _col_name_to_slot_id = colname_to_slot_id; @@ -126,11 +126,19 @@ Status VFileScanner::prepare( std::vector({_input_tuple_desc->id()}), std::vector({false}))); // prepare pre filters - if (_params.__isset.pre_filter_exprs) { - RETURN_IF_ERROR(doris::vectorized::VExpr::create_expr_tree( - _state->obj_pool(), _params.pre_filter_exprs, &_pre_conjunct_ctx_ptr)); - RETURN_IF_ERROR(_pre_conjunct_ctx_ptr->prepare(_state, *_src_row_desc)); - RETURN_IF_ERROR(_pre_conjunct_ctx_ptr->open(_state)); + if (_params.__isset.pre_filter_exprs_list) { + RETURN_IF_ERROR(doris::vectorized::VExpr::create_expr_trees( + _params.pre_filter_exprs_list, _pre_conjunct_ctxs)); + } else if (_params.__isset.pre_filter_exprs) { + VExprContextSPtr context; + RETURN_IF_ERROR( + doris::vectorized::VExpr::create_expr_tree(_params.pre_filter_exprs, context)); + _pre_conjunct_ctxs.emplace_back(context); + } + + for (auto& conjunct : _pre_conjunct_ctxs) { + RETURN_IF_ERROR(conjunct->prepare(_state, *_src_row_desc)); + RETURN_IF_ERROR(conjunct->open(_state)); } } @@ -141,21 +149,27 @@ Status VFileScanner::prepare( return Status::OK(); } -Status VFileScanner::_split_conjuncts(VExpr* conjunct_expr_root) { - static constexpr auto is_leaf = [](VExpr* expr) { return !expr->is_and_expr(); }; - if (conjunct_expr_root != nullptr) { +Status VFileScanner::_split_conjuncts() { + for (auto& conjunct : _conjuncts) { + RETURN_IF_ERROR(_split_conjuncts_expr(conjunct, conjunct->root())); + } + return Status::OK(); +} +Status VFileScanner::_split_conjuncts_expr(const VExprContextSPtr& context, + const VExprSPtr& conjunct_expr_root) { + static constexpr auto is_leaf = [](const auto& expr) { return !expr->is_and_expr(); }; + if (conjunct_expr_root) { if (is_leaf(conjunct_expr_root)) { auto impl = conjunct_expr_root->get_impl(); // If impl is not null, which means this a conjuncts from runtime filter. - VExpr* cur_expr = impl ? const_cast(impl) : conjunct_expr_root; - VExprContext* new_ctx = - _state->obj_pool()->add(VExprContext::create_unique(cur_expr).release()); - _vconjunct_ctx->clone_fn_contexts(new_ctx); + auto cur_expr = impl ? impl : conjunct_expr_root; + VExprContextSPtr new_ctx = VExprContext::create_shared(cur_expr); + context->clone_fn_contexts(new_ctx.get()); RETURN_IF_ERROR(new_ctx->prepare(_state, *_default_val_row_desc)); RETURN_IF_ERROR(new_ctx->open(_state)); std::vector slot_ids; - _get_slot_ids(cur_expr, &slot_ids); + _get_slot_ids(cur_expr.get(), &slot_ids); if (slot_ids.size() == 0) { _not_single_slot_filter_conjuncts.emplace_back(new_ctx); return Status::OK(); @@ -169,29 +183,25 @@ Status VFileScanner::_split_conjuncts(VExpr* conjunct_expr_root) { } if (single_slot) { SlotId slot_id = slot_ids[0]; - if (_slot_id_to_filter_conjuncts.find(slot_id) == - _slot_id_to_filter_conjuncts.end()) { - _slot_id_to_filter_conjuncts.insert({slot_id, std::vector()}); - } _slot_id_to_filter_conjuncts[slot_id].emplace_back(new_ctx); } else { _not_single_slot_filter_conjuncts.emplace_back(new_ctx); } } else { - RETURN_IF_ERROR(_split_conjuncts(conjunct_expr_root->children()[0])); - RETURN_IF_ERROR(_split_conjuncts(conjunct_expr_root->children()[1])); + RETURN_IF_ERROR(_split_conjuncts_expr(context, conjunct_expr_root->children()[0])); + RETURN_IF_ERROR(_split_conjuncts_expr(context, conjunct_expr_root->children()[1])); } } return Status::OK(); } void VFileScanner::_get_slot_ids(VExpr* expr, std::vector* slot_ids) { - for (VExpr* child_expr : expr->children()) { + for (auto& child_expr : expr->children()) { if (child_expr->is_slot_ref()) { - VSlotRef* slot_ref = reinterpret_cast(child_expr); + VSlotRef* slot_ref = reinterpret_cast(child_expr.get()); slot_ids->emplace_back(slot_ref->slot_id()); } - _get_slot_ids(child_expr, slot_ids); + _get_slot_ids(child_expr.get(), slot_ids); } } @@ -255,7 +265,7 @@ Status VFileScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eo // Fill columns not exist in file with null or default value RETURN_IF_ERROR(_fill_missing_columns(read_rows)); } - // Apply _pre_conjunct_ctx_ptr to filter src block. + // Apply _pre_conjunct_ctxs to filter src block. RETURN_IF_ERROR(_pre_filter_src_block()); // Convert src block to output block (dest block), string to dest data type and apply filters. RETURN_IF_ERROR(_convert_to_output_block(block)); @@ -409,7 +419,7 @@ Status VFileScanner::_fill_missing_columns(size_t rows) { nullable_column->insert_many_defaults(rows); } else { // fill with default value - auto* ctx = it->second; + auto& ctx = it->second; auto origin_column_num = _src_block_ptr->columns(); int result_column_id = -1; // PT1 => dest primitive type @@ -441,12 +451,12 @@ Status VFileScanner::_pre_filter_src_block() { if (!_is_load) { return Status::OK(); } - if (_pre_conjunct_ctx_ptr) { + if (!_pre_conjunct_ctxs.empty()) { SCOPED_TIMER(_pre_filter_timer); auto origin_column_num = _src_block_ptr->columns(); auto old_rows = _src_block_ptr->rows(); - RETURN_IF_ERROR(vectorized::VExprContext::filter_block(_pre_conjunct_ctx_ptr, - _src_block_ptr, origin_column_num)); + RETURN_IF_ERROR(vectorized::VExprContext::filter_block(_pre_conjunct_ctxs, _src_block_ptr, + origin_column_num)); _counter.num_rows_unselected += old_rows - _src_block.rows(); } return Status::OK(); @@ -476,7 +486,7 @@ Status VFileScanner::_convert_to_output_block(Block* block) { int dest_index = ctx_idx++; vectorized::ColumnPtr column_ptr; - auto* ctx = _dest_vexpr_ctx[dest_index]; + auto& ctx = _dest_vexpr_ctx[dest_index]; int result_column_id = -1; // PT1 => dest primitive type RETURN_IF_ERROR(ctx->execute(&_src_block, &result_column_id)); @@ -583,8 +593,11 @@ Status VFileScanner::_get_next_reader() { const_cast(&_state->timezone_obj()), _io_ctx.get(), _state, _kv_cache, _state->query_options().enable_parquet_lazy_mat); RETURN_IF_ERROR(parquet_reader->open()); - if (!_is_load && _push_down_expr == nullptr && _vconjunct_ctx != nullptr) { - RETURN_IF_ERROR(_vconjunct_ctx->clone(_state, &_push_down_expr)); + if (!_is_load && _push_down_conjuncts.empty() && !_conjuncts.empty()) { + _push_down_conjuncts.resize(_conjuncts.size()); + for (size_t i = 0; i != _conjuncts.size(); ++i) { + RETURN_IF_ERROR(_conjuncts[i]->clone(_state, _push_down_conjuncts[i])); + } _discard_conjuncts(); } if (range.__isset.table_format_params && @@ -594,24 +607,29 @@ Status VFileScanner::_get_next_reader() { _state, _params, range, _kv_cache, _io_ctx.get()); init_status = iceberg_reader->init_reader( - _file_col_names, _col_id_name_map, _colname_to_value_range, _push_down_expr, - _real_tuple_desc, _default_val_row_desc.get(), _col_name_to_slot_id, - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); + _file_col_names, _col_id_name_map, _colname_to_value_range, + _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), + _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts); RETURN_IF_ERROR(iceberg_reader->init_row_filters(range)); _cur_reader = std::move(iceberg_reader); } else { std::vector place_holder; init_status = parquet_reader->init_reader( - _file_col_names, place_holder, _colname_to_value_range, _push_down_expr, - _real_tuple_desc, _default_val_row_desc.get(), _col_name_to_slot_id, - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); + _file_col_names, place_holder, _colname_to_value_range, + _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), + _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts); _cur_reader = std::move(parquet_reader); } break; } case TFileFormatType::FORMAT_ORC: { - if (!_is_load && _push_down_expr == nullptr && _vconjunct_ctx != nullptr) { - RETURN_IF_ERROR(_vconjunct_ctx->clone(_state, &_push_down_expr)); + if (!_is_load && _push_down_conjuncts.empty() && !_conjuncts.empty()) { + _push_down_conjuncts.resize(_conjuncts.size()); + for (size_t i = 0; i != _conjuncts.size(); ++i) { + RETURN_IF_ERROR(_conjuncts[i]->clone(_state, _push_down_conjuncts[i])); + } _discard_conjuncts(); } _cur_reader = OrcReader::create_unique( @@ -619,7 +637,7 @@ Status VFileScanner::_get_next_reader() { _state->query_options().batch_size, _state->timezone(), _io_ctx.get(), _state->query_options().enable_orc_lazy_mat); init_status = ((OrcReader*)(_cur_reader.get())) - ->init_reader(_colname_to_value_range, _push_down_expr); + ->init_reader(_colname_to_value_range, _push_down_conjuncts); break; } case TFileFormatType::FORMAT_CSV_PLAIN: @@ -679,7 +697,7 @@ Status VFileScanner::_get_next_reader() { Status VFileScanner::_generate_fill_columns() { std::unordered_map> partition_columns; - std::unordered_map missing_columns; + std::unordered_map missing_columns; const TFileRangeDesc& range = _ranges.at(_next_range - 1); if (range.__isset.columns_from_path && !_partition_slot_descs.empty()) { @@ -777,12 +795,11 @@ Status VFileScanner::_init_expr_ctxes() { if (!slot_desc->is_materialized()) { continue; } - vectorized::VExprContext* ctx = nullptr; + vectorized::VExprContextSPtr ctx; auto it = _params.default_value_of_src_slot.find(slot_desc->id()); if (it != std::end(_params.default_value_of_src_slot)) { if (!it->second.nodes.empty()) { - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_tree(_state->obj_pool(), it->second, &ctx)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(it->second, ctx)); RETURN_IF_ERROR(ctx->prepare(_state, *_default_val_row_desc)); RETURN_IF_ERROR(ctx->open(_state)); } @@ -805,10 +822,9 @@ Status VFileScanner::_init_expr_ctxes() { slot_desc->id(), slot_desc->col_name()); } - vectorized::VExprContext* ctx = nullptr; + vectorized::VExprContextSPtr ctx; if (!it->second.nodes.empty()) { - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_tree(_state->obj_pool(), it->second, &ctx)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(it->second, ctx)); RETURN_IF_ERROR(ctx->prepare(_state, *_src_row_desc)); RETURN_IF_ERROR(ctx->open(_state)); } @@ -837,8 +853,8 @@ Status VFileScanner::_init_expr_ctxes() { _output_tuple_desc && _output_tuple_desc->slots().back()->type().is_variant_type(); // TODO: It should can move to scan node to process. - if (_vconjunct_ctx && _vconjunct_ctx->root()) { - _split_conjuncts(_vconjunct_ctx->root()); + if (!_conjuncts.empty()) { + _split_conjuncts(); } return Status::OK(); } @@ -854,18 +870,18 @@ Status VFileScanner::close(RuntimeState* state) { } } - for (auto it : _col_default_value_ctx) { + for (auto& it : _col_default_value_ctx) { if (it.second != nullptr) { it.second->close(state); } } - if (_pre_conjunct_ctx_ptr) { - _pre_conjunct_ctx_ptr->close(state); + for (auto& conjunct : _pre_conjunct_ctxs) { + conjunct->close(state); } - if (_push_down_expr) { - _push_down_expr->close(state); + for (auto& conjunct : _push_down_conjuncts) { + conjunct->close(state); } for (auto& [k, v] : _slot_id_to_filter_conjuncts) { @@ -876,7 +892,7 @@ Status VFileScanner::close(RuntimeState* state) { } } - for (auto* ctx : _not_single_slot_filter_conjuncts) { + for (auto ctx : _not_single_slot_filter_conjuncts) { if (ctx != nullptr) { ctx->close(state); } diff --git a/be/src/vec/exec/scan/vfile_scanner.h b/be/src/vec/exec/scan/vfile_scanner.h index 54a1df77d3..162be02bda 100644 --- a/be/src/vec/exec/scan/vfile_scanner.h +++ b/be/src/vec/exec/scan/vfile_scanner.h @@ -71,7 +71,7 @@ public: Status close(RuntimeState* state) override; - Status prepare(VExprContext* vconjunct_ctx_ptr, + Status prepare(const VExprContextSPtrs& conjuncts, std::unordered_map* colname_to_value_range, const std::unordered_map* colname_to_slot_id); @@ -108,11 +108,11 @@ protected: // created from param.expr_of_dest_slot // For query, it saves default value expr of all dest columns, or nullptr for NULL. // For load, it saves conversion expr/default value of all dest columns. - std::vector _dest_vexpr_ctx; + VExprContextSPtrs _dest_vexpr_ctx; // dest slot name to index in _dest_vexpr_ctx; std::unordered_map _dest_slot_name_to_idx; // col name to default value expr - std::unordered_map _col_default_value_ctx; + std::unordered_map _col_default_value_ctx; // the map values of dest slot id to src slot desc // if there is not key of dest slot id in dest_sid_to_src_sid_without_trans, it will be set to nullptr std::vector _src_slot_descs_order_by_dest; @@ -128,7 +128,7 @@ protected: std::unordered_set _missing_cols; // For load task - doris::vectorized::VExprContext* _pre_conjunct_ctx_ptr = nullptr; + vectorized::VExprContextSPtrs _pre_conjunct_ctxs; std::unique_ptr _src_row_desc; // row desc for default exprs std::unique_ptr _default_val_row_desc; @@ -146,7 +146,7 @@ protected: Block* _src_block_ptr; Block _src_block; - VExprContext* _push_down_expr = nullptr; + VExprContextSPtrs _push_down_conjuncts; bool _is_dynamic_schema = false; // for tracing dynamic schema std::unique_ptr _full_base_schema_view; @@ -165,9 +165,9 @@ private: const std::unordered_map* _col_name_to_slot_id; // single slot filter conjuncts - std::unordered_map> _slot_id_to_filter_conjuncts; + std::unordered_map _slot_id_to_filter_conjuncts; // not single(zero or multi) slot filter conjuncts - std::vector _not_single_slot_filter_conjuncts; + VExprContextSPtrs _not_single_slot_filter_conjuncts; private: Status _init_expr_ctxes(); @@ -179,7 +179,9 @@ private: Status _convert_to_output_block(Block* block); Status _generate_fill_columns(); Status _handle_dynamic_block(Block* block); - Status _split_conjuncts(VExpr* conjunct_expr_root); + Status _split_conjuncts(); + Status _split_conjuncts_expr(const VExprContextSPtr& context, + const VExprSPtr& conjunct_expr_root); void _get_slot_ids(VExpr* expr, std::vector* slot_ids); void _reset_counter() { diff --git a/be/src/vec/exec/scan/vmeta_scan_node.cpp b/be/src/vec/exec/scan/vmeta_scan_node.cpp index 3dec257dca..b94049697d 100644 --- a/be/src/vec/exec/scan/vmeta_scan_node.cpp +++ b/be/src/vec/exec/scan/vmeta_scan_node.cpp @@ -65,7 +65,7 @@ Status VMetaScanNode::_init_scanners(std::list* scanners) { for (auto& scan_range : _scan_ranges) { std::shared_ptr scanner = VMetaScanner::create_shared( _state, this, _tuple_id, scan_range, _limit_per_scanner, runtime_profile()); - RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr)); + RETURN_IF_ERROR(scanner->prepare(_state, _conjuncts)); scanners->push_back(scanner); } return Status::OK(); diff --git a/be/src/vec/exec/scan/vmeta_scanner.cpp b/be/src/vec/exec/scan/vmeta_scanner.cpp index 4a59804e6a..41114eba76 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.cpp +++ b/be/src/vec/exec/scan/vmeta_scanner.cpp @@ -70,9 +70,9 @@ Status VMetaScanner::open(RuntimeState* state) { return Status::OK(); } -Status VMetaScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { +Status VMetaScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) { VLOG_CRITICAL << "VMetaScanner::prepare"; - RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); + RETURN_IF_ERROR(VScanner::prepare(_state, conjuncts)); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); RETURN_IF_ERROR(_fetch_metadata(_scan_range.meta_scan_range)); return Status::OK(); diff --git a/be/src/vec/exec/scan/vmeta_scanner.h b/be/src/vec/exec/scan/vmeta_scanner.h index 3cac485cb2..de6e5cbd41 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.h +++ b/be/src/vec/exec/scan/vmeta_scanner.h @@ -55,7 +55,7 @@ public: Status open(RuntimeState* state) override; Status close(RuntimeState* state) override; - Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eos) override; diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index 4c0241f9c2..007b197897 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -353,7 +353,7 @@ bool VScanNode::runtime_filters_are_ready_or_timeout() { Status VScanNode::_acquire_runtime_filter(bool wait) { SCOPED_TIMER(_acquire_runtime_filter_timer); - std::vector vexprs; + VExprSPtrs vexprs; for (size_t i = 0; i < _runtime_filter_descs.size(); ++i) { IRuntimeFilter* runtime_filter = _runtime_filter_ctxs[i].runtime_filter; bool ready = runtime_filter->is_ready(); @@ -380,59 +380,19 @@ Status VScanNode::_acquire_runtime_filter(bool wait) { return Status::OK(); } -Status VScanNode::_append_rf_into_conjuncts(std::vector& vexprs) { +Status VScanNode::_append_rf_into_conjuncts(const VExprSPtrs& vexprs) { if (vexprs.empty()) { return Status::OK(); } - VExpr* last_expr = nullptr; - if (_vconjunct_ctx_ptr != nullptr) { - last_expr = _vconjunct_ctx_ptr->root(); - } else { - DCHECK(_rf_vexpr_set.find(vexprs[0]) == _rf_vexpr_set.end()); - last_expr = vexprs[0]; - _rf_vexpr_set.insert(vexprs[0]); + for (auto& expr : vexprs) { + VExprContextSPtr conjunct = VExprContext::create_shared(expr); + RETURN_IF_ERROR(conjunct->prepare(_state, _row_descriptor)); + RETURN_IF_ERROR(conjunct->open(_state)); + _rf_vexpr_set.insert(expr); + _conjuncts.emplace_back(conjunct); } - for (size_t j = _vconjunct_ctx_ptr ? 0 : 1; j < vexprs.size(); j++) { - if (_rf_vexpr_set.find(vexprs[j]) != _rf_vexpr_set.end()) { - continue; - } - TFunction fn; - TFunctionName fn_name; - fn_name.__set_db_name(""); - fn_name.__set_function_name("and"); - fn.__set_name(fn_name); - fn.__set_binary_type(TFunctionBinaryType::BUILTIN); - std::vector arg_types; - arg_types.push_back(create_type_desc(PrimitiveType::TYPE_BOOLEAN)); - arg_types.push_back(create_type_desc(PrimitiveType::TYPE_BOOLEAN)); - fn.__set_arg_types(arg_types); - fn.__set_ret_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN)); - fn.__set_has_var_args(false); - TExprNode texpr_node; - texpr_node.__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN)); - texpr_node.__set_node_type(TExprNodeType::COMPOUND_PRED); - texpr_node.__set_opcode(TExprOpcode::COMPOUND_AND); - texpr_node.__set_fn(fn); - texpr_node.__set_is_nullable(last_expr->is_nullable() || vexprs[j]->is_nullable()); - VExpr* new_node = _pool->add(VcompoundPred::create_unique(texpr_node).release()); - new_node->add_child(last_expr); - DCHECK((vexprs[j])->get_impl() != nullptr); - new_node->add_child(vexprs[j]); - last_expr = new_node; - _rf_vexpr_set.insert(vexprs[j]); - } - auto new_vconjunct_ctx_ptr = _pool->add(VExprContext::create_unique(last_expr).release()); - if (_vconjunct_ctx_ptr) { - _vconjunct_ctx_ptr->clone_fn_contexts(new_vconjunct_ctx_ptr); - } - RETURN_IF_ERROR(new_vconjunct_ctx_ptr->prepare(_state, _row_descriptor)); - RETURN_IF_ERROR(new_vconjunct_ctx_ptr->open(_state)); - if (_vconjunct_ctx_ptr) { - _stale_vexpr_ctxs.push_back(_vconjunct_ctx_ptr); - } - _vconjunct_ctx_ptr = new_vconjunct_ctx_ptr; return Status::OK(); } @@ -459,11 +419,12 @@ void VScanNode::release_resource(RuntimeState* state) { runtime_filter->consumer_close(); } - for (auto& ctx : _stale_vexpr_ctxs) { + for (auto& ctx : _stale_expr_ctxs) { ctx->close(state); } - if (_common_vexpr_ctxs_pushdown) { - _common_vexpr_ctxs_pushdown->close(state); + + for (auto& ctx : _common_expr_ctxs_push_down) { + ctx->close(state); } ExecNode::release_resource(state); @@ -529,21 +490,26 @@ Status VScanNode::_normalize_conjuncts() { } } } - if (_vconjunct_ctx_ptr) { - if (_vconjunct_ctx_ptr->root()) { - VExpr* new_root; - RETURN_IF_ERROR(_normalize_predicate(_vconjunct_ctx_ptr->root(), &new_root)); + + for (auto it = _conjuncts.begin(); it != _conjuncts.end();) { + auto& conjunct = *it; + if (conjunct->root()) { + VExprSPtr new_root; + RETURN_IF_ERROR(_normalize_predicate(conjunct->root(), conjunct.get(), new_root)); if (new_root) { - _vconjunct_ctx_ptr->set_root(new_root); + conjunct->set_root(new_root); if (_should_push_down_common_expr()) { - _common_vexpr_ctxs_pushdown = _vconjunct_ctx_ptr; - _vconjunct_ctx_ptr = nullptr; + _common_expr_ctxs_push_down.emplace_back(conjunct); + it = _conjuncts.erase(it); + continue; } - } else { // All conjucts are pushed down as predicate column - _stale_vexpr_ctxs.push_back(_vconjunct_ctx_ptr); - _vconjunct_ctx_ptr = nullptr; + } else { // All conjuncts are pushed down as predicate column + _stale_expr_ctxs.emplace_back(conjunct); + it = _conjuncts.erase(it); + continue; } } + ++it; } for (auto& it : _slot_id_to_value_range) { std::visit( @@ -559,28 +525,30 @@ Status VScanNode::_normalize_conjuncts() { return Status::OK(); } -Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output_expr) { - static constexpr auto is_leaf = [](VExpr* expr) { return !expr->is_and_expr(); }; - auto in_predicate_checker = [](const std::vector& children, const VSlotRef** slot, - VExpr** child_contains_slot) { +Status VScanNode::_normalize_predicate(const VExprSPtr& conjunct_expr_root, VExprContext* context, + VExprSPtr& output_expr) { + static constexpr auto is_leaf = [](auto&& expr) { return !expr->is_and_expr(); }; + auto in_predicate_checker = [](const VExprSPtrs& children, std::shared_ptr& slot, + VExprSPtr& child_contains_slot) { if (children.empty() || VExpr::expr_without_cast(children[0])->node_type() != TExprNodeType::SLOT_REF) { // not a slot ref(column) return false; } - *slot = reinterpret_cast(VExpr::expr_without_cast(children[0])); - *child_contains_slot = children[0]; + slot = std::dynamic_pointer_cast(VExpr::expr_without_cast(children[0])); + child_contains_slot = children[0]; return true; }; - auto eq_predicate_checker = [](const std::vector& children, const VSlotRef** slot, - VExpr** child_contains_slot) { - for (const VExpr* child : children) { + auto eq_predicate_checker = [](const VExprSPtrs& children, std::shared_ptr& slot, + VExprSPtr& child_contains_slot) { + for (const auto& child : children) { if (VExpr::expr_without_cast(child)->node_type() != TExprNodeType::SLOT_REF) { // not a slot ref(column) continue; } - *slot = reinterpret_cast(VExpr::expr_without_cast(child)); - *child_contains_slot = const_cast(child); + slot = std::dynamic_pointer_cast(VExpr::expr_without_cast(child)); + CHECK(slot != nullptr); + child_contains_slot = child; return true; } return false; @@ -590,15 +558,15 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output if (is_leaf(conjunct_expr_root)) { auto impl = conjunct_expr_root->get_impl(); // If impl is not null, which means this a conjuncts from runtime filter. - VExpr* cur_expr = impl ? const_cast(impl) : conjunct_expr_root; - bool is_runtimer_filter_predicate = + auto cur_expr = impl ? impl.get() : conjunct_expr_root.get(); + bool _is_runtime_filter_predicate = _rf_vexpr_set.find(conjunct_expr_root) != _rf_vexpr_set.end(); SlotDescriptor* slot = nullptr; ColumnValueRangeType* range = nullptr; PushDownType pdt = PushDownType::UNACCEPTABLE; - RETURN_IF_ERROR(_eval_const_conjuncts(cur_expr, _vconjunct_ctx_ptr, &pdt)); + RETURN_IF_ERROR(_eval_const_conjuncts(cur_expr, context, &pdt)); if (pdt == PushDownType::ACCEPTABLE) { - *output_expr = nullptr; + output_expr = nullptr; return Status::OK(); } if (_is_predicate_acting_on_slot(cur_expr, in_predicate_checker, &slot, &range) || @@ -607,26 +575,26 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output [&](auto& value_range) { Defer mark_runtime_filter_flag {[&]() { value_range.mark_runtime_filter_predicate( - is_runtimer_filter_predicate); + _is_runtime_filter_predicate); }}; RETURN_IF_PUSH_DOWN(_normalize_in_and_eq_predicate( - cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); + cur_expr, context, slot, value_range, &pdt)); RETURN_IF_PUSH_DOWN(_normalize_not_in_and_not_eq_predicate( - cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); + cur_expr, context, slot, value_range, &pdt)); RETURN_IF_PUSH_DOWN(_normalize_is_null_predicate( - cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); + cur_expr, context, slot, value_range, &pdt)); RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate( - cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); - RETURN_IF_PUSH_DOWN(_normalize_match_predicate( - cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); + cur_expr, context, slot, value_range, &pdt)); + RETURN_IF_PUSH_DOWN(_normalize_match_predicate(cur_expr, context, slot, + value_range, &pdt)); if (_is_key_column(slot->col_name())) { - RETURN_IF_PUSH_DOWN(_normalize_bitmap_filter( - cur_expr, _vconjunct_ctx_ptr, slot, &pdt)); - RETURN_IF_PUSH_DOWN(_normalize_bloom_filter( - cur_expr, _vconjunct_ctx_ptr, slot, &pdt)); + RETURN_IF_PUSH_DOWN( + _normalize_bitmap_filter(cur_expr, context, slot, &pdt)); + RETURN_IF_PUSH_DOWN( + _normalize_bloom_filter(cur_expr, context, slot, &pdt)); if (_state->enable_function_pushdown()) { RETURN_IF_PUSH_DOWN(_normalize_function_filters( - cur_expr, _vconjunct_ctx_ptr, slot, &pdt)); + cur_expr, context, slot, &pdt)); } } }, @@ -635,54 +603,52 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output if (pdt == PushDownType::UNACCEPTABLE && TExprNodeType::COMPOUND_PRED == cur_expr->node_type()) { - _normalize_compound_predicate(cur_expr, _vconjunct_ctx_ptr, &pdt, - is_runtimer_filter_predicate, in_predicate_checker, - eq_predicate_checker); - *output_expr = conjunct_expr_root; // remaining in conjunct tree + _normalize_compound_predicate(cur_expr, context, &pdt, _is_runtime_filter_predicate, + in_predicate_checker, eq_predicate_checker); + output_expr = conjunct_expr_root; // remaining in conjunct tree return Status::OK(); } if (pdt == PushDownType::ACCEPTABLE && _is_key_column(slot->col_name())) { - *output_expr = nullptr; + output_expr = nullptr; return Status::OK(); } else { // for PARTIAL_ACCEPTABLE and UNACCEPTABLE, do not remove expr from the tree - *output_expr = conjunct_expr_root; + output_expr = conjunct_expr_root; return Status::OK(); } } else { - VExpr* left_child; - RETURN_IF_ERROR(_normalize_predicate(conjunct_expr_root->children()[0], &left_child)); - VExpr* right_child; - RETURN_IF_ERROR(_normalize_predicate(conjunct_expr_root->children()[1], &right_child)); + VExprSPtr left_child; + RETURN_IF_ERROR( + _normalize_predicate(conjunct_expr_root->children()[0], context, left_child)); + VExprSPtr right_child; + RETURN_IF_ERROR( + _normalize_predicate(conjunct_expr_root->children()[1], context, right_child)); if (left_child != nullptr && right_child != nullptr) { conjunct_expr_root->set_children({left_child, right_child}); - *output_expr = conjunct_expr_root; + output_expr = conjunct_expr_root; return Status::OK(); } else { if (left_child == nullptr) { - conjunct_expr_root->children()[0]->close( - _state, _vconjunct_ctx_ptr, - _vconjunct_ctx_ptr->get_function_state_scope()); + conjunct_expr_root->children()[0]->close(_state, context, + context->get_function_state_scope()); } if (right_child == nullptr) { - conjunct_expr_root->children()[1]->close( - _state, _vconjunct_ctx_ptr, - _vconjunct_ctx_ptr->get_function_state_scope()); + conjunct_expr_root->children()[1]->close(_state, context, + context->get_function_state_scope()); } // here only close the and expr self, do not close the child conjunct_expr_root->set_children({}); - conjunct_expr_root->close(_state, _vconjunct_ctx_ptr, - _vconjunct_ctx_ptr->get_function_state_scope()); + conjunct_expr_root->close(_state, context, context->get_function_state_scope()); } // here do not close VExpr* now - *output_expr = left_child != nullptr ? left_child : right_child; + output_expr = left_child != nullptr ? left_child : right_child; return Status::OK(); } } - *output_expr = conjunct_expr_root; + output_expr = conjunct_expr_root; return Status::OK(); } @@ -720,7 +686,7 @@ Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext* expr_ct VExpr* fn_expr = expr; if (TExprNodeType::COMPOUND_PRED == expr->node_type() && expr->fn().name.function_name == "not") { - fn_expr = fn_expr->children()[0]; + fn_expr = fn_expr->children()[0].get(); opposite = true; } @@ -741,11 +707,12 @@ Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext* expr_ct bool VScanNode::_is_predicate_acting_on_slot( VExpr* expr, - const std::function&, const VSlotRef**, VExpr**)>& checker, + const std::function&, VExprSPtr&)>& + checker, SlotDescriptor** slot_desc, ColumnValueRangeType** range) { - const VSlotRef* slot_ref = nullptr; - VExpr* child_contains_slot = nullptr; - if (!checker(expr->children(), &slot_ref, &child_contains_slot)) { + std::shared_ptr slot_ref; + VExprSPtr child_contains_slot; + if (!checker(expr->children(), slot_ref, child_contains_slot)) { // not a slot ref(column) return false; } @@ -759,7 +726,7 @@ bool VScanNode::_is_predicate_acting_on_slot( if (child_contains_slot->type().type != (*slot_desc)->type().type || child_contains_slot->type().precision != (*slot_desc)->type().precision || child_contains_slot->type().scale != (*slot_desc)->type().scale) { - if (!ignore_cast(*slot_desc, child_contains_slot)) { + if (!ignore_cast(*slot_desc, child_contains_slot.get())) { // the type of predicate not match the slot's type return false; } @@ -781,7 +748,7 @@ Status VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, Pu if (const ColumnConst* const_column = check_and_get_column(const_col_wrapper->column_ptr)) { constant_val = const_cast(const_column->get_data_at(0).data); - if (constant_val == nullptr || *reinterpret_cast(constant_val) == false) { + if (constant_val == nullptr || !*reinterpret_cast(constant_val)) { *pdt = PushDownType::ACCEPTABLE; _eos = true; } @@ -798,7 +765,7 @@ Status VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, Pu DCHECK_EQ(bool_column->size(), 1); if (bool_column->size() == 1) { constant_val = const_cast(bool_column->get_data_at(0).data); - if (constant_val == nullptr || *reinterpret_cast(constant_val) == false) { + if (constant_val == nullptr || !*reinterpret_cast(constant_val)) { *pdt = PushDownType::ACCEPTABLE; _eos = true; } @@ -1081,16 +1048,16 @@ Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* e Status VScanNode::_normalize_compound_predicate( vectorized::VExpr* expr, VExprContext* expr_ctx, PushDownType* pdt, - bool is_runtimer_filter_predicate, - const std::function&, const VSlotRef**, VExpr**)>& + bool _is_runtime_filter_predicate, + const std::function&, VExprSPtr&)>& in_predicate_checker, - const std::function&, const VSlotRef**, VExpr**)>& + const std::function&, VExprSPtr&)>& eq_predicate_checker) { if (TExprNodeType::COMPOUND_PRED == expr->node_type()) { auto compound_fn_name = expr->fn().name.function_name; auto children_num = expr->children().size(); for (auto i = 0; i < children_num; ++i) { - VExpr* child_expr = expr->children()[i]; + auto child_expr = expr->children()[i].get(); if (TExprNodeType::BINARY_PRED == child_expr->node_type()) { SlotDescriptor* slot = nullptr; ColumnValueRangeType* range_on_slot = nullptr; @@ -1104,7 +1071,7 @@ Status VScanNode::_normalize_compound_predicate( [&](auto& value_range) { Defer mark_runtime_filter_flag {[&]() { value_range.mark_runtime_filter_predicate( - is_runtimer_filter_predicate); + _is_runtime_filter_predicate); }}; _normalize_binary_in_compound_predicate(child_expr, expr_ctx, slot, value_range, pdt); @@ -1126,7 +1093,7 @@ Status VScanNode::_normalize_compound_predicate( [&](auto& value_range) { Defer mark_runtime_filter_flag {[&]() { value_range.mark_runtime_filter_predicate( - is_runtimer_filter_predicate); + _is_runtime_filter_predicate); }}; _normalize_match_in_compound_predicate(child_expr, expr_ctx, slot, value_range, pdt); @@ -1137,7 +1104,7 @@ Status VScanNode::_normalize_compound_predicate( } } else if (TExprNodeType::COMPOUND_PRED == child_expr->node_type()) { _normalize_compound_predicate(child_expr, expr_ctx, pdt, - is_runtimer_filter_predicate, in_predicate_checker, + _is_runtime_filter_predicate, in_predicate_checker, eq_predicate_checker); } } @@ -1329,22 +1296,22 @@ Status VScanNode::try_append_late_arrival_runtime_filter(int* arrived_rf_num) { } // 1. Check if are runtime filter ready but not applied. - std::vector vexprs; + VExprSPtrs exprs; int current_arrived_rf_num = 0; for (size_t i = 0; i < _runtime_filter_descs.size(); ++i) { if (_runtime_filter_ctxs[i].apply_mark) { ++current_arrived_rf_num; continue; } else if (_runtime_filter_ctxs[i].runtime_filter->is_ready()) { - _runtime_filter_ctxs[i].runtime_filter->get_prepared_vexprs(&vexprs, _row_descriptor, - _state); + _runtime_filter_ctxs[i].runtime_filter->get_prepared_exprs(&exprs, _row_descriptor, + _state); ++current_arrived_rf_num; _runtime_filter_ctxs[i].apply_mark = true; } } // 2. Append unapplied runtime filters to vconjunct_ctx_ptr - if (!vexprs.empty()) { - RETURN_IF_ERROR(_append_rf_into_conjuncts(vexprs)); + if (!exprs.empty()) { + RETURN_IF_ERROR(_append_rf_into_conjuncts(exprs)); } if (current_arrived_rf_num == _runtime_filter_descs.size()) { _is_all_rf_applied = true; @@ -1354,10 +1321,13 @@ Status VScanNode::try_append_late_arrival_runtime_filter(int* arrived_rf_num) { return Status::OK(); } -Status VScanNode::clone_vconjunct_ctx(VExprContext** _vconjunct_ctx) { - if (_vconjunct_ctx_ptr) { +Status VScanNode::clone_conjunct_ctxs(VExprContextSPtrs& conjuncts) { + if (!_conjuncts.empty()) { std::unique_lock l(_rf_locks); - return _vconjunct_ctx_ptr->clone(_state, _vconjunct_ctx); + conjuncts.resize(_conjuncts.size()); + for (size_t i = 0; i != _conjuncts.size(); ++i) { + RETURN_IF_ERROR(_conjuncts[i]->clone(_state, conjuncts[i])); + } } return Status::OK(); } diff --git a/be/src/vec/exec/scan/vscan_node.h b/be/src/vec/exec/scan/vscan_node.h index 9eafb61289..e4c2fb6118 100644 --- a/be/src/vec/exec/scan/vscan_node.h +++ b/be/src/vec/exec/scan/vscan_node.h @@ -144,8 +144,8 @@ public: // Return num of filters which are applied already. Status try_append_late_arrival_runtime_filter(int* arrived_rf_num); - // Clone current vconjunct_ctx to _vconjunct_ctx, if exists. - Status clone_vconjunct_ctx(VExprContext** _vconjunct_ctx); + // Clone current _conjuncts to conjuncts, if exists. + Status clone_conjunct_ctxs(VExprContextSPtrs& conjuncts); int runtime_filter_num() const { return (int)_runtime_filter_ctxs.size(); } @@ -269,7 +269,7 @@ protected: // Set to true if the runtime filter is ready. std::vector _runtime_filter_ready_flag; doris::Mutex _rf_locks; - phmap::flat_hash_set _rf_vexpr_set; + phmap::flat_hash_set _rf_vexpr_set; // True means all runtime filters are applied to scanners bool _is_all_rf_applied = true; @@ -321,8 +321,8 @@ protected: // Every time vconjunct_ctx_ptr is updated, the old ctx will be stored in this vector // so that it will be destroyed uniformly at the end of the query. - std::vector _stale_vexpr_ctxs; - VExprContext* _common_vexpr_ctxs_pushdown = nullptr; + VExprContextSPtrs _stale_expr_ctxs; + VExprContextSPtrs _common_expr_ctxs_push_down; // If sort info is set, push limit to each scanner; int64_t _limit_per_scanner = -1; @@ -373,10 +373,11 @@ private: // Get all arrived runtime filters at Open phase. Status _acquire_runtime_filter(bool wait = true); // Append late-arrival runtime filters to the vconjunct_ctx. - Status _append_rf_into_conjuncts(std::vector& vexprs); + Status _append_rf_into_conjuncts(const VExprSPtrs& vexprs); Status _normalize_conjuncts(); - Status _normalize_predicate(VExpr* conjunct_expr_root, VExpr** output_expr); + Status _normalize_predicate(const VExprSPtr& conjunct_expr_root, VExprContext* context, + VExprSPtr& output_expr); Status _eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, PushDownType* pdt); Status _normalize_bloom_filter(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, @@ -388,10 +389,11 @@ private: Status _normalize_function_filters(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, PushDownType* pdt); - bool _is_predicate_acting_on_slot(VExpr* expr, - const std::function&, - const VSlotRef**, VExpr**)>& checker, - SlotDescriptor** slot_desc, ColumnValueRangeType** range); + bool _is_predicate_acting_on_slot( + VExpr* expr, + const std::function&, VExprSPtr&)>& + checker, + SlotDescriptor** slot_desc, ColumnValueRangeType** range); template Status _normalize_in_and_eq_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, @@ -410,9 +412,9 @@ private: Status _normalize_compound_predicate( vectorized::VExpr* expr, VExprContext* expr_ctx, PushDownType* pdt, bool is_runtimer_filter_predicate, - const std::function&, const VSlotRef**, VExpr**)>& + const std::function&, VExprSPtr&)>& in_predicate_checker, - const std::function&, const VSlotRef**, VExpr**)>& + const std::function&, VExprSPtr&)>& eq_predicate_checker); template diff --git a/be/src/vec/exec/scan/vscanner.cpp b/be/src/vec/exec/scan/vscanner.cpp index f3144c8c62..d7c6d20aa0 100644 --- a/be/src/vec/exec/scan/vscanner.cpp +++ b/be/src/vec/exec/scan/vscanner.cpp @@ -40,11 +40,14 @@ VScanner::VScanner(RuntimeState* state, VScanNode* parent, int64_t limit, Runtim _is_load = (_input_tuple_desc != nullptr); } -Status VScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR(vconjunct_ctx_ptr->clone(_state, &_vconjunct_ctx)); +Status VScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) { + if (!conjuncts.empty()) { + _conjuncts.resize(conjuncts.size()); + for (size_t i = 0; i != conjuncts.size(); ++i) { + RETURN_IF_ERROR(conjuncts[i]->clone(state, _conjuncts[i])); + } } + return Status::OK(); } @@ -107,7 +110,7 @@ Status VScanner::get_block(RuntimeState* state, Block* block, bool* eof) { Status VScanner::_filter_output_block(Block* block) { auto old_rows = block->rows(); - Status st = VExprContext::filter_block(_vconjunct_ctx, block, block->columns()); + Status st = VExprContext::filter_block(_conjuncts, block, block->columns()); _counter.num_rows_unselected += old_rows - block->rows(); return st; } @@ -127,13 +130,13 @@ Status VScanner::try_append_late_arrival_runtime_filter() { } // There are newly arrived runtime filters, - // renew the vconjunct_ctx_ptr - if (_vconjunct_ctx) { + // renew the _conjuncts + if (!_conjuncts.empty()) { _discard_conjuncts(); } // Notice that the number of runtime filters may be larger than _applied_rf_num. // But it is ok because it will be updated at next time. - RETURN_IF_ERROR(_parent->clone_vconjunct_ctx(&_vconjunct_ctx)); + RETURN_IF_ERROR(_parent->clone_conjunct_ctxs(_conjuncts)); _applied_rf_num = arrived_rf_num; return Status::OK(); } @@ -142,14 +145,16 @@ Status VScanner::close(RuntimeState* state) { if (_is_closed) { return Status::OK(); } - for (auto& ctx : _stale_vexpr_ctxs) { + for (auto& ctx : _stale_expr_ctxs) { ctx->close(state); } - if (_vconjunct_ctx) { - _vconjunct_ctx->close(state); + + for (auto& conjunct : _conjuncts) { + conjunct->close(state); } - if (_common_vexpr_ctxs_pushdown) { - _common_vexpr_ctxs_pushdown->close(state); + + for (auto& ctx : _common_expr_ctxs_push_down) { + ctx->close(state); } COUNTER_UPDATE(_parent->_scanner_wait_worker_timer, _scanner_wait_worker_timer); diff --git a/be/src/vec/exec/scan/vscanner.h b/be/src/vec/exec/scan/vscanner.h index 4fcc019fdc..bba93dec47 100644 --- a/be/src/vec/exec/scan/vscanner.h +++ b/be/src/vec/exec/scan/vscanner.h @@ -77,7 +77,7 @@ protected: Status _filter_output_block(Block* block); // Not virtual, all child will call this method explictly - Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); public: VScanNode* get_parent() { return _parent; } @@ -126,8 +126,6 @@ public: void set_status_on_failure(const Status& st) { _status = st; } - VExprContext** vconjunct_ctx_ptr() { return &_vconjunct_ctx; } - // return false if _is_counted_down is already true, // otherwise, set _is_counted_down to true and return true. bool set_counted_down() { @@ -140,10 +138,10 @@ public: protected: void _discard_conjuncts() { - if (_vconjunct_ctx) { - _stale_vexpr_ctxs.push_back(_vconjunct_ctx); - _vconjunct_ctx = nullptr; + for (auto& conjunct : _conjuncts) { + _stale_expr_ctxs.emplace_back(conjunct); } + _conjuncts.clear(); } protected: @@ -171,14 +169,15 @@ protected: // means all runtime filters are arrived and applied. int _applied_rf_num = 0; int _total_rf_num = 0; - // Cloned from _vconjunct_ctx of scan node. + // Cloned from _conjuncts of scan node. // It includes predicate in SQL and runtime filters. - VExprContext* _vconjunct_ctx = nullptr; - VExprContext* _common_vexpr_ctxs_pushdown = nullptr; - // Late arriving runtime filters will update _vconjunct_ctx. - // The old _vconjunct_ctx will be temporarily placed in _stale_vexpr_ctxs + VExprContextSPtrs _conjuncts; + + VExprContextSPtrs _common_expr_ctxs_push_down; + // Late arriving runtime filters will update _conjuncts. + // The old _conjuncts will be temporarily placed in _stale_expr_ctxs // and will be destroyed at the end. - std::vector _stale_vexpr_ctxs; + VExprContextSPtrs _stale_expr_ctxs; // num of rows read from scanner int64_t _num_rows_read = 0; diff --git a/be/src/vec/exec/vaggregation_node.cpp b/be/src/vec/exec/vaggregation_node.cpp index 1ebb0721b7..e4f2074dce 100644 --- a/be/src/vec/exec/vaggregation_node.cpp +++ b/be/src/vec/exec/vaggregation_node.cpp @@ -147,8 +147,7 @@ AggregationNode::~AggregationNode() = default; Status AggregationNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); // ignore return status for now , so we need to introduce ExecNode::init() - RETURN_IF_ERROR( - VExpr::create_expr_trees(_pool, tnode.agg_node.grouping_exprs, &_probe_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(tnode.agg_node.grouping_exprs, _probe_expr_ctxs)); // init aggregate functions _aggregate_evaluators.reserve(tnode.agg_node.aggregate_functions.size()); @@ -181,7 +180,7 @@ Status AggregationNode::init(const TPlanNode& tnode, RuntimeState* state) { return Status::OK(); } -void AggregationNode::_init_hash_method(std::vector& probe_exprs) { +void AggregationNode::_init_hash_method(const VExprContextSPtrs& probe_exprs) { DCHECK(probe_exprs.size() >= 1); if (probe_exprs.size() == 1) { auto is_nullable = probe_exprs[0]->root()->is_nullable(); @@ -262,8 +261,8 @@ void AggregationNode::_init_hash_method(std::vector& probe_exprs) _probe_key_sz.resize(_probe_expr_ctxs.size()); for (int i = 0; i < _probe_expr_ctxs.size(); ++i) { - const auto vexpr = _probe_expr_ctxs[i]->root(); - const auto& data_type = vexpr->data_type(); + const auto& expr = _probe_expr_ctxs[i]->root(); + const auto& data_type = expr->data_type(); if (!data_type->have_maximum_size_of_value()) { use_fixed_key = false; @@ -473,9 +472,9 @@ Status AggregationNode::prepare_profile(RuntimeState* state) { std::bind(&AggregationNode::_update_memusage_with_serialized_key, this); _executor.close = std::bind(&AggregationNode::_close_with_serialized_key, this); - _should_limit_output = _limit != -1 && // has limit - _vconjunct_ctx_ptr == nullptr && // no having conjunct - _needs_finalize; // agg's finalize step + _should_limit_output = _limit != -1 && // has limit + _conjuncts.empty() && // no having conjunct + _needs_finalize; // agg's finalize step } return Status::OK(); @@ -578,7 +577,7 @@ Status AggregationNode::pull(doris::RuntimeState* state, vectorized::Block* bloc RETURN_IF_ERROR(_executor.get_result(state, block, eos)); _make_nullable_output_key(block); // dispose the having clause, should not be execute in prestreaming agg - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, block, block->columns())); reached_limit(block, eos); return Status::OK(); diff --git a/be/src/vec/exec/vaggregation_node.h b/be/src/vec/exec/vaggregation_node.h index 4a62131e23..a0f95dd05c 100644 --- a/be/src/vec/exec/vaggregation_node.h +++ b/be/src/vec/exec/vaggregation_node.h @@ -889,7 +889,7 @@ private: friend class pipeline::AggSourceOperator; friend class pipeline::StreamingAggSourceOperator; // group by k1,k2 - std::vector _probe_expr_ctxs; + VExprContextSPtrs _probe_expr_ctxs; // left / full join will change the key nullable make output/input solt // nullable diff. so we need make nullable of it. std::vector _make_nullable_keys; @@ -999,7 +999,7 @@ private: Status _merge_with_serialized_key(Block* block); void _update_memusage_with_serialized_key(); void _close_with_serialized_key(); - void _init_hash_method(std::vector& probe_exprs); + void _init_hash_method(const VExprContextSPtrs& probe_exprs); template void _pre_serialize_key_if_need(AggState& state, AggMethod& agg_method, @@ -1072,7 +1072,7 @@ private: CHECK(ctxs.size() == 1 && ctxs[0]->root()->is_slot_ref()) << "input_exprs_ctxs is invalid, input_exprs_ctx[0]=" << ctxs[0]->root()->debug_string(); - return ((VSlotRef*)ctxs[0]->root())->column_id(); + return ((VSlotRef*)ctxs[0]->root().get())->column_id(); } template diff --git a/be/src/vec/exec/vanalytic_eval_node.cpp b/be/src/vec/exec/vanalytic_eval_node.cpp index 528a624432..cef242e688 100644 --- a/be/src/vec/exec/vanalytic_eval_node.cpp +++ b/be/src/vec/exec/vanalytic_eval_node.cpp @@ -138,10 +138,9 @@ Status VAnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) { _agg_intput_columns[i].resize(desc.nodes[0].num_children); for (int j = 0; j < desc.nodes[0].num_children; ++j) { ++node_idx; - VExpr* expr = nullptr; - VExprContext* ctx = nullptr; - RETURN_IF_ERROR( - VExpr::create_tree_from_thrift(_pool, desc.nodes, &node_idx, &expr, &ctx)); + VExprSPtr expr; + VExprContextSPtr ctx; + RETURN_IF_ERROR(VExpr::create_tree_from_thrift(desc.nodes, &node_idx, expr, ctx)); _agg_expr_ctxs[i].emplace_back(ctx); } @@ -154,10 +153,9 @@ Status VAnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) { } } - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, analytic_node.partition_exprs, - &_partition_by_eq_expr_ctxs)); RETURN_IF_ERROR( - VExpr::create_expr_trees(_pool, analytic_node.order_by_exprs, &_order_by_eq_expr_ctxs)); + VExpr::create_expr_trees(analytic_node.partition_exprs, _partition_by_eq_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(analytic_node.order_by_exprs, _order_by_eq_expr_ctxs)); _partition_by_column_idxs.resize(_partition_by_eq_expr_ctxs.size()); _ordey_by_column_idxs.resize(_order_by_eq_expr_ctxs.size()); _agg_functions_size = _agg_functions.size(); @@ -286,8 +284,7 @@ Status VAnalyticEvalNode::pull(doris::RuntimeState* /*state*/, vectorized::Block } } RETURN_IF_ERROR(_output_current_block(output_block)); - RETURN_IF_ERROR( - VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, output_block, output_block->columns())); reached_limit(output_block, eos); return Status::OK(); } @@ -345,7 +342,7 @@ Status VAnalyticEvalNode::get_next(RuntimeState* state, vectorized::Block* block } } RETURN_IF_ERROR(_output_current_block(block)); - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, block, block->columns())); reached_limit(block, eos); return Status::OK(); } @@ -602,8 +599,9 @@ Status VAnalyticEvalNode::sink(doris::RuntimeState* /*state*/, vectorized::Block return Status::OK(); } -Status VAnalyticEvalNode::_insert_range_column(vectorized::Block* block, VExprContext* expr, - IColumn* dst_column, size_t length) { +Status VAnalyticEvalNode::_insert_range_column(vectorized::Block* block, + const VExprContextSPtr& expr, IColumn* dst_column, + size_t length) { int result_col_id = -1; RETURN_IF_ERROR(expr->execute(block, &result_col_id)); DCHECK_GE(result_col_id, 0); diff --git a/be/src/vec/exec/vanalytic_eval_node.h b/be/src/vec/exec/vanalytic_eval_node.h index ef344fdcc8..bd08ef2dca 100644 --- a/be/src/vec/exec/vanalytic_eval_node.h +++ b/be/src/vec/exec/vanalytic_eval_node.h @@ -37,6 +37,7 @@ #include "vec/common/arena.h" #include "vec/core/block.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class DescriptorTbl; @@ -44,9 +45,6 @@ class ObjectPool; class RuntimeState; class TupleDescriptor; -namespace vectorized { -class VExprContext; -} // namespace vectorized } // namespace doris namespace doris::vectorized { @@ -102,8 +100,8 @@ private: Status _init_result_columns(); Status _create_agg_status(); Status _destroy_agg_status(); - Status _insert_range_column(vectorized::Block* block, VExprContext* expr, IColumn* dst_column, - size_t length); + Status _insert_range_column(vectorized::Block* block, const VExprContextSPtr& expr, + IColumn* dst_column, size_t length); void _update_order_by_range(); bool _init_next_partition(BlockRowPos found_partition_end); @@ -140,9 +138,9 @@ private: std::vector _input_blocks; std::vector input_block_first_row_positions; std::vector _agg_functions; - std::vector> _agg_expr_ctxs; - std::vector _partition_by_eq_expr_ctxs; - std::vector _order_by_eq_expr_ctxs; + std::vector _agg_expr_ctxs; + VExprContextSPtrs _partition_by_eq_expr_ctxs; + VExprContextSPtrs _order_by_eq_expr_ctxs; std::vector> _agg_intput_columns; std::vector _result_window_columns; diff --git a/be/src/vec/exec/vdata_gen_scan_node.cpp b/be/src/vec/exec/vdata_gen_scan_node.cpp index 9fccc16aaf..367bdb8499 100644 --- a/be/src/vec/exec/vdata_gen_scan_node.cpp +++ b/be/src/vec/exec/vdata_gen_scan_node.cpp @@ -102,7 +102,7 @@ Status VDataGenFunctionScanNode::get_next(RuntimeState* state, vectorized::Block } RETURN_IF_CANCELLED(state); Status res = _table_func->get_next(state, block, eos); - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, block, block->columns())); reached_limit(block, eos); return res; } diff --git a/be/src/vec/exec/vjdbc_connector.cpp b/be/src/vec/exec/vjdbc_connector.cpp index d8e5405395..613c76f8f8 100644 --- a/be/src/vec/exec/vjdbc_connector.cpp +++ b/be/src/vec/exec/vjdbc_connector.cpp @@ -715,8 +715,7 @@ Status JdbcConnector::exec_write_sql(const std::u16string& insert_stmt, return Status::OK(); } -Status JdbcConnector::exec_stmt_write( - Block* block, const std::vector& output_vexpr_ctxs) { +Status JdbcConnector::exec_stmt_write(Block* block, const VExprContextSPtrs& output_vexpr_ctxs) { SCOPED_TIMER(_result_send_timer); JNIEnv* env = nullptr; RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); diff --git a/be/src/vec/exec/vjdbc_connector.h b/be/src/vec/exec/vjdbc_connector.h index d113cceb6e..38c71d0143 100644 --- a/be/src/vec/exec/vjdbc_connector.h +++ b/be/src/vec/exec/vjdbc_connector.h @@ -78,8 +78,7 @@ public: Status exec_write_sql(const std::u16string& insert_stmt, const fmt::memory_buffer& insert_stmt_buffer) override; - Status exec_stmt_write(Block* block, - const std::vector& output_vexpr_ctxs); + Status exec_stmt_write(Block* block, const VExprContextSPtrs& output_vexpr_ctxs); Status get_next(bool* eos, std::vector& columns, Block* block, int batch_size); diff --git a/be/src/vec/exec/vpartition_sort_node.cpp b/be/src/vec/exec/vpartition_sort_node.cpp index cb3b199285..f52e8cb678 100644 --- a/be/src/vec/exec/vpartition_sort_node.cpp +++ b/be/src/vec/exec/vpartition_sort_node.cpp @@ -55,8 +55,8 @@ Status VPartitionSortNode::init(const TPlanNode& tnode, RuntimeState* state) { } //partition by key if (tnode.partition_sort_node.__isset.partition_exprs) { - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, tnode.partition_sort_node.partition_exprs, - &_partition_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(tnode.partition_sort_node.partition_exprs, + _partition_expr_ctxs)); _partition_exprs_num = _partition_expr_ctxs.size(); _partition_columns.resize(_partition_exprs_num); } diff --git a/be/src/vec/exec/vpartition_sort_node.h b/be/src/vec/exec/vpartition_sort_node.h index 0b24ce8378..4aae4a7acb 100644 --- a/be/src/vec/exec/vpartition_sort_node.h +++ b/be/src/vec/exec/vpartition_sort_node.h @@ -353,7 +353,7 @@ private: std::unique_ptr _agg_arena_pool; // partition by k1,k2 int _partition_exprs_num = 0; - std::vector _partition_expr_ctxs; + VExprContextSPtrs _partition_expr_ctxs; std::vector _partition_columns; std::vector _partition_key_sz; std::vector _hash_values; diff --git a/be/src/vec/exec/vrepeat_node.cpp b/be/src/vec/exec/vrepeat_node.cpp index cb6a2278e6..8a67e5a90d 100644 --- a/be/src/vec/exec/vrepeat_node.cpp +++ b/be/src/vec/exec/vrepeat_node.cpp @@ -57,7 +57,7 @@ VRepeatNode::VRepeatNode(ObjectPool* pool, const TPlanNode& tnode, const Descrip Status VRepeatNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, tnode.repeat_node.exprs, &_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(tnode.repeat_node.exprs, _expr_ctxs)); return Status::OK(); } @@ -224,7 +224,7 @@ Status VRepeatNode::push(RuntimeState* state, vectorized::Block* input_block, bo if (input_block->rows() > 0) { _intermediate_block = Block::create_unique(); - for (auto expr : _expr_ctxs) { + for (auto& expr : _expr_ctxs) { int result_column_id = -1; RETURN_IF_ERROR(expr->execute(input_block, &result_column_id)); DCHECK(result_column_id != -1); diff --git a/be/src/vec/exec/vrepeat_node.h b/be/src/vec/exec/vrepeat_node.h index 4a2c242e6c..837b4c8aca 100644 --- a/be/src/vec/exec/vrepeat_node.h +++ b/be/src/vec/exec/vrepeat_node.h @@ -27,6 +27,7 @@ #include "common/global_types.h" #include "exec/exec_node.h" #include "vec/core/block.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { @@ -39,7 +40,6 @@ class SlotDescriptor; class TupleDescriptor; namespace vectorized { -class VExprContext; class VRepeatNode : public ExecNode { public: @@ -79,7 +79,7 @@ private: std::vector _output_slots; - std::vector _expr_ctxs; + VExprContextSPtrs _expr_ctxs; bool _child_eos; int _repeat_id_idx; }; diff --git a/be/src/vec/exec/vschema_scan_node.cpp b/be/src/vec/exec/vschema_scan_node.cpp index fd06f48ba6..ecc0fdb888 100644 --- a/be/src/vec/exec/vschema_scan_node.cpp +++ b/be/src/vec/exec/vschema_scan_node.cpp @@ -268,7 +268,7 @@ Status VSchemaScanNode::get_next(RuntimeState* state, vectorized::Block* block, *src_block.get_by_name(dest_slot_desc->col_name()).column, 0, src_block.rows()); } - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, block, _dest_tuple_desc->slots().size())); VLOG_ROW << "VSchemaScanNode output rows: " << src_block.rows(); src_block.clear(); diff --git a/be/src/vec/exec/vselect_node.cpp b/be/src/vec/exec/vselect_node.cpp index c8b61ec94a..ee1628cd19 100644 --- a/be/src/vec/exec/vselect_node.cpp +++ b/be/src/vec/exec/vselect_node.cpp @@ -75,8 +75,7 @@ Status VSelectNode::get_next(RuntimeState* state, vectorized::Block* block, bool Status VSelectNode::pull(RuntimeState* state, vectorized::Block* output_block, bool* eos) { RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR( - VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, output_block, output_block->columns())); reached_limit(output_block, eos); return Status::OK(); diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp index d18ee55931..58b59c1b77 100644 --- a/be/src/vec/exec/vset_operation_node.cpp +++ b/be/src/vec/exec/vset_operation_node.cpp @@ -205,8 +205,8 @@ Status VSetOperationNode::init(const TPlanNode& tnode, RuntimeStat } for (auto& texprs : result_texpr_lists) { - std::vector ctxs; - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, texprs, &ctxs)); + VExprContextSPtrs ctxs; + RETURN_IF_ERROR(VExpr::create_expr_trees(texprs, ctxs)); _child_expr_lists.push_back(ctxs); } @@ -218,7 +218,7 @@ Status VSetOperationNode::init(const TPlanNode& tnode, RuntimeStat template Status VSetOperationNode::alloc_resource(RuntimeState* state) { // open result expr lists. - for (const std::vector& exprs : _child_expr_lists) { + for (const VExprContextSPtrs& exprs : _child_expr_lists) { RETURN_IF_ERROR(VExpr::open(exprs, state)); } _probe_columns.resize(_child_expr_lists[1].size()); @@ -431,8 +431,7 @@ Status VSetOperationNode::pull(RuntimeState* state, Block* output_ }, *_hash_table_variants); RETURN_IF_ERROR(st); - RETURN_IF_ERROR( - VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, output_block, output_block->columns())); reached_limit(output_block, eos); return Status::OK(); } diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h index f4c9e0c870..39a74f899e 100644 --- a/be/src/vec/exec/vset_operation_node.h +++ b/be/src/vec/exec/vset_operation_node.h @@ -104,7 +104,7 @@ private: int64_t _valid_element_in_hash_tbl; //The i-th result expr list refers to the i-th child. - std::vector> _child_expr_lists; + std::vector _child_expr_lists; //record build column type DataTypes _left_table_data_types; //first:column_id, could point to origin column or cast column diff --git a/be/src/vec/exec/vtable_function_node.cpp b/be/src/vec/exec/vtable_function_node.cpp index ce80dbb931..d3e967afcc 100644 --- a/be/src/vec/exec/vtable_function_node.cpp +++ b/be/src/vec/exec/vtable_function_node.cpp @@ -48,15 +48,15 @@ Status VTableFunctionNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); for (const TExpr& texpr : tnode.table_function_node.fnCallExprList) { - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, texpr, &ctx)); + VExprContextSPtr ctx; + RETURN_IF_ERROR(VExpr::create_expr_tree(texpr, ctx)); _vfn_ctxs.push_back(ctx); - VExpr* root = ctx->root(); + auto root = ctx->root(); const std::string& tf_name = root->fn().name.function_name; TableFunction* fn = nullptr; RETURN_IF_ERROR(TableFunctionFactory::get_fn(tf_name, _pool, &fn)); - fn->set_vexpr_context(ctx); + fn->set_expr_context(ctx); _fns.push_back(fn); } _fn_num = _fns.size(); @@ -236,8 +236,7 @@ Status VTableFunctionNode::_get_expanded_block(RuntimeState* state, Block* outpu } // 3. eval conjuncts - RETURN_IF_ERROR( - VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, output_block, output_block->columns())); *eos = _child_eos && _cur_child_offset == -1; return Status::OK(); diff --git a/be/src/vec/exec/vtable_function_node.h b/be/src/vec/exec/vtable_function_node.h index 1a4f1438c3..d2ca9589c2 100644 --- a/be/src/vec/exec/vtable_function_node.h +++ b/be/src/vec/exec/vtable_function_node.h @@ -145,7 +145,7 @@ private: std::vector _output_slots; int64_t _cur_child_offset = 0; - std::vector _vfn_ctxs; + VExprContextSPtrs _vfn_ctxs; std::vector _fns; int _fn_num = 0; diff --git a/be/src/vec/exec/vunion_node.cpp b/be/src/vec/exec/vunion_node.cpp index 6bee8ec6a5..a2fef62076 100644 --- a/be/src/vec/exec/vunion_node.cpp +++ b/be/src/vec/exec/vunion_node.cpp @@ -63,15 +63,15 @@ Status VUnionNode::init(const TPlanNode& tnode, RuntimeState* state) { // Create const_expr_ctx_lists_ from thrift exprs. auto& const_texpr_lists = tnode.union_node.const_expr_lists; for (auto& texprs : const_texpr_lists) { - std::vector ctxs; - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, texprs, &ctxs)); + VExprContextSPtrs ctxs; + RETURN_IF_ERROR(VExpr::create_expr_trees(texprs, ctxs)); _const_expr_lists.push_back(ctxs); } // Create result_expr_ctx_lists_ from thrift exprs. auto& result_texpr_lists = tnode.union_node.result_expr_lists; for (auto& texprs : result_texpr_lists) { - std::vector ctxs; - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, texprs, &ctxs)); + VExprContextSPtrs ctxs; + RETURN_IF_ERROR(VExpr::create_expr_trees(texprs, ctxs)); _child_expr_lists.push_back(ctxs); } return Status::OK(); @@ -83,7 +83,7 @@ Status VUnionNode::prepare(RuntimeState* state) { _materialize_exprs_evaluate_timer = ADD_TIMER(_runtime_profile, "MaterializeExprsEvaluateTimer"); // Prepare const expr lists. - for (const std::vector& exprs : _const_expr_lists) { + for (const VExprContextSPtrs& exprs : _const_expr_lists) { RETURN_IF_ERROR(VExpr::prepare(exprs, state, _row_descriptor)); } @@ -107,11 +107,11 @@ Status VUnionNode::open(RuntimeState* state) { Status VUnionNode::alloc_resource(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); // open const expr lists. - for (const std::vector& exprs : _const_expr_lists) { + for (const auto& exprs : _const_expr_lists) { RETURN_IF_ERROR(VExpr::open(exprs, state)); } // open result expr lists. - for (const std::vector& exprs : _child_expr_lists) { + for (const auto& exprs : _child_expr_lists) { RETURN_IF_ERROR(VExpr::open(exprs, state)); } return ExecNode::alloc_resource(state); @@ -297,7 +297,7 @@ Status VUnionNode::get_next(RuntimeState* state, Block* block, bool* eos) { } else if (has_more_const(state)) { RETURN_IF_ERROR(get_next_const(state, block)); } - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, block, block->columns())); *eos = (!has_more_passthrough() && !has_more_materialized() && !has_more_const(state)); reached_limit(block, eos); @@ -339,7 +339,7 @@ void VUnionNode::debug_string(int indentation_level, std::stringstream* out) con } Status VUnionNode::materialize_block(Block* src_block, int child_idx, Block* res_block) { - const std::vector& child_exprs = _child_expr_lists[child_idx]; + const auto& child_exprs = _child_expr_lists[child_idx]; ColumnsWithTypeAndName colunms; for (size_t i = 0; i < child_exprs.size(); ++i) { int result_column_id = -1; diff --git a/be/src/vec/exec/vunion_node.h b/be/src/vec/exec/vunion_node.h index 79ef72106c..c25bb07102 100644 --- a/be/src/vec/exec/vunion_node.h +++ b/be/src/vec/exec/vunion_node.h @@ -28,6 +28,7 @@ #include "runtime/runtime_state.h" #include "util/runtime_profile.h" #include "vec/core/block.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class DescriptorTbl; @@ -35,7 +36,6 @@ class ObjectPool; class TPlanNode; namespace vectorized { -class VExprContext; class VUnionNode final : public ExecNode { public: @@ -67,10 +67,10 @@ public: private: /// Const exprs materialized by this node. These exprs don't refer to any children. /// Only materialized by the first fragment instance to avoid duplication. - std::vector> _const_expr_lists; + std::vector _const_expr_lists; /// Exprs materialized by this node. The i-th result expr list refers to the i-th child. - std::vector> _child_expr_lists; + std::vector _child_expr_lists; /// Index of the first non-passthrough child; i.e. a child that needs materialization. /// 0 when all children are materialized, '_children.size()' when no children are /// materialized. @@ -106,7 +106,7 @@ private: /// have been consumed from the current child block. Updates '_child_row_idx'. Status materialize_block(Block* dst_block, int child_idx, Block* res_block); - Status get_error_msg(const std::vector& exprs); + Status get_error_msg(const VExprContextSPtrs& exprs); /// Returns true if the child at 'child_idx' can be passed through. bool is_child_passthrough(int child_idx) const { diff --git a/be/src/vec/exprs/lambda_function/lambda_function.h b/be/src/vec/exprs/lambda_function/lambda_function.h index a7f2fb8f69..184b4c2cc3 100644 --- a/be/src/vec/exprs/lambda_function/lambda_function.h +++ b/be/src/vec/exprs/lambda_function/lambda_function.h @@ -33,7 +33,7 @@ public: virtual doris::Status execute(VExprContext* context, doris::vectorized::Block* block, int* result_column_id, const DataTypePtr& result_type, - const std::vector& children) = 0; + const VExprSPtrs& children) = 0; }; using LambdaFunctionPtr = std::shared_ptr; diff --git a/be/src/vec/exprs/lambda_function/varray_filter_function.cpp b/be/src/vec/exprs/lambda_function/varray_filter_function.cpp index 9fae6ed634..0a77ebd673 100644 --- a/be/src/vec/exprs/lambda_function/varray_filter_function.cpp +++ b/be/src/vec/exprs/lambda_function/varray_filter_function.cpp @@ -61,7 +61,7 @@ public: doris::Status execute(VExprContext* context, doris::vectorized::Block* block, int* result_column_id, const DataTypePtr& result_type, - const std::vector& children) override { + const VExprSPtrs& children) override { ///* array_filter(array, array) */// //1. child[0:end]->execute(src_block) diff --git a/be/src/vec/exprs/lambda_function/varray_map_function.cpp b/be/src/vec/exprs/lambda_function/varray_map_function.cpp index 2f57ed8976..67bd6bf4aa 100644 --- a/be/src/vec/exprs/lambda_function/varray_map_function.cpp +++ b/be/src/vec/exprs/lambda_function/varray_map_function.cpp @@ -61,7 +61,7 @@ public: doris::Status execute(VExprContext* context, doris::vectorized::Block* block, int* result_column_id, const DataTypePtr& result_type, - const std::vector& children) override { + const VExprSPtrs& children) override { ///* array_map(lambda,arg1,arg2,.....) */// //1. child[1:end]->execute(src_block) diff --git a/be/src/vec/exprs/table_function/table_function.h b/be/src/vec/exprs/table_function/table_function.h index 7fff88899b..4b31a681b1 100644 --- a/be/src/vec/exprs/table_function/table_function.h +++ b/be/src/vec/exprs/table_function/table_function.h @@ -81,7 +81,7 @@ public: std::string name() const { return _fn_name; } bool eos() const { return _eos; } - void set_vexpr_context(VExprContext* vexpr_context) { _vexpr_context = vexpr_context; } + void set_expr_context(const VExprContextSPtr& expr_context) { _expr_context = expr_context; } void set_nullable() { _is_nullable = true; } bool is_outer() const { return _is_outer; } @@ -97,7 +97,7 @@ public: protected: std::string _fn_name; - VExprContext* _vexpr_context = nullptr; + VExprContextSPtr _expr_context = nullptr; // true if there is no more data can be read from this function. bool _eos = false; // the position of current cursor diff --git a/be/src/vec/exprs/table_function/vexplode.cpp b/be/src/vec/exprs/table_function/vexplode.cpp index 8c17ceeb25..1340ff74bb 100644 --- a/be/src/vec/exprs/table_function/vexplode.cpp +++ b/be/src/vec/exprs/table_function/vexplode.cpp @@ -37,13 +37,13 @@ VExplodeTableFunction::VExplodeTableFunction() { } Status VExplodeTableFunction::process_init(Block* block) { - CHECK(_vexpr_context->root()->children().size() == 1) + CHECK(_expr_context->root()->children().size() == 1) << "VExplodeTableFunction only support 1 child but has " - << _vexpr_context->root()->children().size(); + << _expr_context->root()->children().size(); int value_column_idx = -1; - RETURN_IF_ERROR(_vexpr_context->root()->children()[0]->execute(_vexpr_context, block, - &value_column_idx)); + RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, + &value_column_idx)); _array_column = block->get_by_position(value_column_idx).column->convert_to_full_column_if_const(); diff --git a/be/src/vec/exprs/table_function/vexplode_bitmap.cpp b/be/src/vec/exprs/table_function/vexplode_bitmap.cpp index 2f717b7413..152566d00b 100644 --- a/be/src/vec/exprs/table_function/vexplode_bitmap.cpp +++ b/be/src/vec/exprs/table_function/vexplode_bitmap.cpp @@ -41,13 +41,13 @@ VExplodeBitmapTableFunction::VExplodeBitmapTableFunction() { } Status VExplodeBitmapTableFunction::process_init(Block* block) { - CHECK(_vexpr_context->root()->children().size() == 1) + CHECK(_expr_context->root()->children().size() == 1) << "VExplodeNumbersTableFunction must be have 1 children but have " - << _vexpr_context->root()->children().size(); + << _expr_context->root()->children().size(); int value_column_idx = -1; - RETURN_IF_ERROR(_vexpr_context->root()->children()[0]->execute(_vexpr_context, block, - &value_column_idx)); + RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, + &value_column_idx)); _value_column = block->get_by_position(value_column_idx).column; return Status::OK(); diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp b/be/src/vec/exprs/table_function/vexplode_json_array.cpp index 96e6b10065..16f6c39bf7 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp +++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp @@ -138,12 +138,12 @@ VExplodeJsonArrayTableFunction::VExplodeJsonArrayTableFunction(ExplodeJsonArrayT } Status VExplodeJsonArrayTableFunction::process_init(Block* block) { - CHECK(_vexpr_context->root()->children().size() == 1) - << _vexpr_context->root()->children().size(); + CHECK(_expr_context->root()->children().size() == 1) + << _expr_context->root()->children().size(); int text_column_idx = -1; - RETURN_IF_ERROR(_vexpr_context->root()->children()[0]->execute(_vexpr_context, block, - &text_column_idx)); + RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, + &text_column_idx)); _text_column = block->get_by_position(text_column_idx).column; return Status::OK(); diff --git a/be/src/vec/exprs/table_function/vexplode_numbers.cpp b/be/src/vec/exprs/table_function/vexplode_numbers.cpp index 022c0f13a5..fe450c8096 100644 --- a/be/src/vec/exprs/table_function/vexplode_numbers.cpp +++ b/be/src/vec/exprs/table_function/vexplode_numbers.cpp @@ -41,13 +41,13 @@ VExplodeNumbersTableFunction::VExplodeNumbersTableFunction() { } Status VExplodeNumbersTableFunction::process_init(Block* block) { - CHECK(_vexpr_context->root()->children().size() == 1) + CHECK(_expr_context->root()->children().size() == 1) << "VExplodeSplitTableFunction must be have 1 children but have " - << _vexpr_context->root()->children().size(); + << _expr_context->root()->children().size(); int value_column_idx = -1; - RETURN_IF_ERROR(_vexpr_context->root()->children()[0]->execute(_vexpr_context, block, - &value_column_idx)); + RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, + &value_column_idx)); _value_column = block->get_by_position(value_column_idx).column; if (is_column_const(*_value_column)) { _cur_size = 0; diff --git a/be/src/vec/exprs/table_function/vexplode_split.cpp b/be/src/vec/exprs/table_function/vexplode_split.cpp index e832fc43c1..7350419436 100644 --- a/be/src/vec/exprs/table_function/vexplode_split.cpp +++ b/be/src/vec/exprs/table_function/vexplode_split.cpp @@ -44,17 +44,17 @@ Status VExplodeSplitTableFunction::open() { } Status VExplodeSplitTableFunction::process_init(Block* block) { - CHECK(_vexpr_context->root()->children().size() == 2) + CHECK(_expr_context->root()->children().size() == 2) << "VExplodeSplitTableFunction must be have 2 children but have " - << _vexpr_context->root()->children().size(); + << _expr_context->root()->children().size(); int text_column_idx = -1; int delimiter_column_idx = -1; - RETURN_IF_ERROR(_vexpr_context->root()->children()[0]->execute(_vexpr_context, block, - &text_column_idx)); - RETURN_IF_ERROR(_vexpr_context->root()->children()[1]->execute(_vexpr_context, block, - &delimiter_column_idx)); + RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, + &text_column_idx)); + RETURN_IF_ERROR(_expr_context->root()->children()[1]->execute(_expr_context.get(), block, + &delimiter_column_idx)); // dispose test column _text_column = diff --git a/be/src/vec/exprs/varray_literal.cpp b/be/src/vec/exprs/varray_literal.cpp index 233c3538ab..a9c1a08e1a 100644 --- a/be/src/vec/exprs/varray_literal.cpp +++ b/be/src/vec/exprs/varray_literal.cpp @@ -47,9 +47,9 @@ Status VArrayLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context)); bool is_null = (_node_type == TExprNodeType::NULL_LITERAL); Field array = is_null ? Field() : Array(); - for (const auto child : _children) { + for (auto& child : _children) { Field item; - auto child_literal = dynamic_cast(child); + auto child_literal = std::dynamic_pointer_cast(child); child_literal->get_column_ptr()->get(0, item); array.get().push_back(item); } diff --git a/be/src/vec/exprs/vbitmap_predicate.h b/be/src/vec/exprs/vbitmap_predicate.h index 366a44ce46..8b4e6e00b8 100644 --- a/be/src/vec/exprs/vbitmap_predicate.h +++ b/be/src/vec/exprs/vbitmap_predicate.h @@ -61,9 +61,7 @@ public: void close(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VBitmapPredicate::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VBitmapPredicate::create_shared(*this); } const std::string& expr_name() const override; diff --git a/be/src/vec/exprs/vbloom_predicate.h b/be/src/vec/exprs/vbloom_predicate.h index d3de55596b..aaef48a098 100644 --- a/be/src/vec/exprs/vbloom_predicate.h +++ b/be/src/vec/exprs/vbloom_predicate.h @@ -51,9 +51,7 @@ public: FunctionContext::FunctionStateScope scope) override; void close(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VBloomPredicate::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VBloomPredicate::create_shared(*this); } const std::string& expr_name() const override; void set_filter(std::shared_ptr& filter); diff --git a/be/src/vec/exprs/vcase_expr.cpp b/be/src/vec/exprs/vcase_expr.cpp index 5dc9eec6e8..f5f4172b6a 100644 --- a/be/src/vec/exprs/vcase_expr.cpp +++ b/be/src/vec/exprs/vcase_expr.cpp @@ -118,7 +118,7 @@ std::string VCaseExpr::debug_string() const { out << "CaseExpr(has_case_expr=" << _has_case_expr << " has_else_expr=" << _has_else_expr << " function=" << _function_name << "){"; bool first = true; - for (VExpr* input_expr : children()) { + for (auto& input_expr : children()) { if (first) { first = false; } else { diff --git a/be/src/vec/exprs/vcase_expr.h b/be/src/vec/exprs/vcase_expr.h index c34e8d782e..ece370b736 100644 --- a/be/src/vec/exprs/vcase_expr.h +++ b/be/src/vec/exprs/vcase_expr.h @@ -32,7 +32,6 @@ class TExprNode; namespace vectorized { class Block; -class VExprContext; } // namespace vectorized } // namespace doris @@ -52,9 +51,7 @@ public: FunctionContext::FunctionStateScope scope) override; virtual void close(RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - virtual VExpr* clone(ObjectPool* pool) const override { - return pool->add(VCaseExpr::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VCaseExpr::create_shared(*this); } virtual const std::string& expr_name() const override; virtual std::string debug_string() const override; diff --git a/be/src/vec/exprs/vcast_expr.cpp b/be/src/vec/exprs/vcast_expr.cpp index 65fa18e89b..57fc993608 100644 --- a/be/src/vec/exprs/vcast_expr.cpp +++ b/be/src/vec/exprs/vcast_expr.cpp @@ -118,7 +118,7 @@ std::string VCastExpr::debug_string() const { out << "CastExpr(CAST " << _cast_param_data_type->get_name() << " to " << _target_data_type->get_name() << "){"; bool first = true; - for (VExpr* input_expr : children()) { + for (auto& input_expr : children()) { if (first) { first = false; } else { diff --git a/be/src/vec/exprs/vcast_expr.h b/be/src/vec/exprs/vcast_expr.h index f4e59d1581..7589ab38d5 100644 --- a/be/src/vec/exprs/vcast_expr.h +++ b/be/src/vec/exprs/vcast_expr.h @@ -51,9 +51,7 @@ public: FunctionContext::FunctionStateScope scope) override; virtual void close(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - virtual VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VCastExpr::create_unique(*this).release()); - } + virtual VExprSPtr clone() const override { return VCastExpr::create_shared(*this); } virtual const std::string& expr_name() const override; virtual std::string debug_string() const override; diff --git a/be/src/vec/exprs/vcolumn_ref.h b/be/src/vec/exprs/vcolumn_ref.h index 4ea582839d..3e1a6252a3 100644 --- a/be/src/vec/exprs/vcolumn_ref.h +++ b/be/src/vec/exprs/vcolumn_ref.h @@ -53,9 +53,7 @@ public: return Status::OK(); } - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VColumnRef::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VColumnRef::create_shared(*this); } bool is_constant() const override { return false; } diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 47f12fa13e..56e21ad082 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -38,19 +38,17 @@ inline std::string compound_operator_to_string(TExprOpcode::type op) { } } -class VcompoundPred : public VectorizedFnCall { - ENABLE_FACTORY_CREATOR(VcompoundPred); +class VCompoundPred : public VectorizedFnCall { + ENABLE_FACTORY_CREATOR(VCompoundPred); public: - VcompoundPred(const TExprNode& node) : VectorizedFnCall(node) { + VCompoundPred(const TExprNode& node) : VectorizedFnCall(node) { _op = node.opcode; _fn.name.function_name = compound_operator_to_string(_op); _expr_name = "VCompoundPredicate (" + _fn.name.function_name + ")"; } - VExpr* clone(ObjectPool* pool) const override { - return pool->add(VcompoundPred::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VCompoundPred::create_shared(*this); } const std::string& expr_name() const override { return _expr_name; } diff --git a/be/src/vec/exprs/vdirect_in_predicate.h b/be/src/vec/exprs/vdirect_in_predicate.h index 219dda17fc..2110c03324 100644 --- a/be/src/vec/exprs/vdirect_in_predicate.h +++ b/be/src/vec/exprs/vdirect_in_predicate.h @@ -63,9 +63,7 @@ public: return Status::OK(); } - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VDirectInPredicate::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VDirectInPredicate::create_shared(*this); } const std::string& expr_name() const override { return _expr_name; } diff --git a/be/src/vec/exprs/vectorized_agg_fn.cpp b/be/src/vec/exprs/vectorized_agg_fn.cpp index 6576fd1048..d13138fc56 100644 --- a/be/src/vec/exprs/vectorized_agg_fn.cpp +++ b/be/src/vec/exprs/vectorized_agg_fn.cpp @@ -82,9 +82,9 @@ Status AggFnEvaluator::create(ObjectPool* pool, const TExpr& desc, const TSortIn int node_idx = 0; for (int i = 0; i < desc.nodes[0].num_children; ++i) { ++node_idx; - VExpr* expr = nullptr; - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(VExpr::create_tree_from_thrift(pool, desc.nodes, &node_idx, &expr, &ctx)); + VExprSPtr expr; + VExprContextSPtr ctx; + RETURN_IF_ERROR(VExpr::create_tree_from_thrift(desc.nodes, &node_idx, expr, ctx)); agg_fn_evaluator->_input_exprs_ctxs.push_back(ctx); } diff --git a/be/src/vec/exprs/vectorized_agg_fn.h b/be/src/vec/exprs/vectorized_agg_fn.h index bb2b354280..55d11b6925 100644 --- a/be/src/vec/exprs/vectorized_agg_fn.h +++ b/be/src/vec/exprs/vectorized_agg_fn.h @@ -28,6 +28,7 @@ #include "vec/aggregate_functions/aggregate_function.h" #include "vec/core/sort_description.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class RuntimeState; @@ -43,7 +44,6 @@ class Arena; class Block; class BufferWritable; class IColumn; -class VExprContext; class AggFnEvaluator { ENABLE_FACTORY_CREATOR(AggFnEvaluator); @@ -99,7 +99,7 @@ public: static std::string debug_string(const std::vector& exprs); std::string debug_string() const; bool is_merge() const { return _is_merge; } - const std::vector& input_exprs_ctxs() const { return _input_exprs_ctxs; } + const VExprContextSPtrs& input_exprs_ctxs() const { return _input_exprs_ctxs; } private: const TFunction _fn; @@ -123,7 +123,7 @@ private: RuntimeProfile::Counter* _expr_timer; // input context - std::vector _input_exprs_ctxs; + VExprContextSPtrs _input_exprs_ctxs; SortDescription _sort_description; diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index e3ea8a466c..636be579e7 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -176,7 +176,7 @@ std::string VectorizedFnCall::debug_string() const { out << _expr_name; out << "]{"; bool first = true; - for (VExpr* input_expr : children()) { + for (auto& input_expr : children()) { if (first) { first = false; } else { diff --git a/be/src/vec/exprs/vectorized_fn_call.h b/be/src/vec/exprs/vectorized_fn_call.h index 9fcd3563c9..2d64f9a341 100644 --- a/be/src/vec/exprs/vectorized_fn_call.h +++ b/be/src/vec/exprs/vectorized_fn_call.h @@ -51,9 +51,7 @@ public: FunctionContext::FunctionStateScope scope) override; void close(RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - VExpr* clone(ObjectPool* pool) const override { - return pool->add(VectorizedFnCall::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VectorizedFnCall::create_shared(*this); } const std::string& expr_name() const override; std::string debug_string() const override; bool is_constant() const override { diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 9f66bba50f..593715cd62 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -127,9 +127,9 @@ void VExpr::close(RuntimeState* state, VExprContext* context, } } -Status VExpr::create_expr(ObjectPool* pool, const TExprNode& texpr_node, VExpr** expr) { +Status VExpr::create_expr(const doris::TExprNode& expr_node, VExprSPtr& expr) { try { - switch (texpr_node.node_type) { + switch (expr_node.node_type) { case TExprNodeType::BOOL_LITERAL: case TExprNodeType::INT_LITERAL: case TExprNodeType::LARGE_INT_LITERAL: @@ -139,90 +139,90 @@ Status VExpr::create_expr(ObjectPool* pool, const TExprNode& texpr_node, VExpr** case TExprNodeType::STRING_LITERAL: case TExprNodeType::JSON_LITERAL: case TExprNodeType::NULL_LITERAL: { - *expr = pool->add(VLiteral::create_unique(texpr_node).release()); + expr = VLiteral::create_shared(expr_node); break; } case TExprNodeType::ARRAY_LITERAL: { - *expr = pool->add(VArrayLiteral::create_unique(texpr_node).release()); + expr = VArrayLiteral::create_shared(expr_node); break; } case TExprNodeType::MAP_LITERAL: { - *expr = pool->add(VMapLiteral::create_unique(texpr_node).release()); + expr = VMapLiteral::create_shared(expr_node); break; } case TExprNodeType::STRUCT_LITERAL: { - *expr = pool->add(VStructLiteral::create_unique(texpr_node).release()); + expr = VStructLiteral::create_shared(expr_node); break; } - case TExprNodeType::SLOT_REF: { - *expr = pool->add(VSlotRef::create_unique(texpr_node).release()); + case doris::TExprNodeType::SLOT_REF: { + expr = VSlotRef::create_shared(expr_node); break; } - case TExprNodeType::COLUMN_REF: { - *expr = pool->add(VColumnRef::create_unique(texpr_node).release()); + case doris::TExprNodeType::COLUMN_REF: { + expr = VColumnRef::create_shared(expr_node); break; } - case TExprNodeType::COMPOUND_PRED: { - *expr = pool->add(VcompoundPred::create_unique(texpr_node).release()); + case doris::TExprNodeType::COMPOUND_PRED: { + expr = VCompoundPred::create_shared(expr_node); break; } - case TExprNodeType::LAMBDA_FUNCTION_EXPR: { - *expr = pool->add(VLambdaFunctionExpr::create_unique(texpr_node).release()); + case doris::TExprNodeType::LAMBDA_FUNCTION_EXPR: { + expr = VLambdaFunctionExpr::create_shared(expr_node); break; } - case TExprNodeType::LAMBDA_FUNCTION_CALL_EXPR: { - *expr = pool->add(VLambdaFunctionCallExpr::create_unique(texpr_node).release()); + case doris::TExprNodeType::LAMBDA_FUNCTION_CALL_EXPR: { + expr = VLambdaFunctionCallExpr::create_shared(expr_node); break; } - case TExprNodeType::ARITHMETIC_EXPR: - case TExprNodeType::BINARY_PRED: - case TExprNodeType::FUNCTION_CALL: - case TExprNodeType::COMPUTE_FUNCTION_CALL: - case TExprNodeType::MATCH_PRED: { - *expr = pool->add(VectorizedFnCall::create_unique(texpr_node).release()); + case doris::TExprNodeType::ARITHMETIC_EXPR: + case doris::TExprNodeType::BINARY_PRED: + case doris::TExprNodeType::FUNCTION_CALL: + case doris::TExprNodeType::COMPUTE_FUNCTION_CALL: + case doris::TExprNodeType::MATCH_PRED: { + expr = VectorizedFnCall::create_shared(expr_node); break; } - case TExprNodeType::CAST_EXPR: { - *expr = pool->add(VCastExpr::create_unique(texpr_node).release()); + case doris::TExprNodeType::CAST_EXPR: { + expr = VCastExpr::create_shared(expr_node); break; } - case TExprNodeType::IN_PRED: { - *expr = pool->add(VInPredicate::create_unique(texpr_node).release()); + case doris::TExprNodeType::IN_PRED: { + expr = VInPredicate::create_shared(expr_node); break; } - case TExprNodeType::CASE_EXPR: { - if (!texpr_node.__isset.case_expr) { + case doris::TExprNodeType::CASE_EXPR: { + if (!expr_node.__isset.case_expr) { return Status::InternalError("Case expression not set in thrift node"); } - *expr = pool->add(VCaseExpr::create_unique(texpr_node).release()); + expr = VCaseExpr::create_shared(expr_node); break; } case TExprNodeType::INFO_FUNC: { - *expr = pool->add(VInfoFunc::create_unique(texpr_node).release()); + expr = VInfoFunc::create_shared(expr_node); break; } case TExprNodeType::TUPLE_IS_NULL_PRED: { - *expr = pool->add(VTupleIsNullPredicate::create_unique(texpr_node).release()); + expr = VTupleIsNullPredicate::create_shared(expr_node); break; } case TExprNodeType::SCHEMA_CHANGE_EXPR: { - *expr = pool->add(VSchemaChangeExpr::create_unique(texpr_node).release()); + expr = VSchemaChangeExpr::create_shared(expr_node); break; } default: - return Status::InternalError("Unknown expr node type: {}", texpr_node.node_type); + return Status::InternalError("Unknown expr node type: {}", expr_node.node_type); } } catch (const Exception& e) { return Status::Error(e.code(), e.to_string()); } - if (!(*expr)->data_type()) { - return Status::InvalidArgument("Unknown expr type: {}", texpr_node.node_type); + if (!expr->data_type()) { + return Status::InvalidArgument("Unknown expr type: {}", expr_node.node_type); } return Status::OK(); } -Status VExpr::create_tree_from_thrift(ObjectPool* pool, const std::vector& nodes, - int* node_idx, VExpr** root_expr, VExprContext** ctx) { +Status VExpr::create_tree_from_thrift(const std::vector& nodes, int* node_idx, + VExprSPtr& root_expr, VExprContextSPtr& ctx) { // propagate error case if (*node_idx >= nodes.size()) { return Status::InternalError("Failed to reconstruct expression tree from thrift."); @@ -230,21 +230,18 @@ Status VExpr::create_tree_from_thrift(ObjectPool* pool, const std::vector(root); // short path for leaf node if (root_children <= 0) { - *ctx = pool->add(VExprContext::create_unique(root).release()); return Status::OK(); } // non-recursive traversal - std::stack> s; + std::stack> s; s.push({root, root_children}); while (!s.empty()) { auto& parent = s.top(); @@ -257,8 +254,8 @@ Status VExpr::create_tree_from_thrift(ObjectPool* pool, const std::vector= nodes.size()) { return Status::InternalError("Failed to reconstruct expression tree from thrift."); } - VExpr* expr = nullptr; - RETURN_IF_ERROR(create_expr(pool, nodes[*node_idx], &expr)); + VExprSPtr expr; + RETURN_IF_ERROR(create_expr(nodes[*node_idx], expr)); DCHECK(expr != nullptr); parent.first->add_child(expr); int num_children = nodes[*node_idx].num_children; @@ -266,18 +263,17 @@ Status VExpr::create_tree_from_thrift(ObjectPool* pool, const std::vectoradd(VExprContext::create_unique(root).release()); return Status::OK(); } -Status VExpr::create_expr_tree(ObjectPool* pool, const TExpr& texpr, VExprContext** ctx) { +Status VExpr::create_expr_tree(const doris::TExpr& texpr, VExprContextSPtr& ctx) { if (texpr.nodes.size() == 0) { - *ctx = nullptr; + ctx = nullptr; return Status::OK(); } int node_idx = 0; - VExpr* e = nullptr; - Status status = create_tree_from_thrift(pool, texpr.nodes, &node_idx, &e, ctx); + VExprSPtr e; + Status status = create_tree_from_thrift(texpr.nodes, &node_idx, e, ctx); if (status.ok() && node_idx + 1 != texpr.nodes.size()) { status = Status::InternalError( "Expression tree only partially reconstructed. Not all thrift nodes were " @@ -291,18 +287,17 @@ Status VExpr::create_expr_tree(ObjectPool* pool, const TExpr& texpr, VExprContex return status; } -Status VExpr::create_expr_trees(ObjectPool* pool, const std::vector& texprs, - std::vector* ctxs) { - ctxs->clear(); +Status VExpr::create_expr_trees(const std::vector& texprs, VExprContextSPtrs& ctxs) { + ctxs.clear(); for (int i = 0; i < texprs.size(); ++i) { - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(create_expr_tree(pool, texprs[i], &ctx)); - ctxs->push_back(ctx); + VExprContextSPtr ctx; + RETURN_IF_ERROR(create_expr_tree(texprs[i], ctx)); + ctxs.push_back(ctx); } return Status::OK(); } -Status VExpr::prepare(const std::vector& ctxs, RuntimeState* state, +Status VExpr::prepare(const VExprContextSPtrs& ctxs, RuntimeState* state, const RowDescriptor& row_desc) { for (auto ctx : ctxs) { RETURN_IF_ERROR(ctx->prepare(state, row_desc)); @@ -310,33 +305,32 @@ Status VExpr::prepare(const std::vector& ctxs, RuntimeState* stat return Status::OK(); } -void VExpr::close(const std::vector& ctxs, RuntimeState* state) { +void VExpr::close(const VExprContextSPtrs& ctxs, RuntimeState* state) { for (auto ctx : ctxs) { ctx->close(state); } } -Status VExpr::open(const std::vector& ctxs, RuntimeState* state) { +Status VExpr::open(const VExprContextSPtrs& ctxs, RuntimeState* state) { for (int i = 0; i < ctxs.size(); ++i) { RETURN_IF_ERROR(ctxs[i]->open(state)); } return Status::OK(); } -Status VExpr::clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, - std::vector* new_ctxs) { - DCHECK(new_ctxs != nullptr); - if (!new_ctxs->empty()) { +Status VExpr::clone_if_not_exists(const VExprContextSPtrs& ctxs, RuntimeState* state, + VExprContextSPtrs& new_ctxs) { + if (!new_ctxs.empty()) { // 'ctxs' was already cloned into '*new_ctxs', nothing to do. - DCHECK_EQ(new_ctxs->size(), ctxs.size()); - for (int i = 0; i < new_ctxs->size(); ++i) { - DCHECK((*new_ctxs)[i]->_is_clone); + DCHECK_EQ(new_ctxs.size(), ctxs.size()); + for (int i = 0; i < new_ctxs.size(); ++i) { + DCHECK(new_ctxs[i]->_is_clone); } return Status::OK(); } - new_ctxs->resize(ctxs.size()); + new_ctxs.resize(ctxs.size()); for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->clone(state, &(*new_ctxs)[i])); + RETURN_IF_ERROR(ctxs[i]->clone(state, new_ctxs[i])); } return Status::OK(); } @@ -353,7 +347,7 @@ std::string VExpr::debug_string() const { return out.str(); } -std::string VExpr::debug_string(const std::vector& exprs) { +std::string VExpr::debug_string(const VExprSPtrs& exprs) { std::stringstream out; out << "["; @@ -365,8 +359,8 @@ std::string VExpr::debug_string(const std::vector& exprs) { return out.str(); } -std::string VExpr::debug_string(const std::vector& ctxs) { - std::vector exprs; +std::string VExpr::debug_string(const VExprContextSPtrs& ctxs) { + VExprSPtrs exprs; for (int i = 0; i < ctxs.size(); ++i) { exprs.push_back(ctxs[i]->root()); } diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 2eb2e78a32..072ba48af3 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -39,6 +39,7 @@ #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" #include "vec/functions/function.h" namespace doris { @@ -50,7 +51,6 @@ class RowDescriptor; class RuntimeState; namespace vectorized { -class VExprContext; #define RETURN_IF_ERROR_OR_PREPARED(stmt) \ if (_prepared) { \ @@ -81,7 +81,7 @@ public: VExpr() = default; virtual ~VExpr() = default; - virtual VExpr* clone(ObjectPool* pool) const = 0; + virtual VExprSPtr clone() const = 0; virtual const std::string& expr_name() const = 0; @@ -124,38 +124,38 @@ public: TExprOpcode::type op() const { return _opcode; } - void add_child(VExpr* expr) { _children.push_back(expr); } - VExpr* get_child(int i) const { return _children[i]; } + void add_child(const VExprSPtr& expr) { _children.push_back(expr); } + VExprSPtr get_child(int i) const { return _children[i]; } int get_num_children() const { return _children.size(); } - static Status create_expr_tree(ObjectPool* pool, const TExpr& texpr, VExprContext** ctx); + static Status create_expr_tree(const TExpr& texpr, VExprContextSPtr& ctx); - static Status create_expr_trees(ObjectPool* pool, const std::vector& texprs, - std::vector* ctxs); + static Status create_expr_trees(const std::vector& texprs, VExprContextSPtrs& ctxs); - static Status prepare(const std::vector& ctxs, RuntimeState* state, + static Status prepare(const VExprContextSPtrs& ctxs, RuntimeState* state, const RowDescriptor& row_desc); - static Status open(const std::vector& ctxs, RuntimeState* state); + static Status open(const VExprContextSPtrs& ctxs, RuntimeState* state); - static Status clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, - std::vector* new_ctxs); + static Status clone_if_not_exists(const VExprContextSPtrs& ctxs, RuntimeState* state, + VExprContextSPtrs& new_ctxs); - static void close(const std::vector& ctxs, RuntimeState* state); + static void close(const VExprContextSPtrs& ctxs, RuntimeState* state); bool is_nullable() const { return _data_type->is_nullable(); } PrimitiveType result_type() const { return _type.type; } - static Status create_expr(ObjectPool* pool, const TExprNode& texpr_node, VExpr** expr); + static Status create_expr(const TExprNode& expr_node, VExprSPtr& expr); - static Status create_tree_from_thrift(ObjectPool* pool, const std::vector& nodes, - int* node_idx, VExpr** root_expr, VExprContext** ctx); - virtual const std::vector& children() const { return _children; } - void set_children(std::vector children) { _children = children; } + static Status create_tree_from_thrift(const std::vector& nodes, int* node_idx, + VExprSPtr& root_expr, VExprContextSPtr& ctx); + virtual const VExprSPtrs& children() const { return _children; } + void set_children(const VExprSPtrs& children) { _children = children; } + void set_children(VExprSPtrs&& children) { _children = std::move(children); } virtual std::string debug_string() const; - static std::string debug_string(const std::vector& exprs); - static std::string debug_string(const std::vector& ctxs); + static std::string debug_string(const VExprSPtrs& exprs); + static std::string debug_string(const VExprContextSPtrs& ctxs); bool is_and_expr() const { return _fn.name.function_name == "and"; } @@ -176,15 +176,15 @@ public: int fn_context_index() const { return _fn_context_index; } - static const VExpr* expr_without_cast(const VExpr* expr) { - if (expr->node_type() == TExprNodeType::CAST_EXPR) { + static const VExprSPtr expr_without_cast(const VExprSPtr& expr) { + if (expr->node_type() == doris::TExprNodeType::CAST_EXPR) { return expr_without_cast(expr->_children[0]); } return expr; } // If this expr is a RuntimeFilterWrapper, this method will return an underlying rf expression - virtual const VExpr* get_impl() const { return nullptr; } + virtual const VExprSPtr get_impl() const { return {}; } // If this expr is a BloomPredicate, this method will return a BloomFilterFunc virtual std::shared_ptr get_bloom_filter_func() const { @@ -233,7 +233,7 @@ protected: TExprOpcode::type _opcode; TypeDescriptor _type; DataTypePtr _data_type; - std::vector _children; + VExprSPtrs _children; TFunction _fn; /// Index to pass to ExprContext::fn_context() to retrieve this expr's FunctionContext. @@ -247,8 +247,5 @@ protected: bool _prepared; }; -using VExprSPtr = std::shared_ptr; -using VExprUPtr = std::unique_ptr; - } // namespace vectorized } // namespace doris diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index 9844989d88..98af614797 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -40,7 +40,7 @@ class RowDescriptor; } // namespace doris namespace doris::vectorized { -VExprContext::VExprContext(VExpr* expr) +VExprContext::VExprContext(const VExprSPtr& expr) : _root(expr), _is_clone(false), _prepared(false), @@ -92,21 +92,21 @@ void VExprContext::close(doris::RuntimeState* state) { _closed = true; } -doris::Status VExprContext::clone(RuntimeState* state, VExprContext** new_ctx) { +doris::Status VExprContext::clone(RuntimeState* state, VExprContextSPtr& new_ctx) { DCHECK(_prepared) << "expr context not prepared"; DCHECK(_opened); - DCHECK(*new_ctx == nullptr); + DCHECK(new_ctx.get() == nullptr); - *new_ctx = state->obj_pool()->add(VExprContext::create_unique(_root).release()); + new_ctx = std::make_shared(_root); for (auto& _fn_context : _fn_contexts) { - (*new_ctx)->_fn_contexts.push_back(_fn_context->clone()); + new_ctx->_fn_contexts.push_back(_fn_context->clone()); } - (*new_ctx)->_is_clone = true; - (*new_ctx)->_prepared = true; - (*new_ctx)->_opened = true; + new_ctx->_is_clone = true; + new_ctx->_prepared = true; + new_ctx->_opened = true; - return _root->open(state, *new_ctx, FunctionContext::THREAD_LOCAL); + return _root->open(state, new_ctx.get(), FunctionContext::THREAD_LOCAL); } void VExprContext::clone_fn_contexts(VExprContext* other) { @@ -132,14 +132,34 @@ Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block, int col return Block::filter_block(block, result_column_id, column_to_keep); } -// TODO Performance Optimization -Status VExprContext::execute_conjuncts(const std::vector& ctxs, +Status VExprContext::filter_block(const VExprContextSPtrs& expr_contexts, Block* block, + int column_to_keep) { + if (expr_contexts.empty() || block->rows() == 0) { + return Status::OK(); + } + + std::vector columns_to_filter(column_to_keep); + std::iota(columns_to_filter.begin(), columns_to_filter.end(), 0); + + return execute_conjuncts_and_filter_block(expr_contexts, nullptr, block, columns_to_filter, + column_to_keep); +} + +Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs, const std::vector* filters, Block* block, IColumn::Filter* result_filter, bool* can_filter_all) { + return execute_conjuncts(ctxs, filters, false, block, result_filter, can_filter_all); +} + +// TODO Performance Optimization +Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs, + const std::vector* filters, + const bool accept_null, Block* block, + IColumn::Filter* result_filter, bool* can_filter_all) { DCHECK(result_filter->size() == block->rows()); *can_filter_all = false; auto* __restrict result_filter_data = result_filter->data(); - for (auto* ctx : ctxs) { + for (auto& ctx : ctxs) { int result_column_id = -1; RETURN_IF_ERROR(ctx->execute(block, &result_column_id)); ColumnPtr& filter_column = block->get_by_position(result_column_id).column; @@ -156,9 +176,16 @@ Status VExprContext::execute_conjuncts(const std::vector& ctxs, const size_t size = filter.size(); auto* __restrict null_map_data = nullable_column->get_null_map_data().data(); - for (size_t i = 0; i < size; ++i) { - result_filter_data[i] &= (!null_map_data[i]) & filter_data[i]; + if (accept_null) { + for (size_t i = 0; i < size; ++i) { + result_filter_data[i] &= (null_map_data[i]) || filter_data[i]; + } + } else { + for (size_t i = 0; i < size; ++i) { + result_filter_data[i] &= (!null_map_data[i]) & filter_data[i]; + } } + if (memchr(result_filter_data, 0x1, size) == nullptr) { *can_filter_all = true; return Status::OK(); @@ -201,11 +228,12 @@ Status VExprContext::execute_conjuncts(const std::vector& ctxs, // TODO Performance Optimization // need exception safety Status VExprContext::execute_conjuncts_and_filter_block( - const std::vector& ctxs, const std::vector* filters, - Block* block, std::vector& columns_to_filter, int column_to_keep) { + const VExprContextSPtrs& ctxs, const std::vector* filters, Block* block, + std::vector& columns_to_filter, int column_to_keep) { IColumn::Filter result_filter(block->rows(), 1); bool can_filter_all; - RETURN_IF_ERROR(execute_conjuncts(ctxs, filters, block, &result_filter, &can_filter_all)); + RETURN_IF_ERROR( + execute_conjuncts(ctxs, filters, false, block, &result_filter, &can_filter_all)); if (can_filter_all) { for (auto& col : columns_to_filter) { std::move(*block->get_by_position(col).column).assume_mutable()->clear(); @@ -218,12 +246,30 @@ Status VExprContext::execute_conjuncts_and_filter_block( return Status::OK(); } +Status VExprContext::execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block, + std::vector& columns_to_filter, + int column_to_keep, + IColumn::Filter& filter) { + filter.resize_fill(block->rows(), 1); + bool can_filter_all; + RETURN_IF_ERROR(execute_conjuncts(ctxs, nullptr, false, block, &filter, &can_filter_all)); + if (can_filter_all) { + for (auto& col : columns_to_filter) { + std::move(*block->get_by_position(col).column).assume_mutable()->clear(); + } + } else { + RETURN_IF_CATCH_EXCEPTION(Block::filter_block_internal(block, columns_to_filter, filter)); + } + + Block::erase_useless_column(block, column_to_keep); + return Status::OK(); +} + Status VExprContext::get_output_block_after_execute_exprs( - const std::vector& output_vexpr_ctxs, const Block& input_block, - Block* output_block) { + const VExprContextSPtrs& output_vexpr_ctxs, const Block& input_block, Block* output_block) { vectorized::Block tmp_block(input_block.get_columns_with_type_and_name()); vectorized::ColumnsWithTypeAndName result_columns; - for (auto vexpr_ctx : output_vexpr_ctxs) { + for (auto& vexpr_ctx : output_vexpr_ctxs) { int result_column_id = -1; RETURN_IF_ERROR(vexpr_ctx->execute(&tmp_block, &result_column_id)); DCHECK(result_column_id != -1); diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index e00d34559b..3332e6f816 100644 --- a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -27,6 +27,7 @@ #include "runtime/types.h" #include "udf/udf.h" #include "vec/core/block.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class RowDescriptor; @@ -34,22 +35,21 @@ class RuntimeState; } // namespace doris namespace doris::vectorized { -class VExpr; class VExprContext { ENABLE_FACTORY_CREATOR(VExprContext); public: - VExprContext(VExpr* expr); + VExprContext(const VExprSPtr& expr); ~VExprContext(); [[nodiscard]] Status prepare(RuntimeState* state, const RowDescriptor& row_desc); [[nodiscard]] Status open(RuntimeState* state); void close(RuntimeState* state); - [[nodiscard]] Status clone(RuntimeState* state, VExprContext** new_ctx); + [[nodiscard]] Status clone(RuntimeState* state, VExprContextSPtr& new_ctx); [[nodiscard]] Status execute(Block* block, int* result_column_id); - VExpr* root() { return _root; } - void set_root(VExpr* expr) { _root = expr; } + VExprSPtr root() { return _root; } + void set_root(const VExprSPtr& expr) { _root = expr; } /// Creates a FunctionContext, and returns the index that's passed to fn_context() to /// retrieve the created context. Exprs that need a FunctionContext should call this in @@ -68,16 +68,30 @@ public: [[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block, int column_to_keep); - [[nodiscard]] static Status execute_conjuncts(const std::vector& ctxs, + + [[nodiscard]] static Status filter_block(const VExprContextSPtrs& expr_contexts, Block* block, + int column_to_keep); + + [[nodiscard]] static Status execute_conjuncts(const VExprContextSPtrs& ctxs, const std::vector* filters, - Block* block, IColumn::Filter* result_filter, + const bool accept_null, Block* block, + IColumn::Filter* result_filter, bool* can_filter_all); + + static Status execute_conjuncts(const VExprContextSPtrs& ctxs, + const std::vector* filters, Block* block, + IColumn::Filter* result_filter, bool* can_filter_all); + [[nodiscard]] static Status execute_conjuncts_and_filter_block( - const std::vector& ctxs, const std::vector* filters, + const VExprContextSPtrs& ctxs, const std::vector* filters, Block* block, std::vector& columns_to_filter, int column_to_keep); - [[nodiscard]] static Status get_output_block_after_execute_exprs( - const std::vector&, const Block&, Block*); + static Status execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block, + std::vector& columns_to_filter, + int column_to_keep, IColumn::Filter& filter); + + [[nodiscard]] static Status get_output_block_after_execute_exprs(const VExprContextSPtrs&, + const Block&, Block*); int get_last_result_column_id() const { DCHECK(_last_result_column_id != -1); @@ -94,11 +108,44 @@ public: void set_force_materialize_slot() { _force_materialize_slot = true; } + VExprContext& operator=(const VExprContext& other) { + if (this == &other) { + return *this; + } + + _root = other._root; + _is_clone = other._is_clone; + _prepared = other._prepared; + _opened = other._opened; + _closed = other._closed; + + for (auto& fn : other._fn_contexts) { + _fn_contexts.emplace_back(fn->clone()); + } + + _last_result_column_id = other._last_result_column_id; + _depth_num = other._depth_num; + return *this; + } + + VExprContext& operator=(VExprContext&& other) { + _root = other._root; + other._root = nullptr; + _is_clone = other._is_clone; + _prepared = other._prepared; + _opened = other._opened; + _closed = other._closed; + _fn_contexts = std::move(other._fn_contexts); + _last_result_column_id = other._last_result_column_id; + _depth_num = other._depth_num; + return *this; + } + private: friend class VExpr; /// The expr tree this context is for. - VExpr* _root; + VExprSPtr _root; /// True if this context came from a Clone() call. Used to manage FunctionStateScope. bool _is_clone; diff --git a/be/src/vec/exprs/vexpr_fwd.h b/be/src/vec/exprs/vexpr_fwd.h new file mode 100644 index 0000000000..5f0f287509 --- /dev/null +++ b/be/src/vec/exprs/vexpr_fwd.h @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +namespace doris::vectorized { +class VExpr; +class VExprContext; + +using VExprSPtr = std::shared_ptr; +using VExprContextSPtr = std::shared_ptr; + +using VExprSPtrs = std::vector; +using VExprContextSPtrs = std::vector; + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exprs/vin_predicate.h b/be/src/vec/exprs/vin_predicate.h index 925f7b4ce0..4d047de6a2 100644 --- a/be/src/vec/exprs/vin_predicate.h +++ b/be/src/vec/exprs/vin_predicate.h @@ -50,9 +50,7 @@ public: FunctionContext::FunctionStateScope scope) override; void close(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VInPredicate::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VInPredicate::create_shared(*this); } const std::string& expr_name() const override; std::string debug_string() const override; diff --git a/be/src/vec/exprs/vinfo_func.h b/be/src/vec/exprs/vinfo_func.h index 6b2d9e90b9..54498272cf 100644 --- a/be/src/vec/exprs/vinfo_func.h +++ b/be/src/vec/exprs/vinfo_func.h @@ -38,9 +38,7 @@ public: VInfoFunc(const TExprNode& node); virtual ~VInfoFunc() {} - virtual VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VInfoFunc::create_unique(*this).release()); - } + virtual VExprSPtr clone() const override { return VInfoFunc::create_shared(*this); } virtual const std::string& expr_name() const override { return _expr_name; } virtual Status execute(VExprContext* context, vectorized::Block* block, int* result_column_id) override; diff --git a/be/src/vec/exprs/vlambda_function_call_expr.h b/be/src/vec/exprs/vlambda_function_call_expr.h index 302b2e8827..1a1ca7df7b 100644 --- a/be/src/vec/exprs/vlambda_function_call_expr.h +++ b/be/src/vec/exprs/vlambda_function_call_expr.h @@ -34,9 +34,7 @@ public: VLambdaFunctionCallExpr(const TExprNode& node) : VExpr(node) {} ~VLambdaFunctionCallExpr() override = default; - VExpr* clone(ObjectPool* pool) const override { - return pool->add(VLambdaFunctionCallExpr::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VLambdaFunctionCallExpr::create_shared(*this); } doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc, VExprContext* context) override { @@ -69,7 +67,7 @@ public: out << _expr_name; out << "]{"; bool first = true; - for (VExpr* input_expr : children()) { + for (auto& input_expr : children()) { if (first) { first = false; } else { diff --git a/be/src/vec/exprs/vlambda_function_expr.h b/be/src/vec/exprs/vlambda_function_expr.h index 490d17d01b..8a2e3275d7 100644 --- a/be/src/vec/exprs/vlambda_function_expr.h +++ b/be/src/vec/exprs/vlambda_function_expr.h @@ -33,9 +33,7 @@ public: return get_child(0)->execute(context, block, result_column_id); } - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VLambdaFunctionExpr::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VLambdaFunctionExpr::create_shared(*this); } const std::string& expr_name() const override { return _expr_name; } diff --git a/be/src/vec/exprs/vliteral.h b/be/src/vec/exprs/vliteral.h index 79d3b6d8da..e3ea84121a 100644 --- a/be/src/vec/exprs/vliteral.h +++ b/be/src/vec/exprs/vliteral.h @@ -44,9 +44,7 @@ public: } Status execute(VExprContext* context, vectorized::Block* block, int* result_column_id) override; const std::string& expr_name() const override { return _expr_name; } - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VLiteral::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VLiteral::create_shared(*this); } std::string debug_string() const override; std::string value() const; diff --git a/be/src/vec/exprs/vmap_literal.cpp b/be/src/vec/exprs/vmap_literal.cpp index 2beae8cf32..e75b5dc06c 100644 --- a/be/src/vec/exprs/vmap_literal.cpp +++ b/be/src/vec/exprs/vmap_literal.cpp @@ -52,10 +52,10 @@ Status VMapLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc, // each child is slot with key1, value1, key2, value2... for (int idx = 0; idx < _children.size() && idx + 1 < _children.size(); idx += 2) { Field kf, vf; - auto key_literal = dynamic_cast(_children[idx]); + auto key_literal = std::dynamic_pointer_cast(_children[idx]); key_literal->get_column_ptr()->get(0, kf); - auto val_literal = - dynamic_cast(VExpr::expr_without_cast(_children[idx + 1])); + auto val_literal = std::dynamic_pointer_cast( + VExpr::expr_without_cast(_children[idx + 1])); val_literal->get_column_ptr()->get(0, vf); keys.get().push_back(kf); diff --git a/be/src/vec/exprs/vmap_literal.h b/be/src/vec/exprs/vmap_literal.h index c107fe3e20..808516aaa0 100644 --- a/be/src/vec/exprs/vmap_literal.h +++ b/be/src/vec/exprs/vmap_literal.h @@ -25,7 +25,6 @@ class RuntimeState; class TExprNode; namespace vectorized { -class VExprContext; class VMapLiteral : public VLiteral { ENABLE_FACTORY_CREATOR(VMapLiteral); diff --git a/be/src/vec/exprs/vruntimefilter_wrapper.cpp b/be/src/vec/exprs/vruntimefilter_wrapper.cpp index 554a15231f..1383d98c60 100644 --- a/be/src/vec/exprs/vruntimefilter_wrapper.cpp +++ b/be/src/vec/exprs/vruntimefilter_wrapper.cpp @@ -44,7 +44,7 @@ class VExprContext; namespace doris::vectorized { -VRuntimeFilterWrapper::VRuntimeFilterWrapper(const TExprNode& node, VExpr* impl) +VRuntimeFilterWrapper::VRuntimeFilterWrapper(const TExprNode& node, const VExprSPtr& impl) : VExpr(node), _impl(impl), _always_true(false), _filtered_rows(0), _scan_rows(0) {} VRuntimeFilterWrapper::VRuntimeFilterWrapper(const VRuntimeFilterWrapper& vexpr) diff --git a/be/src/vec/exprs/vruntimefilter_wrapper.h b/be/src/vec/exprs/vruntimefilter_wrapper.h index 7e31513824..259484bd78 100644 --- a/be/src/vec/exprs/vruntimefilter_wrapper.h +++ b/be/src/vec/exprs/vruntimefilter_wrapper.h @@ -44,7 +44,7 @@ class VRuntimeFilterWrapper final : public VExpr { ENABLE_FACTORY_CREATOR(VRuntimeFilterWrapper); public: - VRuntimeFilterWrapper(const TExprNode& node, VExpr* impl); + VRuntimeFilterWrapper(const TExprNode& node, const VExprSPtr& impl); VRuntimeFilterWrapper(const VRuntimeFilterWrapper& vexpr); ~VRuntimeFilterWrapper() override = default; doris::Status execute(VExprContext* context, doris::vectorized::Block* block, @@ -57,13 +57,11 @@ public: bool is_constant() const override; void close(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VRuntimeFilterWrapper::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VRuntimeFilterWrapper::create_shared(*this); } const std::string& expr_name() const override; - const std::vector& children() const override { return _impl->children(); } + const VExprSPtrs& children() const override { return _impl->children(); } - const VExpr* get_impl() const override { return _impl; } + const VExprSPtr get_impl() const override { return _impl; } // if filter rate less than this, bloom filter will set always true constexpr static double EXPECTED_FILTER_RATE = 0.4; @@ -80,7 +78,7 @@ public: } private: - VExpr* _impl; + VExprSPtr _impl; bool _always_true; /// TODO: statistic filter rate in the profile diff --git a/be/src/vec/exprs/vschema_change_expr.h b/be/src/vec/exprs/vschema_change_expr.h index fb0dae796c..9dd43a0ae2 100644 --- a/be/src/vec/exprs/vschema_change_expr.h +++ b/be/src/vec/exprs/vschema_change_expr.h @@ -32,7 +32,6 @@ class RuntimeState; namespace vectorized { class Block; -class VExprContext; } // namespace vectorized } // namespace doris @@ -56,9 +55,7 @@ public: FunctionContext::FunctionStateScope scope) override; void close(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VSchemaChangeExpr::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VSchemaChangeExpr::create_shared(*this); } const std::string& expr_name() const override; std::string debug_string() const override; diff --git a/be/src/vec/exprs/vslot_ref.h b/be/src/vec/exprs/vslot_ref.h index 6db7238597..1f46c44ac8 100644 --- a/be/src/vec/exprs/vslot_ref.h +++ b/be/src/vec/exprs/vslot_ref.h @@ -42,9 +42,7 @@ public: int* result_column_id) override; virtual doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc, VExprContext* context) override; - virtual VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VSlotRef::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VSlotRef::create_shared(*this); } virtual const std::string& expr_name() const override; virtual std::string debug_string() const override; diff --git a/be/src/vec/exprs/vstruct_literal.cpp b/be/src/vec/exprs/vstruct_literal.cpp index 295d366b37..9b4e10f720 100644 --- a/be/src/vec/exprs/vstruct_literal.cpp +++ b/be/src/vec/exprs/vstruct_literal.cpp @@ -39,9 +39,9 @@ Status VStructLiteral::prepare(RuntimeState* state, const RowDescriptor& row_des VExprContext* context) { RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context)); Field struct_field = Tuple(); - for (const auto child : _children) { + for (const auto& child : _children) { Field item; - auto child_literal = dynamic_cast(child); + auto child_literal = std::dynamic_pointer_cast(child); child_literal->get_column_ptr()->get(0, item); struct_field.get().push_back(item); } diff --git a/be/src/vec/exprs/vtuple_is_null_predicate.h b/be/src/vec/exprs/vtuple_is_null_predicate.h index d927165ef6..a27eb71cff 100644 --- a/be/src/vec/exprs/vtuple_is_null_predicate.h +++ b/be/src/vec/exprs/vtuple_is_null_predicate.h @@ -47,9 +47,7 @@ public: doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc, VExprContext* context) override; - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VTupleIsNullPredicate::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VTupleIsNullPredicate::create_shared(*this); } [[nodiscard]] bool is_constant() const override { return false; } diff --git a/be/src/vec/olap/vcollect_iterator.cpp b/be/src/vec/olap/vcollect_iterator.cpp index 113307bbc0..3fda650024 100644 --- a/be/src/vec/olap/vcollect_iterator.cpp +++ b/be/src/vec/olap/vcollect_iterator.cpp @@ -297,15 +297,15 @@ Status VCollectIterator::_topn_next(Block* block) { bool eof = false; while (read_rows < _topn_limit && !eof) { block->clear_column_data(); - auto res = rs_reader->next_block(block); - if (!res.ok()) { - if (res.is()) { + auto status = rs_reader->next_block(block); + if (!status.ok()) { + if (status.is()) { eof = true; if (block->rows() == 0) { break; } } else { - return res; + return status; } } @@ -313,8 +313,7 @@ Status VCollectIterator::_topn_next(Block* block) { // filter block RETURN_IF_ERROR(VExprContext::filter_block( - *(_reader->_reader_context.filter_block_vconjunct_ctx_ptr), block, - block->columns())); + _reader->_reader_context.filter_block_conjuncts, block, block->columns())); // update read rows read_rows += block->rows(); diff --git a/be/src/vec/runtime/vdata_stream_recvr.cpp b/be/src/vec/runtime/vdata_stream_recvr.cpp index ebee26783a..a59ac3c7fc 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.cpp +++ b/be/src/vec/runtime/vdata_stream_recvr.cpp @@ -338,7 +338,7 @@ VDataStreamRecvr::~VDataStreamRecvr() { DCHECK(_mgr == nullptr) << "Must call close()"; } -Status VDataStreamRecvr::create_merger(const std::vector& ordering_expr, +Status VDataStreamRecvr::create_merger(const VExprContextSPtrs& ordering_expr, const std::vector& is_asc_order, const std::vector& nulls_first, size_t batch_size, int64_t limit, size_t offset) { diff --git a/be/src/vec/runtime/vdata_stream_recvr.h b/be/src/vec/runtime/vdata_stream_recvr.h index 7478dc1eb9..c2374d23a7 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.h +++ b/be/src/vec/runtime/vdata_stream_recvr.h @@ -48,6 +48,7 @@ #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/materialize_block.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class MemTracker; @@ -59,7 +60,6 @@ class RuntimeState; namespace vectorized { class VDataStreamMgr; class VSortedRunMerger; -class VExprContext; class VDataStreamRecvr { public: @@ -70,7 +70,7 @@ public: virtual ~VDataStreamRecvr(); - Status create_merger(const std::vector& ordering_expr, + Status create_merger(const VExprContextSPtrs& ordering_expr, const std::vector& is_asc_order, const std::vector& nulls_first, size_t batch_size, int64_t limit, size_t offset); diff --git a/be/src/vec/runtime/vfile_result_writer.cpp b/be/src/vec/runtime/vfile_result_writer.cpp index 0c1071c041..ed408e5f7a 100644 --- a/be/src/vec/runtime/vfile_result_writer.cpp +++ b/be/src/vec/runtime/vfile_result_writer.cpp @@ -72,12 +72,13 @@ namespace doris::vectorized { const size_t VFileResultWriter::OUTSTREAM_BUFFER_SIZE_BYTES = 1024 * 1024; using doris::operator<<; -VFileResultWriter::VFileResultWriter( - const ResultFileOptions* file_opts, const TStorageBackendType::type storage_type, - const TUniqueId fragment_instance_id, - const std::vector& output_vexpr_ctxs, - RuntimeProfile* parent_profile, BufferControlBlock* sinker, Block* output_block, - bool output_object_data, const RowDescriptor& output_row_descriptor) +VFileResultWriter::VFileResultWriter(const ResultFileOptions* file_opts, + const TStorageBackendType::type storage_type, + const TUniqueId fragment_instance_id, + const VExprContextSPtrs& output_vexpr_ctxs, + RuntimeProfile* parent_profile, BufferControlBlock* sinker, + Block* output_block, bool output_object_data, + const RowDescriptor& output_row_descriptor) : _file_opts(file_opts), _storage_type(storage_type), _fragment_instance_id(fragment_instance_id), diff --git a/be/src/vec/runtime/vfile_result_writer.h b/be/src/vec/runtime/vfile_result_writer.h index d01ad5bf74..1b9d58144f 100644 --- a/be/src/vec/runtime/vfile_result_writer.h +++ b/be/src/vec/runtime/vfile_result_writer.h @@ -52,9 +52,8 @@ public: VFileResultWriter(const ResultFileOptions* file_option, const TStorageBackendType::type storage_type, const TUniqueId fragment_instance_id, - const std::vector& _output_vexpr_ctxs, - RuntimeProfile* parent_profile, BufferControlBlock* sinker, - Block* output_block, bool output_object_data, + const VExprContextSPtrs& _output_vexpr_ctxs, RuntimeProfile* parent_profile, + BufferControlBlock* sinker, Block* output_block, bool output_object_data, const RowDescriptor& output_row_descriptor); virtual ~VFileResultWriter() = default; @@ -101,7 +100,7 @@ private: const ResultFileOptions* _file_opts; TStorageBackendType::type _storage_type; TUniqueId _fragment_instance_id; - const std::vector& _output_vexpr_ctxs; + const VExprContextSPtrs& _output_vexpr_ctxs; // If the result file format is plain text, like CSV, this _file_writer is owned by this FileResultWriter. // If the result file format is Parquet, this _file_writer is owned by _parquet_writer. diff --git a/be/src/vec/runtime/vorc_writer.cpp b/be/src/vec/runtime/vorc_writer.cpp index f3804a6b62..47fc9242f8 100644 --- a/be/src/vec/runtime/vorc_writer.cpp +++ b/be/src/vec/runtime/vorc_writer.cpp @@ -85,7 +85,7 @@ void VOrcOutputStream::set_written_len(int64_t written_len) { } VOrcWriterWrapper::VOrcWriterWrapper(doris::io::FileWriter* file_writer, - const std::vector& output_vexpr_ctxs, + const VExprContextSPtrs& output_vexpr_ctxs, const std::string& schema, bool output_object_data) : VFileWriterWrapper(output_vexpr_ctxs, output_object_data), _file_writer(file_writer), diff --git a/be/src/vec/runtime/vorc_writer.h b/be/src/vec/runtime/vorc_writer.h index cc66c65a6c..3a7b6c205f 100644 --- a/be/src/vec/runtime/vorc_writer.h +++ b/be/src/vec/runtime/vorc_writer.h @@ -75,8 +75,8 @@ private: class VOrcWriterWrapper final : public VFileWriterWrapper { public: VOrcWriterWrapper(doris::io::FileWriter* file_writer, - const std::vector& output_vexpr_ctxs, - const std::string& schema, bool output_object_data); + const VExprContextSPtrs& output_vexpr_ctxs, const std::string& schema, + bool output_object_data); ~VOrcWriterWrapper() = default; diff --git a/be/src/vec/runtime/vparquet_writer.cpp b/be/src/vec/runtime/vparquet_writer.cpp index c22f110519..13fd6d8b85 100644 --- a/be/src/vec/runtime/vparquet_writer.cpp +++ b/be/src/vec/runtime/vparquet_writer.cpp @@ -255,7 +255,7 @@ void ParquetBuildHelper::build_version(parquet::WriterProperties::Builder& build } VParquetWriterWrapper::VParquetWriterWrapper(doris::io::FileWriter* file_writer, - const std::vector& output_vexpr_ctxs, + const VExprContextSPtrs& output_vexpr_ctxs, const std::vector& parquet_schemas, const TParquetCompressionType::type& compression_type, const bool& parquet_disable_dictionary, diff --git a/be/src/vec/runtime/vparquet_writer.h b/be/src/vec/runtime/vparquet_writer.h index 7d28f35cfb..6e07aa0e44 100644 --- a/be/src/vec/runtime/vparquet_writer.h +++ b/be/src/vec/runtime/vparquet_writer.h @@ -31,14 +31,12 @@ #include "common/status.h" #include "vec/core/block.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { namespace io { class FileWriter; } // namespace io -namespace vectorized { -class VExprContext; -} // namespace vectorized } // namespace doris namespace parquet { namespace schema { @@ -93,7 +91,7 @@ public: class VFileWriterWrapper { public: - VFileWriterWrapper(const std::vector& output_vexpr_ctxs, bool output_object_data) + VFileWriterWrapper(const VExprContextSPtrs& output_vexpr_ctxs, bool output_object_data) : _output_vexpr_ctxs(output_vexpr_ctxs), _cur_written_rows(0), _output_object_data(output_object_data) {} @@ -109,7 +107,7 @@ public: virtual int64_t written_len() = 0; protected: - const std::vector& _output_vexpr_ctxs; + const VExprContextSPtrs& _output_vexpr_ctxs; int64_t _cur_written_rows; bool _output_object_data; }; @@ -118,7 +116,7 @@ protected: class VParquetWriterWrapper final : public VFileWriterWrapper { public: VParquetWriterWrapper(doris::io::FileWriter* file_writer, - const std::vector& output_vexpr_ctxs, + const VExprContextSPtrs& output_vexpr_ctxs, const std::vector& parquet_schemas, const TParquetCompressionType::type& compression_type, const bool& parquet_disable_dictionary, diff --git a/be/src/vec/runtime/vsorted_run_merger.cpp b/be/src/vec/runtime/vsorted_run_merger.cpp index 0010327d01..be7397ee0a 100644 --- a/be/src/vec/runtime/vsorted_run_merger.cpp +++ b/be/src/vec/runtime/vsorted_run_merger.cpp @@ -35,7 +35,7 @@ using std::vector; namespace doris::vectorized { -VSortedRunMerger::VSortedRunMerger(const std::vector& ordering_expr, +VSortedRunMerger::VSortedRunMerger(const VExprContextSPtrs& ordering_expr, const std::vector& is_asc_order, const std::vector& nulls_first, const size_t batch_size, int64_t limit, size_t offset, RuntimeProfile* profile) diff --git a/be/src/vec/runtime/vsorted_run_merger.h b/be/src/vec/runtime/vsorted_run_merger.h index e48552eaba..1f6f566526 100644 --- a/be/src/vec/runtime/vsorted_run_merger.h +++ b/be/src/vec/runtime/vsorted_run_merger.h @@ -28,11 +28,11 @@ #include "vec/core/block.h" #include "vec/core/sort_cursor.h" #include "vec/core/sort_description.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { namespace vectorized { -class VExprContext; // VSortedRunMerger is used to merge multiple sorted runs of blocks. A run is a sorted // sequence of blocks, which are fetched from a BlockSupplier function object. @@ -45,10 +45,9 @@ public: // Function that returns the next block of rows from an input sorted run. The batch // is owned by the supplier (i.e. not VSortedRunMerger). eos is indicated by an NULL // batch being returned. - VSortedRunMerger(const std::vector& ordering_expr, - const std::vector& _is_asc_order, const std::vector& _nulls_first, - const size_t batch_size, int64_t limit, size_t offset, - RuntimeProfile* profile); + VSortedRunMerger(const VExprContextSPtrs& ordering_expr, const std::vector& _is_asc_order, + const std::vector& _nulls_first, const size_t batch_size, int64_t limit, + size_t offset, RuntimeProfile* profile); VSortedRunMerger(const SortDescription& desc, const size_t batch_size, int64_t limit, size_t offset, RuntimeProfile* profile); @@ -64,7 +63,7 @@ public: Status get_next(Block* output_block, bool* eos); protected: - const std::vector _ordering_expr; + const VExprContextSPtrs _ordering_expr; SortDescription _desc; const std::vector _is_asc_order; const std::vector _nulls_first; diff --git a/be/src/vec/sink/vdata_stream_sender.cpp b/be/src/vec/sink/vdata_stream_sender.cpp index 6b03753c84..5886f82dad 100644 --- a/be/src/vec/sink/vdata_stream_sender.cpp +++ b/be/src/vec/sink/vdata_stream_sender.cpp @@ -431,8 +431,8 @@ Status VDataStreamSender::init(const TDataSink& tsink) { const TDataStreamSink& t_stream_sink = tsink.stream_sink; if (_part_type == TPartitionType::HASH_PARTITIONED || _part_type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED) { - RETURN_IF_ERROR(VExpr::create_expr_trees( - _pool, t_stream_sink.output_partition.partition_exprs, &_partition_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(t_stream_sink.output_partition.partition_exprs, + _partition_expr_ctxs)); } else if (_part_type == TPartitionType::RANGE_PARTITIONED) { return Status::InternalError("TPartitionType::RANGE_PARTITIONED should not be used"); } else { diff --git a/be/src/vec/sink/vdata_stream_sender.h b/be/src/vec/sink/vdata_stream_sender.h index 3ce04915b7..86d15782ae 100644 --- a/be/src/vec/sink/vdata_stream_sender.h +++ b/be/src/vec/sink/vdata_stream_sender.h @@ -196,7 +196,7 @@ protected: int _broadcast_pb_block_idx; // compute per-row partition values - std::vector _partition_expr_ctxs; + VExprContextSPtrs _partition_expr_ctxs; std::vector _channels; std::vector> _channel_shared_ptrs; diff --git a/be/src/vec/sink/vmemory_scratch_sink.cpp b/be/src/vec/sink/vmemory_scratch_sink.cpp index 480ad10e13..f54465c0ad 100644 --- a/be/src/vec/sink/vmemory_scratch_sink.cpp +++ b/be/src/vec/sink/vmemory_scratch_sink.cpp @@ -45,15 +45,14 @@ class TMemoryScratchSink; namespace doris::vectorized { MemoryScratchSink::MemoryScratchSink(const RowDescriptor& row_desc, - const std::vector& t_output_expr, - const TMemoryScratchSink& sink, ObjectPool* pool) - : _row_desc(row_desc), _t_output_expr(t_output_expr), _pool(pool) { + const std::vector& t_output_expr) + : _row_desc(row_desc), _t_output_expr(t_output_expr) { _name = "VMemoryScratchSink"; } Status MemoryScratchSink::_prepare_vexpr(RuntimeState* state) { // From the thrift expressions create the real exprs. - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, _t_output_expr, &_output_vexpr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(_t_output_expr, _output_vexpr_ctxs)); // Prepare the exprs to run. RETURN_IF_ERROR(VExpr::prepare(_output_vexpr_ctxs, state, _row_desc)); // generate the arrow schema diff --git a/be/src/vec/sink/vmemory_scratch_sink.h b/be/src/vec/sink/vmemory_scratch_sink.h index 47a9984b8f..b8352fed5d 100644 --- a/be/src/vec/sink/vmemory_scratch_sink.h +++ b/be/src/vec/sink/vmemory_scratch_sink.h @@ -23,6 +23,7 @@ #include "common/status.h" #include "exec/data_sink.h" #include "runtime/result_queue_mgr.h" +#include "vec/exprs/vexpr_fwd.h" namespace arrow { @@ -40,14 +41,12 @@ class TExpr; class TMemoryScratchSink; namespace vectorized { -class VExprContext; class Block; // used to push data to blocking queue class MemoryScratchSink final : public DataSink { public: - MemoryScratchSink(const RowDescriptor& row_desc, const std::vector& t_output_expr, - const TMemoryScratchSink& sink, ObjectPool* pool); + MemoryScratchSink(const RowDescriptor& row_desc, const std::vector& t_output_expr); ~MemoryScratchSink() override = default; @@ -76,9 +75,7 @@ private: // Owned by the RuntimeState. const std::vector& _t_output_expr; - std::vector _output_vexpr_ctxs; - - ObjectPool* _pool; + VExprContextSPtrs _output_vexpr_ctxs; }; } // namespace vectorized } // namespace doris diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp index cc7b39aa5e..b001a10526 100644 --- a/be/src/vec/sink/vmysql_result_writer.cpp +++ b/be/src/vec/sink/vmysql_result_writer.cpp @@ -74,9 +74,9 @@ namespace doris { namespace vectorized { template -VMysqlResultWriter::VMysqlResultWriter( - BufferControlBlock* sinker, const std::vector& output_vexpr_ctxs, - RuntimeProfile* parent_profile) +VMysqlResultWriter::VMysqlResultWriter(BufferControlBlock* sinker, + const VExprContextSPtrs& output_vexpr_ctxs, + RuntimeProfile* parent_profile) : VResultWriter(), _sinker(sinker), _output_vexpr_ctxs(output_vexpr_ctxs), diff --git a/be/src/vec/sink/vmysql_result_writer.h b/be/src/vec/sink/vmysql_result_writer.h index 9626edfcd2..0e0b4d9313 100644 --- a/be/src/vec/sink/vmysql_result_writer.h +++ b/be/src/vec/sink/vmysql_result_writer.h @@ -27,6 +27,7 @@ #include "util/mysql_row_buffer.h" #include "util/runtime_profile.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" #include "vec/sink/vresult_writer.h" namespace doris { @@ -34,7 +35,6 @@ class BufferControlBlock; class RuntimeState; namespace vectorized { -class VExprContext; class Block; template @@ -42,8 +42,7 @@ class VMysqlResultWriter final : public VResultWriter { public: using ResultList = std::vector>; - VMysqlResultWriter(BufferControlBlock* sinker, - const std::vector& output_vexpr_ctxs, + VMysqlResultWriter(BufferControlBlock* sinker, const VExprContextSPtrs& output_vexpr_ctxs, RuntimeProfile* parent_profile); Status init(RuntimeState* state) override; @@ -69,7 +68,7 @@ private: BufferControlBlock* _sinker; - const std::vector& _output_vexpr_ctxs; + const VExprContextSPtrs& _output_vexpr_ctxs; RuntimeProfile* _parent_profile; // parent profile from result sink. not owned // total time cost on append batch operation diff --git a/be/src/vec/sink/vmysql_table_writer.cpp b/be/src/vec/sink/vmysql_table_writer.cpp index c4c7e48393..af1f920e4a 100644 --- a/be/src/vec/sink/vmysql_table_writer.cpp +++ b/be/src/vec/sink/vmysql_table_writer.cpp @@ -59,7 +59,7 @@ std::string MysqlConnInfo::debug_string() const { return ss.str(); } -VMysqlTableWriter::VMysqlTableWriter(const std::vector& output_expr_ctxs) +VMysqlTableWriter::VMysqlTableWriter(const VExprContextSPtrs& output_expr_ctxs) : _vec_output_expr_ctxs(output_expr_ctxs) {} VMysqlTableWriter::~VMysqlTableWriter() { diff --git a/be/src/vec/sink/vmysql_table_writer.h b/be/src/vec/sink/vmysql_table_writer.h index 54a8b470c9..51f62a4db5 100644 --- a/be/src/vec/sink/vmysql_table_writer.h +++ b/be/src/vec/sink/vmysql_table_writer.h @@ -25,6 +25,7 @@ #include #include "common/status.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { namespace vectorized { @@ -40,12 +41,11 @@ struct MysqlConnInfo { std::string debug_string() const; }; -class VExprContext; class Block; class VMysqlTableWriter { public: - VMysqlTableWriter(const std::vector& output_exprs); + VMysqlTableWriter(const VExprContextSPtrs& output_exprs); ~VMysqlTableWriter(); // connect to mysql server @@ -61,7 +61,7 @@ public: private: Status insert_row(vectorized::Block& block, size_t row); - const std::vector& _vec_output_expr_ctxs; + const VExprContextSPtrs& _vec_output_expr_ctxs; fmt::memory_buffer _insert_stmt_buffer; std::string _mysql_tbl; MYSQL* _mysql_conn; diff --git a/be/src/vec/sink/vresult_file_sink.cpp b/be/src/vec/sink/vresult_file_sink.cpp index 92b396d189..a65bda5af2 100644 --- a/be/src/vec/sink/vresult_file_sink.cpp +++ b/be/src/vec/sink/vresult_file_sink.cpp @@ -97,8 +97,7 @@ Status VResultFileSink::init(const TDataSink& tsink) { Status VResultFileSink::prepare_exprs(RuntimeState* state) { // From the thrift expressions create the real exprs. - RETURN_IF_ERROR( - VExpr::create_expr_trees(state->obj_pool(), _t_output_expr, &_output_vexpr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(_t_output_expr, _output_vexpr_ctxs)); // Prepare the exprs to run. RETURN_IF_ERROR(VExpr::prepare(_output_vexpr_ctxs, state, _row_desc)); return Status::OK(); diff --git a/be/src/vec/sink/vresult_file_sink.h b/be/src/vec/sink/vresult_file_sink.h index ed62b3c60a..eaea995584 100644 --- a/be/src/vec/sink/vresult_file_sink.h +++ b/be/src/vec/sink/vresult_file_sink.h @@ -78,7 +78,7 @@ private: // Owned by the RuntimeState. const std::vector& _t_output_expr; - std::vector _output_vexpr_ctxs; + VExprContextSPtrs _output_vexpr_ctxs; RowDescriptor _output_row_descriptor; std::unique_ptr _output_block = nullptr; diff --git a/be/src/vec/sink/vresult_sink.cpp b/be/src/vec/sink/vresult_sink.cpp index 297f201790..445d342a5b 100644 --- a/be/src/vec/sink/vresult_sink.cpp +++ b/be/src/vec/sink/vresult_sink.cpp @@ -64,10 +64,9 @@ VResultSink::~VResultSink() = default; Status VResultSink::prepare_exprs(RuntimeState* state) { // From the thrift expressions create the real exprs. - RETURN_IF_ERROR( - VExpr::create_expr_trees(state->obj_pool(), _t_output_expr, &_output_vexpr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(_t_output_expr, _output_vexpr_ctxs)); if (_fetch_option.use_two_phase_fetch) { - for (VExprContext* expr_ctx : _output_vexpr_ctxs) { + for (auto& expr_ctx : _output_vexpr_ctxs) { // Must materialize if it a slot, or the slot column id will be -1 expr_ctx->set_force_materialize_slot(); } diff --git a/be/src/vec/sink/vresult_sink.h b/be/src/vec/sink/vresult_sink.h index 97916544bf..93394930fb 100644 --- a/be/src/vec/sink/vresult_sink.h +++ b/be/src/vec/sink/vresult_sink.h @@ -28,6 +28,7 @@ #include "common/status.h" #include "exec/data_sink.h" +#include "vec/exprs/vexpr_fwd.h" #include "vec/sink/vresult_writer.h" namespace doris { @@ -42,7 +43,6 @@ namespace pipeline { class ResultSinkOperator; } namespace vectorized { -class VExprContext; class Block; class VResultWriter; @@ -152,7 +152,7 @@ private: // Owned by the RuntimeState. const std::vector& _t_output_expr; - std::vector _output_vexpr_ctxs; + VExprContextSPtrs _output_vexpr_ctxs; std::shared_ptr _sender; std::shared_ptr _writer; diff --git a/be/src/vec/sink/vtable_sink.cpp b/be/src/vec/sink/vtable_sink.cpp index 02259f0cfa..9640b3b4ec 100644 --- a/be/src/vec/sink/vtable_sink.cpp +++ b/be/src/vec/sink/vtable_sink.cpp @@ -41,7 +41,7 @@ VTableSink::VTableSink(ObjectPool* pool, const RowDescriptor& row_desc, Status VTableSink::init(const TDataSink& t_sink) { RETURN_IF_ERROR(DataSink::init(t_sink)); // From the thrift expressions create the real exprs. - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, _t_output_expr, &_output_vexpr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(_t_output_expr, _output_vexpr_ctxs)); return Status::OK(); } diff --git a/be/src/vec/sink/vtable_sink.h b/be/src/vec/sink/vtable_sink.h index 325d44566c..0c45d567f3 100644 --- a/be/src/vec/sink/vtable_sink.h +++ b/be/src/vec/sink/vtable_sink.h @@ -21,6 +21,7 @@ #include "common/status.h" #include "exec/data_sink.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { @@ -33,7 +34,6 @@ class TDataSink; namespace vectorized { class Block; -class VExprContext; class VTableSink : public DataSink { public: @@ -59,7 +59,7 @@ protected: ObjectPool* _pool; const RowDescriptor& _row_desc; const std::vector& _t_output_expr; - std::vector _output_vexpr_ctxs; + VExprContextSPtrs _output_vexpr_ctxs; RuntimeProfile* _profile; std::string _table_name; // whether use transaction diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp index 62954a97b4..a934ce86cf 100644 --- a/be/src/vec/sink/vtablet_sink.cpp +++ b/be/src/vec/sink/vtablet_sink.cpp @@ -966,7 +966,7 @@ VOlapTableSink::VOlapTableSink(ObjectPool* pool, const RowDescriptor& row_desc, const std::vector& texprs, Status* status) : _pool(pool), _input_row_desc(row_desc), _filter_bitmap(1024) { // From the thrift expressions create the real exprs. - *status = vectorized::VExpr::create_expr_trees(pool, texprs, &_output_vexpr_ctxs); + *status = vectorized::VExpr::create_expr_trees(texprs, _output_vexpr_ctxs); _name = "VOlapTableSink"; _transfer_large_data_by_brpc = config::transfer_large_data_by_brpc; } diff --git a/be/src/vec/sink/vtablet_sink.h b/be/src/vec/sink/vtablet_sink.h index ae2b497165..d6ac07068e 100644 --- a/be/src/vec/sink/vtablet_sink.h +++ b/be/src/vec/sink/vtablet_sink.h @@ -66,6 +66,7 @@ #include "vec/common/allocator.h" #include "vec/core/block.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class ObjectPool; @@ -79,10 +80,6 @@ class TupleDescriptor; template class RefCountClosure; -namespace vectorized { -class VExprContext; -} - namespace stream_load { class OpenPartitionClosure; @@ -358,7 +355,8 @@ protected: class IndexChannel { public: - IndexChannel(VOlapTableSink* parent, int64_t index_id, vectorized::VExprContext* where_clause) + IndexChannel(VOlapTableSink* parent, int64_t index_id, + const vectorized::VExprContextSPtr& where_clause) : _parent(parent), _index_id(index_id), _where_clause(where_clause) { _index_channel_tracker = std::make_unique("IndexChannel:indexID=" + std::to_string(_index_id)); @@ -397,7 +395,7 @@ public: // check whether the rows num written by different replicas is consistent Status check_tablet_received_rows_consistency(); - vectorized::VExprContext* get_where_clause() { return _where_clause; } + vectorized::VExprContextSPtr get_where_clause() { return _where_clause; } private: friend class VNodeChannel; @@ -405,7 +403,7 @@ private: VOlapTableSink* _parent; int64_t _index_id; - vectorized::VExprContext* _where_clause; + vectorized::VExprContextSPtr _where_clause; // from backend channel to tablet_id // ATTN: must be placed before `_node_channels` and `_channels_by_tablet`. @@ -607,7 +605,7 @@ private: FindTabletMode findTabletMode = FindTabletMode::FIND_TABLET_EVERY_ROW; VOlapTablePartitionParam* _vpartition = nullptr; - std::vector _output_vexpr_ctxs; + vectorized::VExprContextSPtrs _output_vexpr_ctxs; RuntimeState* _state = nullptr; diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp index 6530a66212..416987c31d 100644 --- a/be/src/vec/utils/util.hpp +++ b/be/src/vec/utils/util.hpp @@ -92,35 +92,6 @@ public: return data_types; } - static VExpr* dfs_peel_conjunct(RuntimeState* state, VExprContext* context, VExpr* expr, - int& leaf_index, std::function checker) { - static constexpr auto is_leaf = [](VExpr* expr) { return !expr->is_and_expr(); }; - - if (is_leaf(expr)) { - if (checker(leaf_index++)) { - expr->close(state, context, context->get_function_state_scope()); - return nullptr; - } - return expr; - } else { - VExpr* left_child = - dfs_peel_conjunct(state, context, expr->children()[0], leaf_index, checker); - VExpr* right_child = - dfs_peel_conjunct(state, context, expr->children()[1], leaf_index, checker); - - if (left_child != nullptr && right_child != nullptr) { - expr->set_children({left_child, right_child}); - return expr; - } else { - // here only close the and expr self, do not close the child - expr->set_children({}); - expr->close(state, context, context->get_function_state_scope()); - } - - return left_child != nullptr ? left_child : right_child; - } - } - static bool all_arguments_are_constant(const Block& block, const ColumnNumbers& args) { for (const auto& arg : args) { if (!is_column_const(*block.get_by_position(arg).column)) { diff --git a/be/test/exprs/mock_vexpr.h b/be/test/exprs/mock_vexpr.h index 569a6b1d43..b8260dba65 100644 --- a/be/test/exprs/mock_vexpr.h +++ b/be/test/exprs/mock_vexpr.h @@ -27,7 +27,7 @@ namespace vectorized { class MockVExpr : public VExpr { public: - MOCK_CONST_METHOD1(clone, VExpr*(ObjectPool* pool)); + MOCK_CONST_METHOD0(clone, VExprSPtr()); MOCK_CONST_METHOD0(expr_name, const std::string&()); MOCK_METHOD3(execute, Status(VExprContext* context, vectorized::Block* block, int* result_column_id)); diff --git a/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp b/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp index bffbc57cbb..8decfe1a09 100644 --- a/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp @@ -79,7 +79,7 @@ void serialize_and_deserialize_mysql_test() { {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}}; int row_num = 7; // make desc and generate block - std::vector _output_vexpr_ctxs; + vectorized::VExprContextSPtrs _output_vexpr_ctxs; _output_vexpr_ctxs.resize(cols.size()); doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(), doris::TQueryGlobals(), nullptr); @@ -256,8 +256,8 @@ void serialize_and_deserialize_mysql_test() { nodes[0].__set_type(create_type_desc(std::get<3>(t), type_desc.precision, type_desc.scale)); TExpr texpr; texpr.__set_nodes(nodes); - VExprContext* ctx = nullptr; - Status st = VExpr::create_expr_tree(&object_pool, texpr, &ctx); + VExprContextSPtr ctx = nullptr; + Status st = VExpr::create_expr_tree(texpr, ctx); std::cout << st.to_string() << std::endl; doris::DescriptorTblBuilder builder(&object_pool); builder.declare_tuple() << type_desc; diff --git a/be/test/vec/exec/parquet/parquet_reader_test.cpp b/be/test/vec/exec/parquet/parquet_reader_test.cpp index bae98ad9ee..20f4f4150c 100644 --- a/be/test/vec/exec/parquet/parquet_reader_test.cpp +++ b/be/test/vec/exec/parquet/parquet_reader_test.cpp @@ -140,11 +140,11 @@ TEST_F(ParquetReaderTest, normal) { std::unordered_map colname_to_value_range; p_reader->open(); - p_reader->init_reader(column_names, missing_column_names, nullptr, nullptr, nullptr, nullptr, + p_reader->init_reader(column_names, missing_column_names, nullptr, {}, nullptr, nullptr, nullptr, nullptr, nullptr); std::unordered_map> partition_columns; - std::unordered_map missing_columns; + std::unordered_map missing_columns; p_reader->set_fill_columns(partition_columns, missing_columns); BlockUPtr block = Block::create_unique(); for (const auto& slot_desc : tuple_desc->slots()) { diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp index a97206741d..01546ff758 100644 --- a/be/test/vec/exprs/vexpr_test.cpp +++ b/be/test/vec/exprs/vexpr_test.cpp @@ -60,8 +60,8 @@ TEST(TEST_VEXPR, ABSTEST) { std::string expr_json = R"|({"1":{"lst":["rec",2,{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"abs"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"5":{"tf":0},"7":{"str":"abs(INT)"},"9":{"rec":{"1":{"str":"_ZN5doris13MathFunctions3absEPN9doris_udf15FunctionContextERKNS1_6IntValE"}}},"11":{"i64":0}}}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0}}},"20":{"i32":-1},"23":{"i32":-1}}]}})|"; doris::TExpr exprx = apache::thrift::from_json_string(expr_json); - doris::vectorized::VExprContext* context = nullptr; - doris::vectorized::VExpr::create_expr_tree(&object_pool, exprx, &context); + doris::vectorized::VExprContextSPtr context; + doris::vectorized::VExpr::create_expr_tree(exprx, context); doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(), doris::TQueryGlobals(), nullptr); @@ -154,8 +154,8 @@ TEST(TEST_VEXPR, ABSTEST2) { R"|({"1":{"lst":["rec",2,{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"abs"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"5":{"tf":0},"7":{"str":"abs(INT)"},"9":{"rec":{"1":{"str":"_ZN5doris13MathFunctions3absEPN9doris_udf15FunctionContextERKNS1_6IntValE"}}},"11":{"i64":0}}}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0}}},"20":{"i32":-1},"23":{"i32":-1}}]}})|"; TExpr exprx = apache::thrift::from_json_string(expr_json); - doris::vectorized::VExprContext* context = nullptr; - doris::vectorized::VExpr::create_expr_tree(&object_pool, exprx, &context); + doris::vectorized::VExprContextSPtr context; + doris::vectorized::VExpr::create_expr_tree(exprx, context); doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(), doris::TQueryGlobals(), nullptr); diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp index a659c39538..2d08eadc81 100644 --- a/be/test/vec/function/table_function_test.cpp +++ b/be/test/vec/function/table_function_test.cpp @@ -59,21 +59,21 @@ protected: void init_expr_context(int child_num) { clear(); - _root = std::make_unique(); + _root = std::make_shared(); for (int i = 0; i < child_num; ++i) { _column_ids.push_back(i); - _children.push_back(std::make_unique()); + _children.push_back(std::make_shared()); EXPECT_CALL(*_children[i], execute(_, _, _)) .WillRepeatedly(DoAll(SetArgPointee<2>(_column_ids[i]), Return(Status::OK()))); - _root->add_child(_children[i].get()); + _root->add_child(_children[i]); } - _ctx = std::make_unique(_root.get()); + _ctx = std::make_shared(_root); } private: - std::unique_ptr _ctx; - std::unique_ptr _root; - std::vector> _children; + VExprContextSPtr _ctx; + std::shared_ptr _root; + std::vector> _children; std::vector _column_ids; }; @@ -81,7 +81,7 @@ TEST_F(TableFunctionTest, vexplode_outer) { init_expr_context(1); VExplodeTableFunction explode_outer; explode_outer.set_outer(); - explode_outer.set_vexpr_context(_ctx.get()); + explode_outer.set_expr_context(_ctx); // explode_outer(Array) { @@ -128,7 +128,7 @@ TEST_F(TableFunctionTest, vexplode_outer) { TEST_F(TableFunctionTest, vexplode) { init_expr_context(1); VExplodeTableFunction explode; - explode.set_vexpr_context(_ctx.get()); + explode.set_expr_context(_ctx); // explode(Array) { @@ -171,7 +171,7 @@ TEST_F(TableFunctionTest, vexplode) { TEST_F(TableFunctionTest, vexplode_numbers) { init_expr_context(1); VExplodeNumbersTableFunction tfn; - tfn.set_vexpr_context(_ctx.get()); + tfn.set_expr_context(_ctx); { InputTypeSet input_types = {TypeIndex::Int32}; @@ -187,7 +187,7 @@ TEST_F(TableFunctionTest, vexplode_numbers) { TEST_F(TableFunctionTest, vexplode_split) { init_expr_context(2); VExplodeSplitTableFunction tfn; - tfn.set_vexpr_context(_ctx.get()); + tfn.set_expr_context(_ctx); { // Case 1: explode_split(null) --- null diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CompoundPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CompoundPredicate.java index b98b982eb6..670cd7c9a8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CompoundPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CompoundPredicate.java @@ -291,7 +291,7 @@ public class CompoundPredicate extends Predicate { } @Override - public Expr replaceSubPredicate(Expr subExpr) throws AnalysisException { + public Expr replaceSubPredicate(Expr subExpr) { if (op.equals(Operator.AND)) { Expr lhs = children.get(0); Expr rhs = children.get(1); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index 18b99065f4..8e8c213e9b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -2285,7 +2285,7 @@ public abstract class Expr extends TreeNode implements ParseNode, Cloneabl return false; } - public Expr replaceSubPredicate(Expr subExpr) throws AnalysisException { + public Expr replaceSubPredicate(Expr subExpr) { if (toSqlWithoutTbl().equals(subExpr.toSqlWithoutTbl())) { return null; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java index 2daed4e237..cd5f13cf97 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java @@ -166,7 +166,6 @@ public class LoadingTaskPlanner { fileStatusesList, filesAdded, strictMode, loadParallelism, userInfo); scanNode.init(analyzer); scanNode.finalize(analyzer); - scanNode.convertToVectorized(); scanNodes.add(scanNode); descTable.computeStatAndMemLayout(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index ac5c05eb26..8be56ffdca 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -291,7 +291,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor outputExprs.add(context.findSlotRef(exprId))); rootFragment.setOutputExprs(outputExprs); } - rootFragment.getPlanRoot().convertToVectorized(); for (PlanFragment fragment : context.getPlanFragments()) { fragment.finalize(null); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/FileLoadScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/FileLoadScanNode.java index 9215d409ae..ef581280c0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/FileLoadScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/FileLoadScanNode.java @@ -302,10 +302,8 @@ public class FileLoadScanNode extends FileScanNode { // Need re compute memory layout after set some slot descriptor to nullable srcTupleDesc.computeStatAndMemLayout(); - if (!preFilterConjuncts.isEmpty()) { - Expr vPreFilterExpr = convertConjunctsToAndCompoundPredicate(preFilterConjuncts); - initCompoundPredicate(vPreFilterExpr); - params.setPreFilterExprs(vPreFilterExpr.treeToThrift()); + for (Expr conjunct : preFilterConjuncts) { + params.addToPreFilterExprsList(conjunct.treeToThrift()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java index 1bbdae9d09..04a541ee4f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java @@ -53,7 +53,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; @@ -73,9 +72,7 @@ public class HashJoinNode extends JoinNodeBase { private List eqJoinConjuncts = Lists.newArrayList(); // join conjuncts from the JOIN clause that aren't equi-join predicates private List otherJoinConjuncts; - // join conjunct from the JOIN clause that aren't equi-join predicates, only use in - // vec exec engine - private Expr votherJoinConjunct = null; + private DistributionMode distrMode; private boolean isColocate = false; //the flag for colocate join private String colocateReason = ""; // if can not do colocate join, set reason here @@ -258,11 +255,6 @@ public class HashJoinNode extends JoinNodeBase { @Override protected void computeOtherConjuncts(Analyzer analyzer, ExprSubstitutionMap originToIntermediateSmap) { otherJoinConjuncts = Expr.substituteList(otherJoinConjuncts, originToIntermediateSmap, analyzer, false); - if (votherJoinConjunct != null) { - votherJoinConjunct = - Expr.substituteList(Arrays.asList(votherJoinConjunct), originToIntermediateSmap, analyzer, false) - .get(0); - } } @Override @@ -717,10 +709,6 @@ public class HashJoinNode extends JoinNodeBase { msg.hash_join_node.addToOtherJoinConjuncts(e.treeToThrift()); } - // use in vec exec engine to replace otherJoinConjuncts - if (votherJoinConjunct != null) { - msg.hash_join_node.setVotherJoinConjunct(votherJoinConjunct.treeToThrift()); - } if (hashOutputSlotIds != null) { for (SlotId slotId : hashOutputSlotIds) { msg.hash_join_node.addToHashOutputSlotIds(slotId.asInt()); @@ -829,15 +817,6 @@ public class HashJoinNode extends JoinNodeBase { } } - @Override - public void convertToVectorized() { - if (!otherJoinConjuncts.isEmpty()) { - votherJoinConjunct = convertConjunctsToAndCompoundPredicate(otherJoinConjuncts); - initCompoundPredicate(votherJoinConjunct); - } - super.convertToVectorized(); - } - /** * Used by nereids. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java b/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java index 14aafb4a84..11b9c7c4a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java @@ -45,7 +45,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -430,9 +429,6 @@ public abstract class JoinNodeBase extends PlanNode { // 4. replace other conjuncts and conjuncts computeOtherConjuncts(analyzer, originToIntermediateSmap); conjuncts = Expr.substituteList(conjuncts, originToIntermediateSmap, analyzer, false); - if (vconjunct != null) { - vconjunct = Expr.substituteList(Arrays.asList(vconjunct), originToIntermediateSmap, analyzer, false).get(0); - } // 5. replace tuple is null expr TupleIsNullPredicate.substitueListForTupleIsNull(vSrcToOutputSMap.getLhs(), originTidsToIntermediateTidMap); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java index e26aac6653..375357e701 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java @@ -66,8 +66,6 @@ public class NestedLoopJoinNode extends JoinNodeBase { private List runtimeFilterExpr = Lists.newArrayList(); private List joinConjuncts; - private Expr vJoinConjunct; - public NestedLoopJoinNode(PlanNodeId id, PlanNode outer, PlanNode inner, TableRef innerRef) { super(id, "NESTED LOOP JOIN", StatisticalType.NESTED_LOOP_JOIN_NODE, outer, inner, innerRef); tupleIds.addAll(outer.getOutputTupleIds()); @@ -160,20 +158,6 @@ public class NestedLoopJoinNode extends JoinNodeBase { @Override protected void computeOtherConjuncts(Analyzer analyzer, ExprSubstitutionMap originToIntermediateSmap) { joinConjuncts = Expr.substituteList(joinConjuncts, originToIntermediateSmap, analyzer, false); - if (vJoinConjunct != null) { - vJoinConjunct = - Expr.substituteList(Collections.singletonList(vJoinConjunct), originToIntermediateSmap, analyzer, - false).get(0); - } - } - - @Override - public void convertToVectorized() { - if (!joinConjuncts.isEmpty()) { - vJoinConjunct = convertConjunctsToAndCompoundPredicate(joinConjuncts); - initCompoundPredicate(vJoinConjunct); - } - super.convertToVectorized(); } @Override @@ -185,8 +169,8 @@ public class NestedLoopJoinNode extends JoinNodeBase { protected void toThrift(TPlanNode msg) { msg.nested_loop_join_node = new TNestedLoopJoinNode(); msg.nested_loop_join_node.join_op = joinOp.toThrift(); - if (vJoinConjunct != null) { - msg.nested_loop_join_node.setVjoinConjunct(vJoinConjunct.treeToThrift()); + for (Expr conjunct : joinConjuncts) { + msg.nested_loop_join_node.addToJoinConjuncts(conjunct.treeToThrift()); } msg.nested_loop_join_node.setIsMark(isMarkJoin()); if (vSrcToOutputSMap != null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index af673a9847..b384375bf0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -98,6 +98,7 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -351,11 +352,12 @@ public class OlapScanNode extends ScanNode { return selectedIndexId; } - public void ignoreConjuncts(Expr whereExpr) throws AnalysisException { + public void ignoreConjuncts(Expr whereExpr) { if (whereExpr == null) { return; } - vconjunct = vconjunct.replaceSubPredicate(whereExpr); + conjuncts = conjuncts.stream().map(expr -> expr.replaceSubPredicate(whereExpr)) + .filter(Objects::nonNull).collect(Collectors.toList()); } /** @@ -1163,8 +1165,10 @@ public class OlapScanNode extends ScanNode { if (useTopnOpt) { output.append(prefix).append("TOPN OPT\n"); } - if (vconjunct != null) { - output.append(prefix).append("PREDICATES: ").append(vconjunct.toSql()).append("\n"); + + if (!conjuncts.isEmpty()) { + Expr expr = convertConjunctsToAndCompoundPredicate(conjuncts); + output.append(prefix).append("PREDICATES: ").append(expr.toSql()).append("\n"); } if (!runtimeFilters.isEmpty()) { output.append(prefix).append("runtime filters: "); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java index e322b619ab..024ea0647b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java @@ -165,11 +165,6 @@ public class OriginalPlanner extends Planner { plannerContext = new PlannerContext(analyzer, queryStmt, queryOptions, statement); singleNodePlanner = new SingleNodePlanner(plannerContext); PlanNode singleNodePlan = singleNodePlanner.createSingleNodePlan(); - // TODO change to vec should happen after distributed planner - if (VectorizedUtil.isVectorized()) { - singleNodePlan.convertToVectorized(); - } - ProjectPlanner projectPlanner = new ProjectPlanner(analyzer); projectPlanner.projectSingleNodePlan(queryStmt.getResultExprs(), singleNodePlan); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java index 4a807a6452..6b49b237c6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java @@ -101,8 +101,6 @@ public abstract class PlanNode extends TreeNode implements PlanStats { protected List conjuncts = Lists.newArrayList(); - protected Expr vconjunct = null; - // Conjuncts used to filter the original load file. // In the load execution plan, the difference between "preFilterConjuncts" and "conjuncts" is that // conjuncts are used to filter the data after column conversion and mapping, @@ -462,9 +460,6 @@ public abstract class PlanNode extends TreeNode implements PlanStats { } public void transferConjuncts(PlanNode recipient) { - recipient.vconjunct = vconjunct; - vconjunct = null; - recipient.conjuncts.addAll(conjuncts); conjuncts.clear(); } @@ -584,9 +579,9 @@ public abstract class PlanNode extends TreeNode implements PlanStats { msg.addToRowTuples(tid.asInt()); msg.addToNullableTuples(nullableTupleIds.contains(tid)); } - // `conjuncts` is never needed on vectorized engine except scan nodes which use them as push-down predicates. - if (this instanceof ScanNode || !VectorizedUtil.isVectorized()) { - for (Expr e : conjuncts) { + + for (Expr e : conjuncts) { + if (!(e instanceof BitmapFilterPredicate)) { msg.addToConjuncts(e.treeToThrift()); } } @@ -596,10 +591,6 @@ public abstract class PlanNode extends TreeNode implements PlanStats { msg.addToRuntimeFilters(filter.toThrift()); } - if (vconjunct != null) { - msg.vconjunct = vconjunct.treeToThrift(); - } - msg.compact_data = compactData; if (outputSlotIds != null) { for (SlotId slotId : outputSlotIds) { @@ -1051,28 +1042,6 @@ public abstract class PlanNode extends TreeNode implements PlanStats { return getRuntimeFilterExplainString(isBuildNode, false); } - public void convertToVectorized() { - List conjunctsExcludeBitmapFilter = Lists.newArrayList(); - for (Expr expr : conjuncts) { - if (!(expr instanceof BitmapFilterPredicate)) { - conjunctsExcludeBitmapFilter.add(expr); - } - } - if (!conjunctsExcludeBitmapFilter.isEmpty()) { - vconjunct = convertConjunctsToAndCompoundPredicate(conjunctsExcludeBitmapFilter); - initCompoundPredicate(vconjunct); - } - - if (!preFilterConjuncts.isEmpty()) { - vpreFilterConjunct = convertConjunctsToAndCompoundPredicate(preFilterConjuncts); - initCompoundPredicate(vpreFilterConjunct); - } - - for (PlanNode child : children) { - child.convertToVectorized(); - } - } - /** * If an plan node implements this method, the plan node itself supports project optimization. * @param requiredSlotIdSet: The upper plan node's requirement slot set for the current plan node. @@ -1165,8 +1134,8 @@ public abstract class PlanNode extends TreeNode implements PlanStats { return outputSlotIds; } - public void setVConjunct(Set exprs) { - vconjunct = convertConjunctsToAndCompoundPredicate(new ArrayList<>(exprs)); + public void setConjuncts(Set exprs) { + conjuncts = new ArrayList<>(exprs); } public void setCardinalityAfterFilter(long cardinalityAfterFilter) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index 60d821166a..0ae4a35edb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -1372,16 +1372,16 @@ public class SingleNodePlanner { tupleSelectFailed = true; } else { try { - // mv index have where clause, so where expr on scan node is unused. - olapScanNode.ignoreConjuncts(olapScanNode.getOlapTable() - .getIndexMetaByIndexId(bestIndexInfo.getBestIndexId()) - .getWhereClause()); - // if the new selected index id is different from the old one, scan node will be // updated. olapScanNode.updateScanRangeInfoByNewMVSelector(bestIndexInfo.getBestIndexId(), bestIndexInfo.isPreAggregation(), bestIndexInfo.getReasonOfDisable()); + // mv index have where clause, so where expr on scan node is unused. + olapScanNode.ignoreConjuncts(olapScanNode.getOlapTable() + .getIndexMetaByIndexId(bestIndexInfo.getBestIndexId()) + .getWhereClause()); + if (selectStmt.getAggInfo() != null) { selectStmt.getAggInfo().updateTypeOfAggregateExprs(); } @@ -2207,7 +2207,9 @@ public class SingleNodePlanner { Analyzer viewAnalyzer = inlineViewRef.getAnalyzer(); Set exprs = viewAnalyzer.findMigrateFailedConjuncts(inlineViewRef); if (CollectionUtils.isNotEmpty(exprs)) { - scanNode.setVConjunct(exprs); + for (Expr expr : exprs) { + scanNode.addConjunct(expr); + } } } if (scanNode == null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java index f837665028..e5789e784f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java @@ -234,7 +234,6 @@ public class StreamLoadPlanner { scanNode.init(analyzer); scanNode.finalize(analyzer); - scanNode.convertToVectorized(); descTable.computeStatAndMemLayout(); int timeout = taskInfo.getTimeout(); diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index f538df77d9..6cbea5bae2 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -339,6 +339,7 @@ struct TFileScanRangeParams { 18: optional list column_idxs // Map of slot to its position in table schema. Only for Hive external table. 19: optional map slot_name_to_schema_pos + 20: optional list pre_filter_exprs_list } struct TFileRangeDesc { @@ -674,6 +675,8 @@ struct TNestedLoopJoinNode { 6: optional Exprs.TExpr vjoin_conjunct 7: optional bool is_mark + + 8: optional list join_conjuncts } struct TMergeJoinNode { diff --git a/regression-test/suites/query_p0/join/test_join.groovy b/regression-test/suites/query_p0/join/test_join.groovy index 1fddc4e208..e6d2906c0d 100644 --- a/regression-test/suites/query_p0/join/test_join.groovy +++ b/regression-test/suites/query_p0/join/test_join.groovy @@ -1260,11 +1260,14 @@ suite("test_join", "query,p0") { logger.info(ret.toString()) assertTrue(ret.toString().contains(" | join op: INNER JOIN(BROADCAST)")) + sql "drop table if exists `t0`" + sql "drop table if exists `t1`" + sql """ - CREATE TABLE t0(c0 BOOLEAN NOT NULL) DISTRIBUTED BY HASH (c0) BUCKETS 8 PROPERTIES ("replication_num" = "1"); + CREATE TABLE IF NOT EXISTS t0(c0 BOOLEAN NOT NULL) DISTRIBUTED BY HASH (c0) BUCKETS 8 PROPERTIES ("replication_num" = "1"); """ sql """ - CREATE TABLE t1(c0 DATETIME NOT NULL) DISTRIBUTED BY HASH (c0) BUCKETS 9 PROPERTIES ("replication_num" = "1"); + CREATE TABLE IF NOT EXISTS t1(c0 DATETIME NOT NULL) DISTRIBUTED BY HASH (c0) BUCKETS 9 PROPERTIES ("replication_num" = "1"); """ sql """INSERT INTO t1 (c0) VALUES (DATE '1970-02-15'), (DATE '1970-11-05'), (DATE '1970-07-10');""" sql """INSERT INTO t1 (c0) VALUES (DATE '1970-04-04');"""