diff --git a/be/src/exec/data_sink.cpp b/be/src/exec/data_sink.cpp index a8e88a38c3..b0d75067e0 100644 --- a/be/src/exec/data_sink.cpp +++ b/be/src/exec/data_sink.cpp @@ -110,8 +110,7 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink return Status::InternalError("Missing data buffer sink."); } - tmp_sink = new vectorized::MemoryScratchSink(row_desc, output_exprs, - thrift_sink.memory_scratch_sink, pool); + tmp_sink = new vectorized::MemoryScratchSink(row_desc, output_exprs); sink->reset(tmp_sink); break; } @@ -254,8 +253,7 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink return Status::InternalError("Missing data buffer sink."); } - tmp_sink = new vectorized::MemoryScratchSink(row_desc, output_exprs, - thrift_sink.memory_scratch_sink, pool); + tmp_sink = new vectorized::MemoryScratchSink(row_desc, output_exprs); sink->reset(tmp_sink); break; } diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index b74fbd23b1..ae6e2944a7 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -105,15 +105,21 @@ Status ExecNode::init(const TPlanNode& tnode, RuntimeState* state) { init_runtime_profile(get_name()); if (tnode.__isset.vconjunct) { - RETURN_IF_ERROR(doris::vectorized::VExpr::create_expr_tree(_pool, tnode.vconjunct, - &_vconjunct_ctx_ptr)); + vectorized::VExprContextSPtr context; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(tnode.vconjunct, context)); + _conjuncts.emplace_back(context); + } else if (tnode.__isset.conjuncts) { + for (auto& conjunct : tnode.conjuncts) { + vectorized::VExprContextSPtr context; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(conjunct, context)); + _conjuncts.emplace_back(context); + } } // create the projections expr if (tnode.__isset.projections) { DCHECK(tnode.__isset.output_tuple_id); - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_trees(_pool, tnode.projections, &_projections)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_trees(tnode.projections, _projections)); } return Status::OK(); @@ -133,8 +139,8 @@ Status ExecNode::prepare(RuntimeState* state) { _mem_tracker = std::make_unique("ExecNode:" + _runtime_profile->name(), _runtime_profile.get(), nullptr, "PeakMemoryUsage"); - if (_vconjunct_ctx_ptr != nullptr) { - RETURN_IF_ERROR(_vconjunct_ctx_ptr->prepare(state, intermediate_row_desc())); + for (auto& conjunct : _conjuncts) { + RETURN_IF_ERROR(conjunct->prepare(state, intermediate_row_desc())); } RETURN_IF_ERROR(vectorized::VExpr::prepare(_projections, state, intermediate_row_desc())); @@ -147,8 +153,8 @@ Status ExecNode::prepare(RuntimeState* state) { } Status ExecNode::alloc_resource(doris::RuntimeState* state) { - if (_vconjunct_ctx_ptr != nullptr) { - RETURN_IF_ERROR(_vconjunct_ctx_ptr->open(state)); + for (auto& conjunct : _conjuncts) { + RETURN_IF_ERROR(conjunct->open(state)); } RETURN_IF_ERROR(vectorized::VExpr::open(_projections, state)); return Status::OK(); @@ -180,9 +186,10 @@ void ExecNode::release_resource(doris::RuntimeState* state) { COUNTER_SET(_rows_returned_counter, _num_rows_returned); } - if (_vconjunct_ctx_ptr != nullptr) { - _vconjunct_ctx_ptr->close(state); + for (auto& conjunct : _conjuncts) { + conjunct->close(state); } + vectorized::VExpr::close(_projections, state); runtime_profile()->add_to_span(_span); diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h index d119609ab5..7f709de0e6 100644 --- a/be/src/exec/exec_node.h +++ b/be/src/exec/exec_node.h @@ -38,6 +38,7 @@ #include "util/runtime_profile.h" #include "util/telemetry/telemetry.h" #include "vec/core/block.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class ObjectPool; @@ -45,10 +46,6 @@ class RuntimeState; class MemTracker; class QueryStatistics; -namespace vectorized { -class VExprContext; -} // namespace vectorized - namespace pipeline { class OperatorBase; } // namespace pipeline @@ -262,14 +259,14 @@ protected: ObjectPool* _pool; std::vector _tuple_ids; - doris::vectorized::VExprContext* _vconjunct_ctx_ptr = nullptr; + vectorized::VExprContextSPtrs _conjuncts; std::vector _children; RowDescriptor _row_descriptor; vectorized::Block _origin_block; std::unique_ptr _output_row_descriptor; - std::vector _projections; + vectorized::VExprContextSPtrs _projections; /// Resource information sent from the frontend. const TBackendResourceProfile _resource_profile; diff --git a/be/src/exec/scan_node.cpp b/be/src/exec/scan_node.cpp index 00496b306f..9271c44e00 100644 --- a/be/src/exec/scan_node.cpp +++ b/be/src/exec/scan_node.cpp @@ -54,28 +54,4 @@ Status ScanNode::prepare(RuntimeState* state) { return Status::OK(); } -// This function is used to remove pushed expr in expr tree. -// It relies on the logic of function convertConjunctsToAndCompoundPredicate() of FE splicing expr. -// It requires FE to satisfy each splicing with 'and' expr, and spliced from left to right, in order. -// Expr tree specific forms do not require requirements. -void ScanNode::_peel_pushed_vconjunct(RuntimeState* state, - const std::function& checker) { - if (_vconjunct_ctx_ptr == nullptr) { - return; - } - - int leaf_index = 0; - vectorized::VExpr* conjunct_expr_root = _vconjunct_ctx_ptr->root(); - - if (conjunct_expr_root != nullptr) { - vectorized::VExpr* new_conjunct_expr_root = vectorized::VectorizedUtils::dfs_peel_conjunct( - state, _vconjunct_ctx_ptr, conjunct_expr_root, leaf_index, checker); - if (new_conjunct_expr_root == nullptr) { - _vconjunct_ctx_ptr->close(state); - } else { - _vconjunct_ctx_ptr->set_root(new_conjunct_expr_root); - } - } -} - } // namespace doris diff --git a/be/src/exec/scan_node.h b/be/src/exec/scan_node.h index 6df382a975..ddad887b66 100644 --- a/be/src/exec/scan_node.h +++ b/be/src/exec/scan_node.h @@ -100,10 +100,6 @@ public: static const std::string _s_num_disks_accessed_counter; protected: - void _peel_pushed_vconjunct( - RuntimeState* state, - const std::function& checker); // remove pushed expr from conjunct tree - RuntimeProfile::Counter* _bytes_read_counter; // # bytes read from the scanner RuntimeProfile::Counter* _rows_read_counter; // Wall based aggregate read throughput [bytes/sec] diff --git a/be/src/exec/table_connector.cpp b/be/src/exec/table_connector.cpp index 82b82e077c..667618df82 100644 --- a/be/src/exec/table_connector.cpp +++ b/be/src/exec/table_connector.cpp @@ -97,7 +97,7 @@ std::u16string TableConnector::utf8_to_u16string(const char* first, const char* } Status TableConnector::append(const std::string& table_name, vectorized::Block* block, - const std::vector& output_vexpr_ctxs, + const vectorized::VExprContextSPtrs& output_vexpr_ctxs, uint32_t start_send_row, uint32_t* num_rows_sent, TOdbcTableType::type table_type) { _insert_stmt_buffer.clear(); @@ -153,10 +153,10 @@ Status TableConnector::append(const std::string& table_name, vectorized::Block* return Status::OK(); } -Status TableConnector::oracle_type_append( - const std::string& table_name, vectorized::Block* block, - const std::vector& output_vexpr_ctxs, uint32_t start_send_row, - uint32_t* num_rows_sent, TOdbcTableType::type table_type) { +Status TableConnector::oracle_type_append(const std::string& table_name, vectorized::Block* block, + const vectorized::VExprContextSPtrs& output_vexpr_ctxs, + uint32_t start_send_row, uint32_t* num_rows_sent, + TOdbcTableType::type table_type) { fmt::format_to(_insert_stmt_buffer, "INSERT ALL "); int num_rows = block->rows(); int num_columns = block->columns(); @@ -185,10 +185,10 @@ Status TableConnector::oracle_type_append( return Status::OK(); } -Status TableConnector::sap_hana_type_append( - const std::string& table_name, vectorized::Block* block, - const std::vector& output_vexpr_ctxs, uint32_t start_send_row, - uint32_t* num_rows_sent, TOdbcTableType::type table_type) { +Status TableConnector::sap_hana_type_append(const std::string& table_name, vectorized::Block* block, + const vectorized::VExprContextSPtrs& output_vexpr_ctxs, + uint32_t start_send_row, uint32_t* num_rows_sent, + TOdbcTableType::type table_type) { fmt::format_to(_insert_stmt_buffer, "INSERT INTO {} ", table_name); int num_rows = block->rows(); int num_columns = block->columns(); diff --git a/be/src/exec/table_connector.h b/be/src/exec/table_connector.h index 8a1cdff7e3..d8c6e01075 100644 --- a/be/src/exec/table_connector.h +++ b/be/src/exec/table_connector.h @@ -29,6 +29,7 @@ #include "util/runtime_profile.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class RuntimeState; @@ -36,7 +37,6 @@ class TupleDescriptor; namespace vectorized { class Block; -class VExprContext; } // namespace vectorized // Table Connector for scan data from ODBC/JDBC @@ -59,9 +59,8 @@ public: //write data into table vectorized Status append(const std::string& table_name, vectorized::Block* block, - const std::vector& _output_vexpr_ctxs, - uint32_t start_send_row, uint32_t* num_rows_sent, - TOdbcTableType::type table_type = TOdbcTableType::MYSQL); + const vectorized::VExprContextSPtrs& _output_vexpr_ctxs, uint32_t start_send_row, + uint32_t* num_rows_sent, TOdbcTableType::type table_type = TOdbcTableType::MYSQL); void init_profile(RuntimeProfile*); @@ -95,11 +94,11 @@ private: // insert into tables values (...),(...); // Here we do something special for Oracle and SAP Hana. Status oracle_type_append(const std::string& table_name, vectorized::Block* block, - const std::vector& output_vexpr_ctxs, + const vectorized::VExprContextSPtrs& output_vexpr_ctxs, uint32_t start_send_row, uint32_t* num_rows_sent, TOdbcTableType::type table_type); Status sap_hana_type_append(const std::string& table_name, vectorized::Block* block, - const std::vector& output_vexpr_ctxs, + const vectorized::VExprContextSPtrs& output_vexpr_ctxs, uint32_t start_send_row, uint32_t* num_rows_sent, TOdbcTableType::type table_type); }; diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 4191865f2a..a4829a0d05 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -162,8 +162,8 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { } } if (t_index.__isset.where_clause) { - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(&_obj_pool, t_index.where_clause, - &index->where_clause)); + RETURN_IF_ERROR( + vectorized::VExpr::create_expr_tree(t_index.where_clause, index->where_clause)); } _indexes.emplace_back(index); } diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h index bcfc5541eb..4b79ef6482 100644 --- a/be/src/exec/tablet_info.h +++ b/be/src/exec/tablet_info.h @@ -35,6 +35,7 @@ #include "vec/columns/column.h" #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class MemTracker; @@ -44,17 +45,13 @@ class TabletColumn; class TabletIndex; class TupleDescriptor; -namespace vectorized { -class VExprContext; -} // namespace vectorized - struct OlapTableIndexSchema { int64_t index_id; std::vector slots; int32_t schema_hash; std::vector columns; std::vector indexes; - vectorized::VExprContext* where_clause = nullptr; + vectorized::VExprContextSPtr where_clause; void to_protobuf(POlapTableIndexSchema* pindex) const; }; diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index 13bc456a29..f8a3ffc8d4 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -192,7 +192,7 @@ PFilterType get_type(RuntimeFilterType type) { } } -Status create_literal(ObjectPool* pool, const TypeDescriptor& type, const void* data, void** expr) { +Status create_literal(const TypeDescriptor& type, const void* data, vectorized::VExprSPtr& expr) { TExprNode node; switch (type.type) { @@ -278,8 +278,7 @@ Status create_literal(ObjectPool* pool, const TypeDescriptor& type, const void* } try { - *reinterpret_cast(expr) = - pool->add(vectorized::VLiteral::create_unique(node).release()); + expr = vectorized::VLiteral::create_shared(node); } catch (const Exception& e) { return Status::Error(e.code(), e.to_string()); } @@ -287,8 +286,8 @@ Status create_literal(ObjectPool* pool, const TypeDescriptor& type, const void* return Status::OK(); } -Status create_vbin_predicate(ObjectPool* pool, const TypeDescriptor& type, TExprOpcode::type opcode, - vectorized::VExpr** expr, TExprNode* tnode) { +Status create_vbin_predicate(const TypeDescriptor& type, TExprOpcode::type opcode, + vectorized::VExprSPtr& expr, TExprNode* tnode) { TExprNode node; TScalarType tscalar_type; tscalar_type.__set_type(TPrimitiveType::BOOLEAN); @@ -344,7 +343,7 @@ Status create_vbin_predicate(ObjectPool* pool, const TypeDescriptor& type, TExpr fn.__set_has_var_args(false); node.__set_fn(fn); *tnode = node; - return vectorized::VExpr::create_expr(pool, node, expr); + return vectorized::VExpr::create_expr(node, expr); } // This class is a wrapper of runtime predicate function class RuntimePredicateWrapper { @@ -605,8 +604,8 @@ public: return 0; } - Status get_push_vexprs(std::vector* container, - vectorized::VExprContext* prob_expr); + Status get_push_exprs(std::vector* container, + const vectorized::VExprContextSPtr& prob_expr); Status merge(const RuntimePredicateWrapper* wrapper) { bool can_not_merge_in_or_bloom = _filter_type == RuntimeFilterType::IN_OR_BLOOM_FILTER && @@ -1165,20 +1164,20 @@ void IRuntimeFilter::publish_finally() { join_rpc(); } -Status IRuntimeFilter::get_push_expr_ctxs(std::vector* push_vexprs) { +Status IRuntimeFilter::get_push_expr_ctxs(std::vector* push_exprs) { DCHECK(is_consumer()); if (!_is_ignored) { _set_push_down(); _profile->add_info_string("Info", _format_status()); - return _wrapper->get_push_vexprs(push_vexprs, _vprobe_ctx); + return _wrapper->get_push_exprs(push_exprs, _vprobe_ctx); } else { _profile->add_info_string("Info", _format_status()); return Status::OK(); } } -Status IRuntimeFilter::get_prepared_vexprs(std::vector* vexprs, - const RowDescriptor& desc, RuntimeState* state) { +Status IRuntimeFilter::get_prepared_exprs(std::vector* vexprs, + const RowDescriptor& desc, RuntimeState* state) { _profile->add_info_string("Info", _format_status()); if (_is_ignored) { return Status::OK(); @@ -1190,7 +1189,7 @@ Status IRuntimeFilter::get_prepared_vexprs(std::vector* vexp std::lock_guard guard(_inner_mutex); if (_push_down_vexprs.empty()) { - RETURN_IF_ERROR(_wrapper->get_push_vexprs(&_push_down_vexprs, _vprobe_ctx)); + RETURN_IF_ERROR(_wrapper->get_push_exprs(&_push_down_vexprs, _vprobe_ctx)); } vexprs->insert(vexprs->end(), _push_down_vexprs.begin(), _push_down_vexprs.end()); return Status::OK(); @@ -1348,8 +1347,8 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue _expr_order = desc->expr_order; _filter_id = desc->filter_id; _opt_remote_rf = desc->__isset.opt_remote_rf && desc->opt_remote_rf; - vectorized::VExprContext* build_ctx = nullptr; - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(_pool, desc->src_expr, &build_ctx)); + vectorized::VExprContextSPtr build_ctx; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(desc->src_expr, build_ctx)); RuntimeFilterParams params; params.filter_id = _filter_id; @@ -1373,9 +1372,9 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue if (!desc->__isset.bitmap_target_expr) { return Status::InvalidArgument("Unknown bitmap filter target expr."); } - vectorized::VExprContext* bitmap_target_ctx = nullptr; - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(_pool, desc->bitmap_target_expr, - &bitmap_target_ctx)); + vectorized::VExprContextSPtr bitmap_target_ctx; + RETURN_IF_ERROR( + vectorized::VExpr::create_expr_tree(desc->bitmap_target_expr, bitmap_target_ctx)); params.column_return_type = bitmap_target_ctx->root()->type().type; if (desc->__isset.bitmap_filter_not_in) { @@ -1390,7 +1389,7 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue DCHECK(false) << "runtime filter not found node_id:" << node_id; return Status::InternalError("not found a node id"); } - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(_pool, iter->second, &_vprobe_ctx)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(iter->second, _vprobe_ctx)); } if (_state) { @@ -1878,15 +1877,15 @@ Status IRuntimeFilter::consumer_close() { return Status::OK(); } -Status RuntimePredicateWrapper::get_push_vexprs(std::vector* container, - vectorized::VExprContext* vprob_expr) { +Status RuntimePredicateWrapper::get_push_exprs(std::vector* container, + const vectorized::VExprContextSPtr& prob_expr) { DCHECK(container != nullptr); DCHECK(_pool != nullptr); - DCHECK(vprob_expr->root()->type().type == _column_return_type || - (is_string_type(vprob_expr->root()->type().type) && + DCHECK(prob_expr->root()->type().type == _column_return_type || + (is_string_type(prob_expr->root()->type().type) && is_string_type(_column_return_type)) || _filter_type == RuntimeFilterType::BITMAP_FILTER) - << " vprob_expr->root()->type().type: " << vprob_expr->root()->type().type + << " prob_expr->root()->type().type: " << prob_expr->root()->type().type << " _column_return_type: " << _column_return_type << " _filter_type: " << ::doris::to_string(_filter_type); @@ -1905,47 +1904,43 @@ Status RuntimePredicateWrapper::get_push_vexprs(std::vector* node.__set_vector_opcode(to_in_opcode(_column_return_type)); node.__set_is_nullable(false); - auto in_pred = - _pool->add(vectorized::VDirectInPredicate::create_unique(node).release()); + auto in_pred = vectorized::VDirectInPredicate::create_shared(node); in_pred->set_filter(_context.hybrid_set); - auto cloned_vexpr = vprob_expr->root()->clone(_pool); - in_pred->add_child(cloned_vexpr); - auto wrapper = _pool->add( - vectorized::VRuntimeFilterWrapper::create_unique(node, in_pred).release()); + auto cloned_expr = prob_expr->root()->clone(); + in_pred->add_child(cloned_expr); + auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared(node, in_pred); container->push_back(wrapper); } break; } case RuntimeFilterType::MINMAX_FILTER: { - vectorized::VExpr* max_pred = nullptr; + vectorized::VExprSPtr max_pred; // create max filter TExprNode max_pred_node; - RETURN_IF_ERROR(create_vbin_predicate(_pool, vprob_expr->root()->type(), TExprOpcode::LE, - &max_pred, &max_pred_node)); - vectorized::VExpr* max_literal = nullptr; - RETURN_IF_ERROR(create_literal(_pool, vprob_expr->root()->type(), - _context.minmax_func->get_max(), (void**)&max_literal)); - auto cloned_vexpr = vprob_expr->root()->clone(_pool); - max_pred->add_child(cloned_vexpr); + RETURN_IF_ERROR(create_vbin_predicate(prob_expr->root()->type(), TExprOpcode::LE, max_pred, + &max_pred_node)); + vectorized::VExprSPtr max_literal; + RETURN_IF_ERROR(create_literal(prob_expr->root()->type(), _context.minmax_func->get_max(), + max_literal)); + auto cloned_expr = prob_expr->root()->clone(); + max_pred->add_child(cloned_expr); max_pred->add_child(max_literal); container->push_back( - _pool->add(vectorized::VRuntimeFilterWrapper::create_unique(max_pred_node, max_pred) - .release())); + vectorized::VRuntimeFilterWrapper::create_shared(max_pred_node, max_pred)); // create min filter - vectorized::VExpr* min_pred = nullptr; + vectorized::VExprSPtr min_pred; TExprNode min_pred_node; - RETURN_IF_ERROR(create_vbin_predicate(_pool, vprob_expr->root()->type(), TExprOpcode::GE, - &min_pred, &min_pred_node)); - vectorized::VExpr* min_literal = nullptr; - RETURN_IF_ERROR(create_literal(_pool, vprob_expr->root()->type(), - _context.minmax_func->get_min(), (void**)&min_literal)); - cloned_vexpr = vprob_expr->root()->clone(_pool); - min_pred->add_child(cloned_vexpr); + RETURN_IF_ERROR(create_vbin_predicate(prob_expr->root()->type(), TExprOpcode::GE, min_pred, + &min_pred_node)); + vectorized::VExprSPtr min_literal; + RETURN_IF_ERROR(create_literal(prob_expr->root()->type(), _context.minmax_func->get_min(), + min_literal)); + cloned_expr = prob_expr->root()->clone(); + min_pred->add_child(cloned_expr); min_pred->add_child(min_literal); container->push_back( - _pool->add(vectorized::VRuntimeFilterWrapper::create_unique(min_pred_node, min_pred) - .release())); + vectorized::VRuntimeFilterWrapper::create_shared(min_pred_node, min_pred)); break; } case RuntimeFilterType::BLOOM_FILTER: { @@ -1959,12 +1954,11 @@ Status RuntimePredicateWrapper::get_push_vexprs(std::vector* node.__isset.vector_opcode = true; node.__set_vector_opcode(to_in_opcode(_column_return_type)); node.__set_is_nullable(false); - auto bloom_pred = _pool->add(vectorized::VBloomPredicate::create_unique(node).release()); + auto bloom_pred = vectorized::VBloomPredicate::create_shared(node); bloom_pred->set_filter(_context.bloom_filter_func); - auto cloned_vexpr = vprob_expr->root()->clone(_pool); - bloom_pred->add_child(cloned_vexpr); - auto wrapper = _pool->add( - vectorized::VRuntimeFilterWrapper::create_unique(node, bloom_pred).release()); + auto cloned_expr = prob_expr->root()->clone(); + bloom_pred->add_child(cloned_expr); + auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared(node, bloom_pred); container->push_back(wrapper); break; } @@ -1979,12 +1973,11 @@ Status RuntimePredicateWrapper::get_push_vexprs(std::vector* node.__isset.vector_opcode = true; node.__set_vector_opcode(to_in_opcode(_column_return_type)); node.__set_is_nullable(false); - auto bitmap_pred = _pool->add(vectorized::VBitmapPredicate::create_unique(node).release()); + auto bitmap_pred = vectorized::VBitmapPredicate::create_shared(node); bitmap_pred->set_filter(_context.bitmap_filter_func); - auto cloned_vexpr = vprob_expr->root()->clone(_pool); - bitmap_pred->add_child(cloned_vexpr); - auto wrapper = _pool->add( - vectorized::VRuntimeFilterWrapper::create_unique(node, bitmap_pred).release()); + auto cloned_expr = prob_expr->root()->clone(); + bitmap_pred->add_child(cloned_expr); + auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared(node, bitmap_pred); container->push_back(wrapper); break; } diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h index b1dbbfa4b2..8389e8416b 100644 --- a/be/src/exprs/runtime_filter.h +++ b/be/src/exprs/runtime_filter.h @@ -221,10 +221,10 @@ public: RuntimeFilterType type() const { return _runtime_filter_type; } - Status get_push_expr_ctxs(std::vector* push_vexprs); + Status get_push_expr_ctxs(std::vector* push_exprs); - Status get_prepared_vexprs(std::vector* push_vexprs, - const RowDescriptor& desc, RuntimeState* state); + Status get_prepared_exprs(std::vector* push_exprs, + const RowDescriptor& desc, RuntimeState* state); bool is_broadcast_join() const { return _is_broadcast_join; } @@ -381,13 +381,13 @@ protected: // this filter won't filter any data bool _always_true; - doris::vectorized::VExprContext* _vprobe_ctx = nullptr; + doris::vectorized::VExprContextSPtr _vprobe_ctx; // Indicate whether runtime filter expr has been ignored bool _is_ignored; std::string _ignored_msg; - std::vector _push_down_vexprs; + std::vector _push_down_vexprs; struct rpc_context; diff --git a/be/src/exprs/runtime_filter_slots.h b/be/src/exprs/runtime_filter_slots.h index 88fcccc47b..3367e8d44a 100644 --- a/be/src/exprs/runtime_filter_slots.h +++ b/be/src/exprs/runtime_filter_slots.h @@ -32,8 +32,8 @@ namespace doris { template class RuntimeFilterSlotsBase { public: - RuntimeFilterSlotsBase(const std::vector& prob_expr_ctxs, - const std::vector& build_expr_ctxs, + RuntimeFilterSlotsBase(const std::vector>& prob_expr_ctxs, + const std::vector>& build_expr_ctxs, const std::vector& runtime_filter_descs) : _probe_expr_context(prob_expr_ctxs), _build_expr_context(build_expr_ctxs), @@ -247,8 +247,8 @@ public: bool empty() { return !_runtime_filters.size(); } private: - const std::vector& _probe_expr_context; - const std::vector& _build_expr_context; + const std::vector>& _probe_expr_context; + const std::vector>& _build_expr_context; const std::vector& _runtime_filter_descs; // prob_contition index -> [IRuntimeFilter] std::map> _runtime_filters; diff --git a/be/src/exprs/runtime_filter_slots_cross.h b/be/src/exprs/runtime_filter_slots_cross.h index adc9fc1188..1e8c15e713 100644 --- a/be/src/exprs/runtime_filter_slots_cross.h +++ b/be/src/exprs/runtime_filter_slots_cross.h @@ -35,7 +35,7 @@ template class RuntimeFilterSlotsCross { public: RuntimeFilterSlotsCross(const std::vector& runtime_filter_descs, - const std::vector& src_expr_ctxs) + const vectorized::VExprContextSPtrs& src_expr_ctxs) : _runtime_filter_descs(runtime_filter_descs), filter_src_expr_ctxs(src_expr_ctxs) {} ~RuntimeFilterSlotsCross() = default; @@ -56,7 +56,7 @@ public: Status insert(vectorized::Block* block) { for (int i = 0; i < _runtime_filters.size(); ++i) { auto* filter = _runtime_filters[i]; - auto* vexpr_ctx = filter_src_expr_ctxs[i]; + auto& vexpr_ctx = filter_src_expr_ctxs[i]; int result_column_id = -1; RETURN_IF_ERROR(vexpr_ctx->execute(block, &result_column_id)); @@ -102,7 +102,7 @@ public: private: const std::vector& _runtime_filter_descs; - const std::vector filter_src_expr_ctxs; + const vectorized::VExprContextSPtrs filter_src_expr_ctxs; std::vector _runtime_filters; }; diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h index adb1a5feb0..f1b195f8f9 100644 --- a/be/src/olap/iterators.h +++ b/be/src/olap/iterators.h @@ -101,7 +101,8 @@ public: std::vector* read_orderby_key_columns = nullptr; io::IOContext io_ctx; vectorized::VExpr* remaining_vconjunct_root = nullptr; - vectorized::VExprContext* common_vexpr_ctxs_pushdown = nullptr; + std::vector remaining_conjunct_roots; + vectorized::VExprContextSPtrs common_expr_ctxs_push_down; const std::set* output_columns = nullptr; // runtime state RuntimeState* runtime_state = nullptr; diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 0379e33616..2a7f1d2d5d 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -417,14 +417,14 @@ Status PushBrokerReader::next(vectorized::Block* block) { Status PushBrokerReader::close() { _ready = false; - for (auto ctx : _dest_vexpr_ctx) { + for (auto ctx : _dest_expr_ctxs) { if (ctx != nullptr) { ctx->close(_runtime_state.get()); } } - if (_push_down_expr) { - _push_down_expr->close(_runtime_state.get()); + for (auto& expr : _push_down_exprs) { + expr->close(_runtime_state.get()); } for (auto& [k, v] : _slot_id_to_filter_conjuncts) { @@ -435,7 +435,7 @@ Status PushBrokerReader::close() { } } - for (auto* ctx : _not_single_slot_filter_conjuncts) { + for (auto& ctx : _not_single_slot_filter_conjuncts) { if (ctx != nullptr) { ctx->close(_runtime_state.get()); } @@ -515,7 +515,7 @@ Status PushBrokerReader::_convert_to_output_block(vectorized::Block* block) { int dest_index = ctx_idx++; vectorized::ColumnPtr column_ptr; - auto* ctx = _dest_vexpr_ctx[dest_index]; + auto& ctx = _dest_expr_ctxs[dest_index]; int result_column_id = -1; // PT1 => dest primitive type RETURN_IF_ERROR(ctx->execute(&_src_block, &result_column_id)); @@ -587,11 +587,10 @@ Status PushBrokerReader::_init_expr_ctxes() { if (!_pre_filter_texprs.empty()) { DCHECK(_pre_filter_texprs.size() == 1); - _vpre_filter_ctx_ptr.reset(new doris::vectorized::VExprContext*); - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree( - _runtime_state->obj_pool(), _pre_filter_texprs[0], _vpre_filter_ctx_ptr.get())); - RETURN_IF_ERROR((*_vpre_filter_ctx_ptr)->prepare(_runtime_state.get(), *_row_desc)); - RETURN_IF_ERROR((*_vpre_filter_ctx_ptr)->open(_runtime_state.get())); + RETURN_IF_ERROR( + vectorized::VExpr::create_expr_tree(_pre_filter_texprs[0], _pre_filter_ctx_ptr)); + RETURN_IF_ERROR(_pre_filter_ctx_ptr->prepare(_runtime_state.get(), *_row_desc)); + RETURN_IF_ERROR(_pre_filter_ctx_ptr->open(_runtime_state.get())); } _dest_tuple_desc = _runtime_state->desc_tbl().get_tuple_descriptor(_params.dest_tuple_id); @@ -610,12 +609,11 @@ Status PushBrokerReader::_init_expr_ctxes() { slot_desc->col_name()); } - vectorized::VExprContext* ctx = nullptr; - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_tree(_runtime_state->obj_pool(), it->second, &ctx)); + vectorized::VExprContextSPtr ctx; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(it->second, ctx)); RETURN_IF_ERROR(ctx->prepare(_runtime_state.get(), *_row_desc.get())); RETURN_IF_ERROR(ctx->open(_runtime_state.get())); - _dest_vexpr_ctx.emplace_back(ctx); + _dest_expr_ctxs.emplace_back(ctx); if (has_slot_id_map) { auto it1 = _params.dest_sid_to_src_sid_without_trans.find(slot_desc->id()); if (it1 == std::end(_params.dest_sid_to_src_sid_without_trans)) { @@ -654,7 +652,7 @@ Status PushBrokerReader::_get_next_reader() { RETURN_IF_ERROR(parquet_reader->open()); std::vector place_holder; init_status = parquet_reader->init_reader( - _all_col_names, place_holder, _colname_to_value_range, _push_down_expr, + _all_col_names, place_holder, _colname_to_value_range, _push_down_exprs, _real_tuple_desc, _default_val_row_desc.get(), _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts, false); _cur_reader = std::move(parquet_reader); @@ -664,7 +662,7 @@ Status PushBrokerReader::_get_next_reader() { } std::unordered_map> partition_columns; - std::unordered_map missing_columns; + std::unordered_map missing_columns; _cur_reader->get_columns(&_name_to_col_type, &_missing_cols); _cur_reader->set_fill_columns(partition_columns, missing_columns); break; diff --git a/be/src/olap/push_handler.h b/be/src/olap/push_handler.h index de6dd857e9..204d0e73e2 100644 --- a/be/src/olap/push_handler.h +++ b/be/src/olap/push_handler.h @@ -117,8 +117,8 @@ private: std::unordered_map _name_to_col_type; std::unordered_set _missing_cols; std::unordered_map _src_block_name_to_idx; - std::vector _dest_vexpr_ctx; - std::unique_ptr _vpre_filter_ctx_ptr; + vectorized::VExprContextSPtrs _dest_expr_ctxs; + vectorized::VExprContextSPtr _pre_filter_ctx_ptr; bool _is_dynamic_schema = false; std::vector _src_slot_descs_order_by_dest; std::unordered_map _dest_slot_to_src_slot_index; @@ -142,12 +142,12 @@ private: // col names from _slot_descs std::vector _all_col_names; std::unordered_map* _colname_to_value_range; - vectorized::VExprContext* _push_down_expr = nullptr; + vectorized::VExprContextSPtrs _push_down_exprs; const std::unordered_map* _col_name_to_slot_id; // single slot filter conjuncts - std::unordered_map> _slot_id_to_filter_conjuncts; + std::unordered_map _slot_id_to_filter_conjuncts; // not single(zero or multi) slot filter conjuncts - std::vector _not_single_slot_filter_conjuncts; + vectorized::VExprContextSPtrs _not_single_slot_filter_conjuncts; // File source slot descriptors std::vector _file_slot_descs; // row desc for default exprs diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp index f3e9e07769..a0e9b989c1 100644 --- a/be/src/olap/reader.cpp +++ b/be/src/olap/reader.cpp @@ -222,7 +222,7 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { _reader_context.use_topn_opt = read_params.use_topn_opt; _reader_context.read_orderby_key_reverse = read_params.read_orderby_key_reverse; _reader_context.read_orderby_key_limit = read_params.read_orderby_key_limit; - _reader_context.filter_block_vconjunct_ctx_ptr = read_params.filter_block_vconjunct_ctx_ptr; + _reader_context.filter_block_conjuncts = read_params.filter_block_conjuncts; _reader_context.return_columns = &_return_columns; _reader_context.read_orderby_key_columns = _orderby_key_columns.size() > 0 ? &_orderby_key_columns : nullptr; @@ -243,8 +243,8 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { _reader_context.enable_unique_key_merge_on_write = tablet()->enable_unique_key_merge_on_write(); _reader_context.record_rowids = read_params.record_rowids; _reader_context.is_key_column_group = read_params.is_key_column_group; - _reader_context.remaining_vconjunct_root = read_params.remaining_vconjunct_root; - _reader_context.common_vexpr_ctxs_pushdown = read_params.common_vexpr_ctxs_pushdown; + _reader_context.remaining_conjunct_roots = read_params.remaining_conjunct_roots; + _reader_context.common_expr_ctxs_push_down = read_params.common_expr_ctxs_push_down; _reader_context.output_columns = &read_params.output_columns; return Status::OK(); diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h index 41fff12be1..95bc9d1c0d 100644 --- a/be/src/olap/reader.h +++ b/be/src/olap/reader.h @@ -138,7 +138,8 @@ public: std::unordered_set* tablet_columns_convert_to_null_set = nullptr; TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE; vectorized::VExpr* remaining_vconjunct_root = nullptr; - vectorized::VExprContext* common_vexpr_ctxs_pushdown = nullptr; + std::vector remaining_conjunct_roots; + vectorized::VExprContextSPtrs common_expr_ctxs_push_down; // used for compaction to record row ids bool record_rowids = false; @@ -153,7 +154,7 @@ public: // limit of rows for read_orderby_key size_t read_orderby_key_limit = 0; // filter_block arguments - vectorized::VExprContext** filter_block_vconjunct_ctx_ptr = nullptr; + vectorized::VExprContextSPtrs filter_block_conjuncts; // for vertical compaction bool is_key_column_group = false; diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 670f48d3d3..82a8b761c6 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -88,8 +88,8 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.block_row_max = read_context->batch_size; _read_options.stats = _stats; _read_options.push_down_agg_type_opt = _context->push_down_agg_type_opt; - _read_options.remaining_vconjunct_root = _context->remaining_vconjunct_root; - _read_options.common_vexpr_ctxs_pushdown = _context->common_vexpr_ctxs_pushdown; + _read_options.remaining_conjunct_roots = _context->remaining_conjunct_roots; + _read_options.common_expr_ctxs_push_down = _context->common_expr_ctxs_push_down; _read_options.rowset_id = _rowset->rowset_id(); _read_options.version = _rowset->version(); _read_options.tablet_id = _rowset->rowset_meta()->tablet_id(); diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h index 1adc63122b..a1c8650011 100644 --- a/be/src/olap/rowset/rowset_reader_context.h +++ b/be/src/olap/rowset/rowset_reader_context.h @@ -47,7 +47,7 @@ struct RowsetReaderContext { // limit of rows for read_orderby_key size_t read_orderby_key_limit = 0; // filter_block arguments - vectorized::VExprContext** filter_block_vconjunct_ctx_ptr = nullptr; + vectorized::VExprContextSPtrs filter_block_conjuncts; // projection columns: the set of columns rowset reader should return const std::vector* return_columns = nullptr; TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE; @@ -64,8 +64,8 @@ struct RowsetReaderContext { const DeleteHandler* delete_handler = nullptr; OlapReaderStatistics* stats = nullptr; RuntimeState* runtime_state = nullptr; - vectorized::VExpr* remaining_vconjunct_root = nullptr; - vectorized::VExprContext* common_vexpr_ctxs_pushdown = nullptr; + std::vector remaining_conjunct_roots; + vectorized::VExprContextSPtrs common_expr_ctxs_push_down; bool use_page_cache = false; int sequence_id_idx = -1; int batch_size = 1024; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 4ddc567d6e..297ef34d87 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -236,11 +236,14 @@ Status SegmentIterator::init(const StorageReadOptions& opts) { _output_columns = *(opts.output_columns); } - _remaining_vconjunct_root = opts.remaining_vconjunct_root; - _common_vexpr_ctxs_pushdown = opts.common_vexpr_ctxs_pushdown; - _enable_common_expr_pushdown = _common_vexpr_ctxs_pushdown ? true : false; + _remaining_conjunct_roots = opts.remaining_conjunct_roots; + _common_expr_ctxs_push_down = opts.common_expr_ctxs_push_down; + _enable_common_expr_pushdown = !_common_expr_ctxs_push_down.empty(); _column_predicate_info.reset(new ColumnPredicateInfo()); - _calculate_pred_in_remaining_vconjunct_root(_remaining_vconjunct_root); + + for (auto& expr : _remaining_conjunct_roots) { + _calculate_pred_in_remaining_conjunct_root(expr); + } _column_predicate_info.reset(new ColumnPredicateInfo()); if (_schema.rowid_col_idx() > 0) { @@ -375,9 +378,12 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() { if (config::enable_index_apply_preds_except_leafnode_of_andnode) { RETURN_IF_ERROR(_apply_index_except_leafnode_of_andnode()); if (_can_filter_by_preds_except_leafnode_of_andnode()) { - auto res = _execute_predicates_except_leafnode_of_andnode(_remaining_vconjunct_root); - if (res.ok() && _pred_except_leafnode_of_andnode_evaluate_result.size() == 1) { - _row_bitmap &= _pred_except_leafnode_of_andnode_evaluate_result[0]; + for (auto& expr : _remaining_conjunct_roots) { + _pred_except_leafnode_of_andnode_evaluate_result.clear(); + auto res = _execute_predicates_except_leafnode_of_andnode(expr); + if (res.ok() && _pred_except_leafnode_of_andnode_evaluate_result.size() == 1) { + _row_bitmap &= _pred_except_leafnode_of_andnode_evaluate_result[0]; + } } } } @@ -521,15 +527,15 @@ bool SegmentIterator::_is_literal_node(const TExprNodeType::type& node_type) { } } -Status SegmentIterator::_extract_common_expr_columns(vectorized::VExpr* expr) { - auto children = expr->children(); +Status SegmentIterator::_extract_common_expr_columns(const vectorized::VExprSPtr& expr) { + auto& children = expr->children(); for (int i = 0; i < children.size(); ++i) { RETURN_IF_ERROR(_extract_common_expr_columns(children[i])); } auto node_type = expr->node_type(); if (node_type == TExprNodeType::SLOT_REF) { - auto slot_expr = dynamic_cast(expr); + auto slot_expr = std::dynamic_pointer_cast(expr); _is_common_expr_column[_schema.column_id(slot_expr->column_id())] = true; _common_expr_columns.insert(_schema.column_id(slot_expr->column_id())); } @@ -537,12 +543,13 @@ Status SegmentIterator::_extract_common_expr_columns(vectorized::VExpr* expr) { return Status::OK(); } -Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode(vectorized::VExpr* expr) { +Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode( + const vectorized::VExprSPtr& expr) { if (expr == nullptr) { return Status::OK(); } - auto children = expr->children(); + auto& children = expr->children(); for (int i = 0; i < children.size(); ++i) { RETURN_IF_ERROR(_execute_predicates_except_leafnode_of_andnode(children[i])); } @@ -551,7 +558,7 @@ Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode(vectorize if (node_type == TExprNodeType::SLOT_REF) { _column_predicate_info->column_name = expr->expr_name(); } else if (_is_literal_node(node_type)) { - auto v_literal_expr = dynamic_cast(expr); + auto v_literal_expr = std::dynamic_pointer_cast(expr); _column_predicate_info->query_value = v_literal_expr->value(); } else if (node_type == TExprNodeType::BINARY_PRED || node_type == TExprNodeType::MATCH_PRED) { if (node_type == TExprNodeType::MATCH_PRED) { @@ -720,7 +727,7 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() { for (auto pred : _col_preds_except_leafnode_of_andnode) { auto column_name = _schema.column(pred->column_id())->name(); - if (_remaining_vconjunct_root != nullptr && + if (!_remaining_conjunct_roots.empty() && _check_column_pred_all_push_down(column_name, true) && !pred->predicate_params()->marked_by_runtime_filter) { int32_t unique_id = _schema.unique_id(pred->column_id()); @@ -1272,8 +1279,10 @@ Status SegmentIterator::_vec_init_lazy_materialization() { // Step2: extract columns that can execute expr context _is_common_expr_column.resize(_schema.columns().size(), false); - if (_enable_common_expr_pushdown && _remaining_vconjunct_root != nullptr) { - RETURN_IF_ERROR(_extract_common_expr_columns(_remaining_vconjunct_root)); + if (_enable_common_expr_pushdown && !_remaining_conjunct_roots.empty()) { + for (auto expr : _remaining_conjunct_roots) { + RETURN_IF_ERROR(_extract_common_expr_columns(expr)); + } if (!_common_expr_columns.empty()) { _is_need_expr_eval = true; for (auto cid : _schema.column_ids()) { @@ -1888,60 +1897,15 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, vectorized::Block* block) { SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns); - DCHECK(_remaining_vconjunct_root != nullptr); + DCHECK(!_remaining_conjunct_roots.empty()); DCHECK(block->rows() != 0); size_t prev_columns = block->columns(); - Defer defer {[&]() { vectorized::Block::erase_useless_column(block, prev_columns); }}; - int result_column_id = -1; - RETURN_IF_ERROR(_common_vexpr_ctxs_pushdown->execute(block, &result_column_id)); - vectorized::ColumnPtr filter_column = block->get_by_position(result_column_id).column; - if (auto* nullable_column = - vectorized::check_and_get_column(*filter_column)) { - vectorized::ColumnPtr nested_column = nullable_column->get_nested_column_ptr(); + vectorized::IColumn::Filter filter; + RETURN_IF_ERROR(vectorized::VExprContext::execute_conjuncts_and_filter_block( + _common_expr_ctxs_push_down, block, _columns_to_filter, prev_columns, filter)); - vectorized::MutableColumnPtr mutable_holder = - nested_column->use_count() == 1 - ? nested_column->assume_mutable() - : nested_column->clone_resized(nested_column->size()); - - vectorized::ColumnUInt8* concrete_column = - typeid_cast(mutable_holder.get()); - if (!concrete_column) { - return Status::InvalidArgument( - "Illegal type {} of column for filter. Must be UInt8 or Nullable(UInt8).", - filter_column->get_name()); - } - auto* __restrict null_map = nullable_column->get_null_map_data().data(); - vectorized::IColumn::Filter& filter = concrete_column->get_data(); - auto* __restrict filter_data = filter.data(); - - const size_t size = filter.size(); - for (size_t i = 0; i < size; ++i) { - filter_data[i] &= !null_map[i]; - } - - selected_size = _evaluate_common_expr_filter(sel_rowid_idx, selected_size, filter); - RETURN_IF_CATCH_EXCEPTION( - vectorized::Block::filter_block_internal(block, _columns_to_filter, filter)); - } else if (auto* const_column = - vectorized::check_and_get_column(*filter_column)) { - bool ret = const_column->get_bool(0); - if (!ret) { - for (auto& col : _columns_to_filter) { - std::move(*block->get_by_position(col).column).assume_mutable()->clear(); - } - selected_size = 0; - } - } else { - const vectorized::IColumn::Filter& filter = - assert_cast&>( - *filter_column) - .get_data(); - selected_size = _evaluate_common_expr_filter(sel_rowid_idx, selected_size, filter); - RETURN_IF_CATCH_EXCEPTION( - vectorized::Block::filter_block_internal(block, _columns_to_filter, filter)); - } + selected_size = _evaluate_common_expr_filter(sel_rowid_idx, selected_size, filter); return Status::OK(); } @@ -2119,7 +2083,7 @@ Status SegmentIterator::current_block_row_locations(std::vector* bl */ bool SegmentIterator::_check_column_pred_all_push_down(const std::string& column_name, bool in_compound) { - if (_remaining_vconjunct_root == nullptr) { + if (_remaining_conjunct_roots.empty()) { return true; } @@ -2139,21 +2103,22 @@ bool SegmentIterator::_check_column_pred_all_push_down(const std::string& column return true; } -void SegmentIterator::_calculate_pred_in_remaining_vconjunct_root(const vectorized::VExpr* expr) { +void SegmentIterator::_calculate_pred_in_remaining_conjunct_root( + const vectorized::VExprSPtr& expr) { if (expr == nullptr) { return; } - auto children = expr->children(); + auto& children = expr->children(); for (int i = 0; i < children.size(); ++i) { - _calculate_pred_in_remaining_vconjunct_root(children[i]); + _calculate_pred_in_remaining_conjunct_root(children[i]); } auto node_type = expr->node_type(); if (node_type == TExprNodeType::SLOT_REF) { _column_predicate_info->column_name = expr->expr_name(); } else if (_is_literal_node(node_type)) { - auto v_literal_expr = static_cast(expr); + auto v_literal_expr = static_cast(expr.get()); _column_predicate_info->query_value = v_literal_expr->value(); } else { if (node_type == TExprNodeType::MATCH_PRED) { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index a339929100..6280dad7ef 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -187,7 +187,8 @@ private: bool _column_has_fulltext_index(int32_t unique_id); inline bool _inverted_index_not_support_pred_type(const PredicateType& type); bool _can_filter_by_preds_except_leafnode_of_andnode(); - [[nodiscard]] Status _execute_predicates_except_leafnode_of_andnode(vectorized::VExpr* expr); + [[nodiscard]] Status _execute_predicates_except_leafnode_of_andnode( + const vectorized::VExprSPtr& expr); [[nodiscard]] Status _execute_compound_fn(const std::string& function_name); bool _is_literal_node(const TExprNodeType::type& node_type); @@ -234,7 +235,7 @@ private: bool _can_evaluated_by_vectorized(ColumnPredicate* predicate); - [[nodiscard]] Status _extract_common_expr_columns(vectorized::VExpr* expr); + [[nodiscard]] Status _extract_common_expr_columns(const vectorized::VExprSPtr& expr); [[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, vectorized::Block* block); uint16_t _evaluate_common_expr_filter(uint16_t* sel_rowid_idx, uint16_t selected_size, @@ -265,7 +266,7 @@ private: // return true means one column's predicates all pushed down bool _check_column_pred_all_push_down(const std::string& column_name, bool in_compound = false); - void _calculate_pred_in_remaining_vconjunct_root(const vectorized::VExpr* expr); + void _calculate_pred_in_remaining_conjunct_root(const vectorized::VExprSPtr& expr); // todo(wb) remove this method after RowCursor is removed void _convert_rowcursor_to_short_key(const RowCursor& key, size_t num_keys) { @@ -386,9 +387,9 @@ private: // make a copy of `_opts.column_predicates` in order to make local changes std::vector _col_predicates; std::vector _col_preds_except_leafnode_of_andnode; - doris::vectorized::VExprContext* _common_vexpr_ctxs_pushdown; + vectorized::VExprContextSPtrs _common_expr_ctxs_push_down; bool _enable_common_expr_pushdown = false; - doris::vectorized::VExpr* _remaining_vconjunct_root; + std::vector _remaining_conjunct_roots; std::vector _pred_except_leafnode_of_andnode_evaluate_result; std::unique_ptr _column_predicate_info; std::unordered_map> diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 5baedefa42..48fbe5bca1 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -270,14 +270,14 @@ Status BlockChanger::change_block(vectorized::Block* ref_block, RowDescriptor(_desc_tbl.get_tuple_descriptor(_desc_tbl.get_row_tuples()[0]), false); if (_where_expr != nullptr) { - vectorized::VExprContext* ctx = nullptr; - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(&pool, *_where_expr, &ctx)); + vectorized::VExprContextSPtr ctx = nullptr; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(*_where_expr, ctx)); Defer defer {[&]() { ctx->close(state); }}; RETURN_IF_ERROR(ctx->prepare(state, row_desc)); RETURN_IF_ERROR(ctx->open(state)); RETURN_IF_ERROR( - vectorized::VExprContext::filter_block(ctx, ref_block, ref_block->columns())); + vectorized::VExprContext::filter_block(ctx.get(), ref_block, ref_block->columns())); } const int row_size = ref_block->rows(); @@ -302,9 +302,8 @@ Status BlockChanger::change_block(vectorized::Block* ref_block, value->ptr(), column, row_size); } } else if (_schema_mapping[idx].expr != nullptr) { - vectorized::VExprContext* ctx = nullptr; - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_tree(&pool, *_schema_mapping[idx].expr, &ctx)); + vectorized::VExprContextSPtr ctx; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(*_schema_mapping[idx].expr, ctx)); Defer defer {[&]() { ctx->close(state); }}; RETURN_IF_ERROR(ctx->prepare(state, row_desc)); RETURN_IF_ERROR(ctx->open(state)); diff --git a/be/src/runtime/fold_constant_executor.cpp b/be/src/runtime/fold_constant_executor.cpp index 36d63a12a1..5e8932ff01 100644 --- a/be/src/runtime/fold_constant_executor.cpp +++ b/be/src/runtime/fold_constant_executor.cpp @@ -79,15 +79,15 @@ Status FoldConstantExecutor::fold_constant_vexpr(const TFoldConstantParams& para for (const auto& m : expr_map) { PExprResultMap pexpr_result_map; for (const auto& n : m.second) { - vectorized::VExprContext* ctx = nullptr; + vectorized::VExprContextSPtr ctx; const TExpr& texpr = n.second; // create expr tree from TExpr - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(&_pool, texpr, &ctx)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(texpr, ctx)); // close context expr Defer defer {[&]() { ctx->close(_runtime_state.get()); }}; // prepare and open context - RETURN_IF_ERROR(_prepare_and_open(ctx)); + RETURN_IF_ERROR(_prepare_and_open(ctx.get())); vectorized::Block tmp_block; tmp_block.insert({vectorized::ColumnUInt8::create(1), diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index 45e155b622..4e31442896 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -47,7 +47,7 @@ namespace doris { Reusable::~Reusable() { - for (vectorized::VExprContext* ctx : _output_exprs_ctxs) { + for (auto& ctx : _output_exprs_ctxs) { ctx->close(_runtime_state.get()); } } @@ -62,8 +62,7 @@ Status Reusable::init(const TDescriptorTable& t_desc_tbl, const std::vectorslots(), 10); } - RETURN_IF_ERROR(vectorized::VExpr::create_expr_trees(_runtime_state->obj_pool(), output_exprs, - &_output_exprs_ctxs)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_trees(output_exprs, _output_exprs_ctxs)); RowDescriptor row_desc(tuple_desc(), false); // Prepare the exprs to run. RETURN_IF_ERROR(vectorized::VExpr::prepare(_output_exprs_ctxs, _runtime_state.get(), row_desc)); diff --git a/be/src/service/point_query_executor.h b/be/src/service/point_query_executor.h index 3b1214d3e6..1f24177dcc 100644 --- a/be/src/service/point_query_executor.h +++ b/be/src/service/point_query_executor.h @@ -51,6 +51,7 @@ #include "util/slice.h" #include "vec/core/block.h" #include "vec/data_types/serde/data_type_serde.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { @@ -60,10 +61,6 @@ class RuntimeState; class TDescriptorTable; class TExpr; -namespace vectorized { -class VExprContext; -} // namespace vectorized - // For caching point lookup pre allocted blocks and exprs class Reusable { public: @@ -89,7 +86,7 @@ public: TupleDescriptor* tuple_desc() { return _desc_tbl->get_tuple_descriptor(0); } - const std::vector& output_exprs() { return _output_exprs_ctxs; } + const vectorized::VExprContextSPtrs& output_exprs() { return _output_exprs_ctxs; } private: // caching TupleDescriptor, output_expr, etc... @@ -98,7 +95,7 @@ private: std::mutex _block_mutex; // prevent from allocte too many tmp blocks std::vector> _block_pool; - std::vector _output_exprs_ctxs; + vectorized::VExprContextSPtrs _output_exprs_ctxs; int64_t _create_timestamp = 0; vectorized::DataTypeSerDeSPtrs _data_type_serdes; std::unordered_map _col_uid_to_idx; diff --git a/be/src/vec/common/sort/vsort_exec_exprs.cpp b/be/src/vec/common/sort/vsort_exec_exprs.cpp index 536914770d..a8dd70ae1c 100644 --- a/be/src/vec/common/sort/vsort_exec_exprs.cpp +++ b/be/src/vec/common/sort/vsort_exec_exprs.cpp @@ -27,9 +27,6 @@ namespace doris { class ObjectPool; class RowDescriptor; class RuntimeState; -namespace vectorized { -class VExprContext; -} // namespace vectorized } // namespace doris namespace doris::vectorized { @@ -50,19 +47,19 @@ Status VSortExecExprs::init(const TSortInfo& sort_info, ObjectPool* pool) { Status VSortExecExprs::init(const std::vector& ordering_exprs, const std::vector* sort_tuple_slot_exprs, ObjectPool* pool) { - RETURN_IF_ERROR(VExpr::create_expr_trees(pool, ordering_exprs, &_lhs_ordering_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(ordering_exprs, _lhs_ordering_expr_ctxs)); if (sort_tuple_slot_exprs != NULL) { _materialize_tuple = true; - RETURN_IF_ERROR(VExpr::create_expr_trees(pool, *sort_tuple_slot_exprs, - &_sort_tuple_slot_expr_ctxs)); + RETURN_IF_ERROR( + VExpr::create_expr_trees(*sort_tuple_slot_exprs, _sort_tuple_slot_expr_ctxs)); } else { _materialize_tuple = false; } return Status::OK(); } -Status VSortExecExprs::init(const std::vector& lhs_ordering_expr_ctxs, - const std::vector& rhs_ordering_expr_ctxs) { +Status VSortExecExprs::init(const VExprContextSPtrs& lhs_ordering_expr_ctxs, + const VExprContextSPtrs& rhs_ordering_expr_ctxs) { _lhs_ordering_expr_ctxs = lhs_ordering_expr_ctxs; _rhs_ordering_expr_ctxs = rhs_ordering_expr_ctxs; return Status::OK(); @@ -83,7 +80,7 @@ Status VSortExecExprs::open(RuntimeState* state) { } RETURN_IF_ERROR(VExpr::open(_lhs_ordering_expr_ctxs, state)); RETURN_IF_ERROR( - VExpr::clone_if_not_exists(_lhs_ordering_expr_ctxs, state, &_rhs_ordering_expr_ctxs)); + VExpr::clone_if_not_exists(_lhs_ordering_expr_ctxs, state, _rhs_ordering_expr_ctxs)); return Status::OK(); } diff --git a/be/src/vec/common/sort/vsort_exec_exprs.h b/be/src/vec/common/sort/vsort_exec_exprs.h index 14ce9ffbcb..8179c93488 100644 --- a/be/src/vec/common/sort/vsort_exec_exprs.h +++ b/be/src/vec/common/sort/vsort_exec_exprs.h @@ -20,6 +20,7 @@ #include #include "common/status.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { @@ -36,7 +37,6 @@ class TSortInfo; // If _materialize_tuple is true, SortExecExprs also stores the slot expressions used to // materialize the sort tuples. namespace vectorized { -class VExprContext; class VSortExecExprs { public: @@ -53,19 +53,15 @@ public: // close all expressions used for sorting and tuple materialization. void close(RuntimeState* state); - const std::vector& sort_tuple_slot_expr_ctxs() const { + const VExprContextSPtrs& sort_tuple_slot_expr_ctxs() const { return _sort_tuple_slot_expr_ctxs; } // Can only be used after calling prepare() - const std::vector& lhs_ordering_expr_ctxs() const { - return _lhs_ordering_expr_ctxs; - } + const VExprContextSPtrs& lhs_ordering_expr_ctxs() const { return _lhs_ordering_expr_ctxs; } // Can only be used after calling open() - const std::vector& rhs_ordering_expr_ctxs() const { - return _rhs_ordering_expr_ctxs; - } + const VExprContextSPtrs& rhs_ordering_expr_ctxs() const { return _rhs_ordering_expr_ctxs; } bool need_materialize_tuple() const { return _materialize_tuple; } @@ -75,8 +71,8 @@ public: private: // Create two VExprContexts for evaluating over the TupleRows. - std::vector _lhs_ordering_expr_ctxs; - std::vector _rhs_ordering_expr_ctxs; + VExprContextSPtrs _lhs_ordering_expr_ctxs; + VExprContextSPtrs _rhs_ordering_expr_ctxs; // If true, the tuples to be sorted are materialized by // _sort_tuple_slot_exprs before the actual sort is performed. @@ -85,7 +81,7 @@ private: // Expressions used to materialize slots in the tuples to be sorted. // One expr per slot in the materialized tuple. Valid only if // _materialize_tuple is true. - std::vector _sort_tuple_slot_expr_ctxs; + VExprContextSPtrs _sort_tuple_slot_expr_ctxs; // for some reason, _sort_tuple_slot_expr_ctxs is not-null but _lhs_ordering_expr_ctxs is nullable // this flag list would be used to convert column to nullable. @@ -94,8 +90,8 @@ private: // Initialize directly from already-created VExprContexts. Callers should manually call // Prepare(), Open(), and Close() on input VExprContexts (instead of calling the // analogous functions in this class). Used for testing. - Status init(const std::vector& lhs_ordering_expr_ctxs, - const std::vector& rhs_ordering_expr_ctxs); + Status init(const VExprContextSPtrs& lhs_ordering_expr_ctxs, + const VExprContextSPtrs& rhs_ordering_expr_ctxs); // Initialize the ordering and (optionally) materialization expressions from the thrift // TExprs into the specified pool. sort_tuple_slot_exprs is NULL if the tuple is not diff --git a/be/src/vec/core/sort_cursor.h b/be/src/vec/core/sort_cursor.h index 8c6accaba8..e1efe7cd01 100644 --- a/be/src/vec/core/sort_cursor.h +++ b/be/src/vec/core/sort_cursor.h @@ -208,7 +208,7 @@ using BlockSupplier = std::function; struct BlockSupplierSortCursorImpl : public MergeSortCursorImpl { BlockSupplierSortCursorImpl(const BlockSupplier& block_supplier, - const std::vector& ordering_expr, + const VExprContextSPtrs& ordering_expr, const std::vector& is_asc_order, const std::vector& nulls_first) : _ordering_expr(ordering_expr), _block_supplier(block_supplier) { @@ -266,7 +266,7 @@ struct BlockSupplierSortCursorImpl : public MergeSortCursorImpl { return _block.clone_with_columns(std::move(columns)); } - std::vector _ordering_expr; + VExprContextSPtrs _ordering_expr; Block _block; BlockSupplier _block_supplier {}; bool _is_eof = false; diff --git a/be/src/vec/exec/format/generic_reader.h b/be/src/vec/exec/format/generic_reader.h index fc09a23710..d83cc1d2ce 100644 --- a/be/src/vec/exec/format/generic_reader.h +++ b/be/src/vec/exec/format/generic_reader.h @@ -56,7 +56,7 @@ public: virtual Status set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) { + const std::unordered_map& missing_columns) { return Status::OK(); } diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index f6f9d136e2..b414247c7c 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -241,10 +241,10 @@ Status OrcReader::_create_file_reader() { Status OrcReader::init_reader( std::unordered_map* colname_to_value_range, - VExprContext* vconjunct_ctx) { + VExprContextSPtrs& conjuncts) { _colname_to_value_range = colname_to_value_range; - _lazy_read_ctx.vconjunct_ctx = vconjunct_ctx; _text_converter.reset(new TextConverter('\\')); + _lazy_read_ctx.conjuncts = conjuncts; SCOPED_RAW_TIMER(&_statistics.parse_meta_time); RETURN_IF_ERROR(_create_file_reader()); RETURN_IF_ERROR(_init_read_columns()); @@ -622,7 +622,7 @@ bool OrcReader::_init_search_argument( Status OrcReader::set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) { + const std::unordered_map& missing_columns) { SCOPED_RAW_TIMER(&_statistics.parse_meta_time); // std::unordered_map> @@ -642,28 +642,29 @@ Status OrcReader::set_fill_columns( return; } else if (VRuntimeFilterWrapper* runtime_filter = typeid_cast(expr)) { - VExpr* filter_impl = const_cast(runtime_filter->get_impl()); + auto filter_impl = const_cast(runtime_filter->get_impl().get()); if (VBloomPredicate* bloom_predicate = typeid_cast(filter_impl)) { - for (VExpr* child : bloom_predicate->children()) { - visit_slot(child); + for (auto& child : bloom_predicate->children()) { + visit_slot(child.get()); } } else if (VInPredicate* in_predicate = typeid_cast(filter_impl)) { if (in_predicate->children().size() > 0) { - visit_slot(in_predicate->children()[0]); + visit_slot(in_predicate->children()[0].get()); } } else { - for (VExpr* child : filter_impl->children()) { - visit_slot(child); + for (auto& child : filter_impl->children()) { + visit_slot(child.get()); } } } else { - for (VExpr* child : expr->children()) { - visit_slot(child); + for (auto& child : expr->children()) { + visit_slot(child.get()); } } }; - if (_lazy_read_ctx.vconjunct_ctx != nullptr) { - visit_slot(_lazy_read_ctx.vconjunct_ctx->root()); + + for (auto& conjunct : _lazy_read_ctx.conjuncts) { + visit_slot(conjunct->root().get()); } for (auto& read_col : _read_cols_lower_case) { @@ -778,7 +779,7 @@ Status OrcReader::_fill_partition_columns( Status OrcReader::_fill_missing_columns( Block* block, size_t rows, - const std::unordered_map& missing_columns) { + const std::unordered_map& missing_columns) { for (auto& kv : missing_columns) { if (kv.second == nullptr) { // no default column, fill with null @@ -787,7 +788,7 @@ Status OrcReader::_fill_missing_columns( nullable_column->insert_many_defaults(rows); } else { // fill with default value - auto* ctx = kv.second; + auto& ctx = kv.second; auto origin_column_num = block->columns(); int result_column_id = -1; // PT1 => dest primitive type @@ -1237,18 +1238,14 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { _fill_partition_columns(block, *read_rows, _lazy_read_ctx.partition_columns)); RETURN_IF_ERROR(_fill_missing_columns(block, *read_rows, _lazy_read_ctx.missing_columns)); - if (_lazy_read_ctx.vconjunct_ctx != nullptr) { - std::vector columns_to_filter; + if (!_lazy_read_ctx.conjuncts.empty()) { int column_to_keep = block->columns(); - columns_to_filter.resize(column_to_keep); - for (uint32_t i = 0; i < column_to_keep; ++i) { - columns_to_filter[i] = i; + VExprContextSPtrs filter_conjuncts; + for (auto& conjunct : _lazy_read_ctx.conjuncts) { + filter_conjuncts.push_back(conjunct); } - std::vector filter_conjuncts; - filter_conjuncts.push_back(_lazy_read_ctx.vconjunct_ctx); - RETURN_IF_CATCH_EXCEPTION( - RETURN_IF_ERROR(VExprContext::execute_conjuncts_and_filter_block( - filter_conjuncts, nullptr, block, columns_to_filter, column_to_keep))); + RETURN_IF_ERROR( + VExprContext::filter_block(_lazy_read_ctx.conjuncts, block, column_to_keep)); } } return Status::OK(); @@ -1291,8 +1288,10 @@ Status OrcReader::filter(orc::ColumnVectorBatch& data, uint16_t* sel, uint16_t s _filter.reset(new IColumn::Filter(size, 1)); auto* __restrict result_filter_data = _filter->data(); bool can_filter_all = false; - std::vector filter_conjuncts; - filter_conjuncts.push_back(_lazy_read_ctx.vconjunct_ctx); + VExprContextSPtrs filter_conjuncts; + for (auto& conjunct : _lazy_read_ctx.conjuncts) { + filter_conjuncts.push_back(conjunct); + } RETURN_IF_CATCH_EXCEPTION(RETURN_IF_ERROR(VExprContext::execute_conjuncts( filter_conjuncts, nullptr, block, _filter.get(), &can_filter_all))); diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 4af091981e..52a230147a 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -90,7 +90,7 @@ struct OrcPredicate { }; struct LazyReadContext { - VExprContext* vconjunct_ctx = nullptr; + VExprContextSPtrs conjuncts; bool can_lazy_read = false; // block->rows() returns the number of rows of the first column, // so we should check and resize the first column @@ -109,9 +109,9 @@ struct LazyReadContext { // lazy read partition columns or all partition columns std::unordered_map> partition_columns; - std::unordered_map predicate_missing_columns; + std::unordered_map predicate_missing_columns; // lazy read missing columns or all missing columns - std::unordered_map missing_columns; + std::unordered_map missing_columns; }; class OrcReader : public GenericReader { @@ -142,12 +142,12 @@ public: Status init_reader( std::unordered_map* colname_to_value_range, - VExprContext* vconjunct_ctx); + VExprContextSPtrs& conjuncts); Status set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) override; + const std::unordered_map& missing_columns) override; Status _init_select_types(const orc::Type& type, int idx); @@ -157,7 +157,7 @@ public: partition_columns); Status _fill_missing_columns( Block* block, size_t rows, - const std::unordered_map& missing_columns); + const std::unordered_map& missing_columns); Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index abda93afde..f04f46c9a2 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -97,7 +97,7 @@ RowGroupReader::RowGroupReader(io::FileReaderSPtr file_reader, RowGroupReader::~RowGroupReader() { _column_readers.clear(); - for (auto* ctx : _dict_filter_conjuncts) { + for (auto& ctx : _dict_filter_conjuncts) { if (ctx) { ctx->close(_state); } @@ -110,8 +110,8 @@ Status RowGroupReader::init( std::unordered_map& col_offsets, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, - const std::vector* not_single_slot_filter_conjuncts, - const std::unordered_map>* slot_id_to_filter_conjuncts) { + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts) { _tuple_descriptor = tuple_descriptor; _row_descriptor = row_descriptor; _col_name_to_slot_id = colname_to_slot_id; @@ -162,7 +162,7 @@ Status RowGroupReader::init( } else { if (_slot_id_to_filter_conjuncts->find(slot_id) != _slot_id_to_filter_conjuncts->end()) { - for (VExprContext* ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { + for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { _filter_conjuncts.push_back(ctx); } } @@ -174,7 +174,7 @@ Status RowGroupReader::init( auto& [value, slot_desc] = kv.second; auto iter = _slot_id_to_filter_conjuncts->find(slot_desc->id()); if (iter != _slot_id_to_filter_conjuncts->end()) { - for (VExprContext* ctx : iter->second) { + for (auto& ctx : iter->second) { _filter_conjuncts.push_back(ctx); } } @@ -206,8 +206,8 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id, } // TODO:check expr like 'a > 10 is null', 'a > 10' should can be filter by dict. - for (VExprContext* ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { - const VExpr* root_expr = ctx->root(); + for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { + const auto& root_expr = ctx->root(); if (root_expr->node_type() == TExprNodeType::FUNCTION_CALL) { std::string is_null_str; std::string function_name = root_expr->fn().name.function_name; @@ -296,8 +296,7 @@ Status RowGroupReader::next_batch(Block* block, size_t batch_size, size_t* read_ _fill_partition_columns(block, *read_rows, _lazy_read_ctx.partition_columns)); RETURN_IF_ERROR(_fill_missing_columns(block, *read_rows, _lazy_read_ctx.missing_columns)); - Status st = - VExprContext::filter_block(_lazy_read_ctx.vconjunct_ctx, block, block->columns()); + Status st = VExprContext::filter_block(_lazy_read_ctx.conjuncts, block, block->columns()); *read_rows = block->rows(); return st; } @@ -326,11 +325,12 @@ Status RowGroupReader::next_batch(Block* block, size_t batch_size, size_t* read_ for (uint32_t i = 0; i < column_to_keep; ++i) { columns_to_filter[i] = i; } - if (_lazy_read_ctx.vconjunct_ctx != nullptr) { + if (!_lazy_read_ctx.conjuncts.empty()) { std::vector filters; if (_position_delete_ctx.has_filter) { filters.push_back(_pos_delete_filter_ptr.get()); } + RETURN_IF_CATCH_EXCEPTION( RETURN_IF_ERROR(VExprContext::execute_conjuncts_and_filter_block( _filter_conjuncts, &filters, block, columns_to_filter, @@ -441,7 +441,12 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re if (_position_delete_ctx.has_filter) { filters.push_back(_pos_delete_filter_ptr.get()); } - RETURN_IF_ERROR(VExprContext::execute_conjuncts(_filter_conjuncts, &filters, block, + + VExprContextSPtrs filter_contexts; + for (auto& conjunct : _filter_conjuncts) { + filter_contexts.emplace_back(conjunct.get()); + } + RETURN_IF_ERROR(VExprContext::execute_conjuncts(filter_contexts, &filters, block, &result_filter, &can_filter_all)); if (_lazy_read_ctx.resize_first_column) { @@ -589,7 +594,7 @@ Status RowGroupReader::_fill_partition_columns( Status RowGroupReader::_fill_missing_columns( Block* block, size_t rows, - const std::unordered_map& missing_columns) { + const std::unordered_map& missing_columns) { for (auto& kv : missing_columns) { if (kv.second == nullptr) { // no default column, fill with null @@ -598,7 +603,7 @@ Status RowGroupReader::_fill_missing_columns( nullable_column->insert_many_defaults(rows); } else { // fill with default value - auto* ctx = kv.second; + auto& ctx = kv.second; auto origin_column_num = block->columns(); int result_column_id = -1; // PT1 => dest primitive type @@ -761,10 +766,13 @@ Status RowGroupReader::_rewrite_dict_predicates() { } // 2.2 Execute conjuncts and filter block. - const std::vector* ctxs = nullptr; + VExprContextSPtrs ctxs; auto iter = _slot_id_to_filter_conjuncts->find(slot_id); if (iter != _slot_id_to_filter_conjuncts->end()) { - ctxs = &(iter->second); + for (auto& ctx : iter->second) { + ctxs.emplace_back(ctx.get()); + _filter_conjuncts.push_back(ctx); + } } else { std::stringstream msg; msg << "_slot_id_to_filter_conjuncts: slot_id [" << slot_id << "] not found"; @@ -779,7 +787,7 @@ Status RowGroupReader::_rewrite_dict_predicates() { temp_block.get_by_position(0).column->assume_mutable()->resize(dict_value_column_size); } RETURN_IF_CATCH_EXCEPTION(RETURN_IF_ERROR(VExprContext::execute_conjuncts_and_filter_block( - *ctxs, nullptr, &temp_block, columns_to_filter, column_to_keep))); + ctxs, nullptr, &temp_block, columns_to_filter, column_to_keep))); if (dict_pos != 0) { // We have to clean the first column to insert right data. temp_block.get_by_position(0).column->assume_mutable()->clear(); @@ -795,9 +803,6 @@ Status RowGroupReader::_rewrite_dict_predicates() { // About Performance: if dict_column size is too large, it will generate a large IN filter. if (dict_column->size() > MAX_DICT_CODE_PREDICATE_TO_REWRITE) { - for (auto& ctx : (*ctxs)) { - _filter_conjuncts.push_back(ctx); - } it = _dict_filter_cols.erase(it); continue; } @@ -825,7 +830,7 @@ Status RowGroupReader::_rewrite_dict_predicates() { Status RowGroupReader::_rewrite_dict_conjuncts(std::vector& dict_codes, int slot_id, bool is_nullable) { - VExpr* root; + VExprSPtr root; if (dict_codes.size() == 1) { { TFunction fn; @@ -850,7 +855,7 @@ Status RowGroupReader::_rewrite_dict_conjuncts(std::vector& dict_codes, texpr_node.__set_child_type(TPrimitiveType::INT); texpr_node.__set_num_children(2); texpr_node.__set_is_nullable(is_nullable); - root = _obj_pool->add(VectorizedFnCall::create_unique(texpr_node).release()); + root = VectorizedFnCall::create_shared(texpr_node); } { SlotDescriptor* slot = nullptr; @@ -861,8 +866,7 @@ Status RowGroupReader::_rewrite_dict_conjuncts(std::vector& dict_codes, break; } } - VExpr* slot_ref_expr = _obj_pool->add(VSlotRef::create_unique(slot).release()); - root->add_child(slot_ref_expr); + root->add_child(VSlotRef::create_shared(slot)); } { TExprNode texpr_node; @@ -872,8 +876,7 @@ Status RowGroupReader::_rewrite_dict_conjuncts(std::vector& dict_codes, int_literal.__set_value(dict_codes[0]); texpr_node.__set_int_literal(int_literal); texpr_node.__set_is_nullable(is_nullable); - VExpr* literal_expr = _obj_pool->add(VLiteral::create_unique(texpr_node).release()); - root->add_child(literal_expr); + root->add_child(VLiteral::create_shared(texpr_node)); } } else { { @@ -888,13 +891,13 @@ Status RowGroupReader::_rewrite_dict_conjuncts(std::vector& dict_codes, // VdirectInPredicate assume is_nullable = false. node.__set_is_nullable(false); - root = _obj_pool->add(vectorized::VDirectInPredicate::create_unique(node).release()); + root = vectorized::VDirectInPredicate::create_shared(node); std::shared_ptr hybrid_set( create_set(PrimitiveType::TYPE_INT, dict_codes.size())); for (int j = 0; j < dict_codes.size(); ++j) { hybrid_set->insert(&dict_codes[j]); } - static_cast(root)->set_filter(hybrid_set); + static_cast(root.get())->set_filter(hybrid_set); } { SlotDescriptor* slot = nullptr; @@ -905,12 +908,10 @@ Status RowGroupReader::_rewrite_dict_conjuncts(std::vector& dict_codes, break; } } - VExpr* slot_ref_expr = _obj_pool->add(VSlotRef::create_unique(slot).release()); - root->add_child(slot_ref_expr); + root->add_child(VSlotRef::create_shared(slot)); } } - VExprContext* rewritten_conjunct_ctx = - _obj_pool->add(VExprContext::create_unique(root).release()); + VExprContextSPtr rewritten_conjunct_ctx = VExprContext::create_shared(root); RETURN_IF_ERROR(rewritten_conjunct_ctx->prepare(_state, *_row_descriptor)); RETURN_IF_ERROR(rewritten_conjunct_ctx->open(_state)); _dict_filter_conjuncts.push_back(rewritten_conjunct_ctx); diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.h b/be/src/vec/exec/format/parquet/vparquet_group_reader.h index a6af66d7a3..393f738857 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.h @@ -32,6 +32,7 @@ #include "vec/columns/column.h" #include "vec/common/allocator.h" #include "vec/exec/format/parquet/parquet_common.h" +#include "vec/exprs/vexpr_fwd.h" #include "vparquet_column_reader.h" namespace cctz { @@ -50,7 +51,6 @@ class IOContext; namespace vectorized { class Block; class FieldDescriptor; -class VExprContext; } // namespace vectorized } // namespace doris namespace tparquet { @@ -77,7 +77,7 @@ public: }; struct LazyReadContext { - VExprContext* vconjunct_ctx = nullptr; + VExprContextSPtrs conjuncts; bool can_lazy_read = false; // block->rows() returns the number of rows of the first column, // so we should check and resize the first column @@ -95,9 +95,9 @@ public: // lazy read partition columns or all partition columns std::unordered_map> partition_columns; - std::unordered_map predicate_missing_columns; + std::unordered_map predicate_missing_columns; // lazy read missing columns or all missing columns - std::unordered_map missing_columns; + std::unordered_map missing_columns; }; /** @@ -148,13 +148,12 @@ public: const LazyReadContext& lazy_read_ctx, RuntimeState* state); ~RowGroupReader(); - Status init( - const FieldDescriptor& schema, std::vector& row_ranges, - std::unordered_map& col_offsets, - const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, - const std::unordered_map* colname_to_slot_id, - const std::vector* not_single_slot_filter_conjuncts, - const std::unordered_map>* slot_id_to_filter_conjuncts); + Status init(const FieldDescriptor& schema, std::vector& row_ranges, + std::unordered_map& col_offsets, + const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, + const std::unordered_map* colname_to_slot_id, + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts); Status next_batch(Block* block, size_t batch_size, size_t* read_rows, bool* batch_eof); int64_t lazy_read_filtered_rows() const { return _lazy_read_filtered_rows; } @@ -175,7 +174,7 @@ private: partition_columns); Status _fill_missing_columns( Block* block, size_t rows, - const std::unordered_map& missing_columns); + const std::unordered_map& missing_columns); Status _build_pos_delete_filter(size_t read_rows); Status _filter_block(Block* block, int column_to_keep, const std::vector& columns_to_filter); @@ -210,9 +209,9 @@ private: const TupleDescriptor* _tuple_descriptor; const RowDescriptor* _row_descriptor; const std::unordered_map* _col_name_to_slot_id; - const std::unordered_map>* _slot_id_to_filter_conjuncts; - std::vector _dict_filter_conjuncts; - std::vector _filter_conjuncts; + const std::unordered_map* _slot_id_to_filter_conjuncts; + VExprContextSPtrs _dict_filter_conjuncts; + VExprContextSPtrs _filter_conjuncts; // std::pair std::vector> _dict_filter_cols; RuntimeState* _state; diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index a9bf8dc6c8..23e1fcf7d9 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -283,11 +283,11 @@ Status ParquetReader::init_reader( const std::vector& all_column_names, const std::vector& missing_column_names, std::unordered_map* colname_to_value_range, - VExprContext* vconjunct_ctx, const TupleDescriptor* tuple_descriptor, + const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, - const std::vector* not_single_slot_filter_conjuncts, - const std::unordered_map>* slot_id_to_filter_conjuncts, + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts, bool filter_groups) { _tuple_descriptor = tuple_descriptor; _row_descriptor = row_descriptor; @@ -325,7 +325,7 @@ Status ParquetReader::init_reader( _colname_to_value_range = colname_to_value_range; RETURN_IF_ERROR(_init_read_columns()); // build column predicates for column lazy read - _lazy_read_ctx.vconjunct_ctx = vconjunct_ctx; + _lazy_read_ctx.conjuncts = conjuncts; RETURN_IF_ERROR(_init_row_groups(filter_groups)); return Status::OK(); } @@ -333,7 +333,7 @@ Status ParquetReader::init_reader( Status ParquetReader::set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) { + const std::unordered_map& missing_columns) { SCOPED_RAW_TIMER(&_statistics.parse_meta_time); // std::unordered_map> std::unordered_map> predicate_columns; @@ -352,28 +352,30 @@ Status ParquetReader::set_fill_columns( return; } else if (VRuntimeFilterWrapper* runtime_filter = typeid_cast(expr)) { - VExpr* filter_impl = const_cast(runtime_filter->get_impl()); + VExpr* filter_impl = const_cast(runtime_filter->get_impl().get()); if (VBloomPredicate* bloom_predicate = typeid_cast(filter_impl)) { - for (VExpr* child : bloom_predicate->children()) { - visit_slot(child); + for (auto& child : bloom_predicate->children()) { + visit_slot(child.get()); } } else if (VInPredicate* in_predicate = typeid_cast(filter_impl)) { if (in_predicate->children().size() > 0) { - visit_slot(in_predicate->children()[0]); + visit_slot(in_predicate->children()[0].get()); } } else { - for (VExpr* child : filter_impl->children()) { - visit_slot(child); + for (auto& child : filter_impl->children()) { + visit_slot(child.get()); } } } else { - for (VExpr* child : expr->children()) { - visit_slot(child); + for (auto& child : expr->children()) { + visit_slot(child.get()); } } }; - if (_lazy_read_ctx.vconjunct_ctx != nullptr) { - visit_slot(_lazy_read_ctx.vconjunct_ctx->root()); + if (!_lazy_read_ctx.conjuncts.empty()) { + for (auto& conjunct : _lazy_read_ctx.conjuncts) { + visit_slot(conjunct->root().get()); + } } const FieldDescriptor& schema = _file_metadata->schema(); @@ -700,7 +702,7 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group, _statistics.read_rows += row_group.num_rows; }; - if (_has_complex_type || _lazy_read_ctx.vconjunct_ctx == nullptr || + if (_has_complex_type || _lazy_read_ctx.conjuncts.empty() || _colname_to_value_range == nullptr || _colname_to_value_range->empty()) { read_whole_row_group(); return Status::OK(); diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h b/be/src/vec/exec/format/parquet/vparquet_reader.h index 4f45e6979f..9ae83845cf 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_reader.h @@ -108,11 +108,11 @@ public: const std::vector& all_column_names, const std::vector& missing_column_names, std::unordered_map* colname_to_value_range, - VExprContext* vconjunct_ctx, const TupleDescriptor* tuple_descriptor, + const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, - const std::vector* not_single_slot_filter_conjuncts, - const std::unordered_map>* slot_id_to_filter_conjuncts, + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts, bool filter_groups = true); Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; @@ -140,7 +140,7 @@ public: Status set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) override; + const std::unordered_map& missing_columns) override; std::vector get_metadata_key_values(); void set_table_to_file_col_map(std::unordered_map& map) { @@ -261,7 +261,7 @@ private: const TupleDescriptor* _tuple_descriptor; const RowDescriptor* _row_descriptor; const std::unordered_map* _colname_to_slot_id; - const std::vector* _not_single_slot_filter_conjuncts; - const std::unordered_map>* _slot_id_to_filter_conjuncts; + const VExprContextSPtrs* _not_single_slot_filter_conjuncts; + const std::unordered_map* _slot_id_to_filter_conjuncts; }; } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/iceberg_reader.cpp b/be/src/vec/exec/format/table/iceberg_reader.cpp index 6144863bfa..6d2f572586 100644 --- a/be/src/vec/exec/format/table/iceberg_reader.cpp +++ b/be/src/vec/exec/format/table/iceberg_reader.cpp @@ -114,11 +114,11 @@ Status IcebergTableReader::init_reader( const std::vector& file_col_names, const std::unordered_map& col_id_name_map, std::unordered_map* colname_to_value_range, - VExprContext* vconjunct_ctx, const TupleDescriptor* tuple_descriptor, + const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, - const std::vector* not_single_slot_filter_conjuncts, - const std::unordered_map>* slot_id_to_filter_conjuncts) { + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts) { ParquetReader* parquet_reader = static_cast(_file_format_reader.get()); _col_id_name_map = col_id_name_map; _file_col_names = file_col_names; @@ -130,7 +130,7 @@ Status IcebergTableReader::init_reader( parquet_reader->set_table_to_file_col_map(_table_col_to_file_col); Status status = parquet_reader->init_reader( _all_required_col_names, _not_in_file_col_names, &_new_colname_to_value_range, - vconjunct_ctx, tuple_descriptor, row_descriptor, colname_to_slot_id, + conjuncts, tuple_descriptor, row_descriptor, colname_to_slot_id, not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts); return status; } @@ -167,7 +167,7 @@ Status IcebergTableReader::get_next_block(Block* block, size_t* read_rows, bool* Status IcebergTableReader::set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) { + const std::unordered_map& missing_columns) { return _file_format_reader->set_fill_columns(partition_columns, missing_columns); } @@ -248,7 +248,7 @@ Status IcebergTableReader::_position_delete( return nullptr; } create_status = delete_reader.init_reader(delete_file_col_names, _not_in_file_col_names, - nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, {}, nullptr, nullptr, nullptr, nullptr, nullptr, false); if (!create_status.ok()) { return nullptr; @@ -256,7 +256,7 @@ Status IcebergTableReader::_position_delete( std::unordered_map> partition_columns; - std::unordered_map missing_columns; + std::unordered_map missing_columns; delete_reader.set_fill_columns(partition_columns, missing_columns); bool dictionary_coded = true; diff --git a/be/src/vec/exec/format/table/iceberg_reader.h b/be/src/vec/exec/format/table/iceberg_reader.h index e469b54ae7..451c51445e 100644 --- a/be/src/vec/exec/format/table/iceberg_reader.h +++ b/be/src/vec/exec/format/table/iceberg_reader.h @@ -80,7 +80,7 @@ public: Status set_fill_columns( const std::unordered_map>& partition_columns, - const std::unordered_map& missing_columns) override; + const std::unordered_map& missing_columns) override; bool fill_all_columns() const override; @@ -91,11 +91,11 @@ public: const std::vector& file_col_names, const std::unordered_map& col_id_name_map, std::unordered_map* colname_to_value_range, - VExprContext* vconjunct_ctx, const TupleDescriptor* tuple_descriptor, + const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, - const std::vector* not_single_slot_filter_conjuncts, - const std::unordered_map>* slot_id_to_filter_conjuncts); + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts); enum { DATA, POSITION_DELETE, EQUALITY_DELETE }; diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h index e1ce4c1c91..5ef335ea52 100644 --- a/be/src/vec/exec/join/process_hash_table_probe_impl.h +++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h @@ -677,11 +677,20 @@ Status ProcessHashTableProbe::do_process_with_other_join_conjuncts( // dispose the other join conjunct exec auto row_count = output_block->rows(); if (row_count) { - int result_column_id = -1; int orig_columns = output_block->columns(); - RETURN_IF_ERROR((*_join_node->_vother_join_conjunct_ptr) - ->execute(output_block, &result_column_id)); + IColumn::Filter other_conjunct_filter(row_count, 1); + bool can_be_filter_all; + RETURN_IF_ERROR(VExprContext::execute_conjuncts( + _join_node->_other_join_conjuncts, nullptr, output_block, + &other_conjunct_filter, &can_be_filter_all)); + auto result_column_id = output_block->columns(); + auto filter_column = ColumnVector::create(); + if (can_be_filter_all) { + memset(other_conjunct_filter.data(), 0, row_count); + } + filter_column->get_data() = std::move(other_conjunct_filter); + output_block->insert({std::move(filter_column), std::make_shared(), ""}); auto column = output_block->get_by_position(result_column_id).column; if constexpr (JoinOpType == TJoinOp::LEFT_OUTER_JOIN || JoinOpType == TJoinOp::FULL_OUTER_JOIN) { diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index dedcfdf9d2..2e91f199c2 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -346,10 +346,10 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { std::vector probe_not_ignore_null(eq_join_conjuncts.size()); size_t conjuncts_index = 0; for (const auto& eq_join_conjunct : eq_join_conjuncts) { - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, eq_join_conjunct.left, &ctx)); + VExprContextSPtr ctx; + RETURN_IF_ERROR(VExpr::create_expr_tree(eq_join_conjunct.left, ctx)); _probe_expr_ctxs.push_back(ctx); - RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, eq_join_conjunct.right, &ctx)); + RETURN_IF_ERROR(VExpr::create_expr_tree(eq_join_conjunct.right, ctx)); _build_expr_ctxs.push_back(ctx); bool null_aware = eq_join_conjunct.__isset.opcode && @@ -371,10 +371,17 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { _probe_column_disguise_null.reserve(eq_join_conjuncts.size()); - if (tnode.hash_join_node.__isset.vother_join_conjunct) { - _vother_join_conjunct_ptr.reset(new VExprContext*); - RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, tnode.hash_join_node.vother_join_conjunct, - _vother_join_conjunct_ptr.get())); + if (tnode.hash_join_node.__isset.other_join_conjuncts && + !tnode.hash_join_node.other_join_conjuncts.empty()) { + RETURN_IF_ERROR(VExpr::create_expr_trees(tnode.hash_join_node.other_join_conjuncts, + _other_join_conjuncts)); + + DCHECK(!_build_unique); + DCHECK(_have_other_join_conjunct); + } else if (tnode.hash_join_node.__isset.vother_join_conjunct) { + _other_join_conjuncts.resize(1); + RETURN_IF_ERROR(VExpr::create_expr_tree(tnode.hash_join_node.vother_join_conjunct, + _other_join_conjuncts[0])); // If LEFT SEMI JOIN/LEFT ANTI JOIN with not equal predicate, // build table should not be deduplicated. @@ -474,9 +481,9 @@ Status HashJoinNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(VExpr::prepare(_build_expr_ctxs, state, child(1)->row_desc())); RETURN_IF_ERROR(VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc())); - // _vother_join_conjuncts are evaluated in the context of the rows produced by this node - if (_vother_join_conjunct_ptr) { - RETURN_IF_ERROR((*_vother_join_conjunct_ptr)->prepare(state, *_intermediate_row_desc)); + // _other_join_conjuncts are evaluated in the context of the rows produced by this node + for (auto& conjunct : _other_join_conjuncts) { + RETURN_IF_ERROR(conjunct->prepare(state, *_intermediate_row_desc)); } RETURN_IF_ERROR(VExpr::prepare(_output_expr_ctxs, state, *_intermediate_row_desc)); @@ -605,8 +612,7 @@ Status HashJoinNode::pull(doris::RuntimeState* state, vectorized::Block* output_ DCHECK(output_rows <= state->batch_size()); { SCOPED_TIMER(_join_filter_timer); - RETURN_IF_ERROR( - VExprContext::filter_block(_vconjunct_ctx_ptr, &temp_block, temp_block.columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, &temp_block, temp_block.columns())); } RETURN_IF_ERROR(_build_output_block(&temp_block, output_block)); _reset_tuple_is_null_column(); @@ -753,8 +759,8 @@ Status HashJoinNode::alloc_resource(doris::RuntimeState* state) { } RETURN_IF_ERROR(VExpr::open(_build_expr_ctxs, state)); RETURN_IF_ERROR(VExpr::open(_probe_expr_ctxs, state)); - if (_vother_join_conjunct_ptr) { - RETURN_IF_ERROR((*_vother_join_conjunct_ptr)->open(state)); + for (auto& conjunct : _other_join_conjuncts) { + RETURN_IF_ERROR(conjunct->open(state)); } return Status::OK(); } @@ -763,8 +769,8 @@ void HashJoinNode::release_resource(RuntimeState* state) { VExpr::close(_build_expr_ctxs, state); VExpr::close(_probe_expr_ctxs, state); - if (_vother_join_conjunct_ptr) { - (*_vother_join_conjunct_ptr)->close(state); + for (auto& conjunct : _other_join_conjuncts) { + conjunct->close(state); } _release_mem(); VJoinNodeBase::release_resource(state); @@ -985,7 +991,7 @@ Status HashJoinNode::_extract_join_column(Block& block, ColumnUInt8::MutablePtr& return Status::OK(); } -Status HashJoinNode::_do_evaluate(Block& block, std::vector& exprs, +Status HashJoinNode::_do_evaluate(Block& block, VExprContextSPtrs& exprs, RuntimeProfile::Counter& expr_call_timer, std::vector& res_col_ids) { for (size_t i = 0; i < exprs.size(); ++i) { diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index eef8b30b90..33ae53f923 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -43,6 +43,7 @@ #include "vec/core/block.h" #include "vec/core/types.h" #include "vec/exec/join/join_op.h" // IWYU pragma: keep +#include "vec/exprs/vexpr_fwd.h" #include "vec/runtime/shared_hash_table_controller.h" #include "vjoin_node_base.h" @@ -252,13 +253,12 @@ public: bool should_build_hash_table() const { return _should_build_hash_table; } private: - using VExprContexts = std::vector; // probe expr - VExprContexts _probe_expr_ctxs; + VExprContextSPtrs _probe_expr_ctxs; // build expr - VExprContexts _build_expr_ctxs; + VExprContextSPtrs _build_expr_ctxs; // other expr - std::unique_ptr _vother_join_conjunct_ptr; + VExprContextSPtrs _other_join_conjuncts; // mark the join column whether support null eq std::vector _is_null_safe_eq_join; @@ -346,7 +346,7 @@ private: Status _process_build_block(RuntimeState* state, Block& block, uint8_t offset); - Status _do_evaluate(Block& block, std::vector& exprs, + Status _do_evaluate(Block& block, VExprContextSPtrs& exprs, RuntimeProfile::Counter& expr_call_timer, std::vector& res_col_ids); template diff --git a/be/src/vec/exec/join/vjoin_node_base.cpp b/be/src/vec/exec/join/vjoin_node_base.cpp index f6f84b2b91..3bc1d93c8b 100644 --- a/be/src/vec/exec/join/vjoin_node_base.cpp +++ b/be/src/vec/exec/join/vjoin_node_base.cpp @@ -54,9 +54,10 @@ VJoinNodeBase::VJoinNodeBase(ObjectPool* pool, const TPlanNode& tnode, const Des : (tnode.__isset.nested_loop_join_node ? tnode.nested_loop_join_node.join_op : TJoinOp::CROSS_JOIN)), - _have_other_join_conjunct(tnode.__isset.hash_join_node - ? tnode.hash_join_node.__isset.vother_join_conjunct - : false), + _have_other_join_conjunct(tnode.__isset.hash_join_node && + ((tnode.hash_join_node.__isset.other_join_conjuncts && + !tnode.hash_join_node.other_join_conjuncts.empty()) || + tnode.hash_join_node.__isset.vother_join_conjunct)), _match_all_probe(_join_op == TJoinOp::LEFT_OUTER_JOIN || _join_op == TJoinOp::FULL_OUTER_JOIN), _match_all_build(_join_op == TJoinOp::RIGHT_OUTER_JOIN || @@ -183,8 +184,8 @@ Status VJoinNodeBase::init(const TPlanNode& tnode, RuntimeState* state) { ? tnode.hash_join_node.srcExprList : tnode.nested_loop_join_node.srcExprList; for (const auto& expr : output_exprs) { - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, expr, &ctx)); + VExprContextSPtr ctx; + RETURN_IF_ERROR(VExpr::create_expr_tree(expr, ctx)); _output_expr_ctxs.push_back(ctx); } } diff --git a/be/src/vec/exec/join/vjoin_node_base.h b/be/src/vec/exec/join/vjoin_node_base.h index 9a33f41579..6972357083 100644 --- a/be/src/vec/exec/join/vjoin_node_base.h +++ b/be/src/vec/exec/join/vjoin_node_base.h @@ -31,14 +31,12 @@ #include "util/runtime_profile.h" #include "vec/core/block.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class ObjectPool; class RuntimeState; -namespace vectorized { -class VExprContext; -} // namespace vectorized } // namespace doris namespace doris::vectorized { @@ -118,7 +116,7 @@ protected: std::unique_ptr _output_row_desc; std::unique_ptr _intermediate_row_desc; // output expr - std::vector _output_expr_ctxs; + VExprContextSPtrs _output_expr_ctxs; Block _join_block; diff --git a/be/src/vec/exec/join/vnested_loop_join_node.cpp b/be/src/vec/exec/join/vnested_loop_join_node.cpp index 9d314f4160..b8160b2ade 100644 --- a/be/src/vec/exec/join/vnested_loop_join_node.cpp +++ b/be/src/vec/exec/join/vnested_loop_join_node.cpp @@ -111,10 +111,15 @@ Status VNestedLoopJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { _is_output_left_side_only = tnode.nested_loop_join_node.is_output_left_side_only; } - if (tnode.nested_loop_join_node.__isset.vjoin_conjunct) { - _vjoin_conjunct_ptr.reset(new VExprContext*); - RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, tnode.nested_loop_join_node.vjoin_conjunct, - _vjoin_conjunct_ptr.get())); + if (tnode.nested_loop_join_node.__isset.join_conjuncts && + !tnode.nested_loop_join_node.join_conjuncts.empty()) { + RETURN_IF_ERROR(VExpr::create_expr_trees(tnode.nested_loop_join_node.join_conjuncts, + _join_conjuncts)); + } else if (tnode.nested_loop_join_node.__isset.vjoin_conjunct) { + VExprContextSPtr context; + RETURN_IF_ERROR( + VExpr::create_expr_tree(tnode.nested_loop_join_node.vjoin_conjunct, context)); + _join_conjuncts.emplace_back(context); } std::vector filter_src_exprs; @@ -123,8 +128,7 @@ Status VNestedLoopJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(state->runtime_filter_mgr()->register_filter( RuntimeFilterRole::PRODUCER, _runtime_filter_descs[i], state->query_options())); } - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_trees(_pool, filter_src_exprs, &_filter_src_expr_ctxs)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_trees(filter_src_exprs, _filter_src_expr_ctxs)); return Status::OK(); } @@ -149,8 +153,8 @@ Status VNestedLoopJoinNode::prepare(RuntimeState* state) { RETURN_IF_INVALID_TUPLE_IDX(build_tuple_desc->id(), tuple_idx); } - if (_vjoin_conjunct_ptr) { - RETURN_IF_ERROR((*_vjoin_conjunct_ptr)->prepare(state, *_intermediate_row_desc)); + for (auto& conjunct : _join_conjuncts) { + RETURN_IF_ERROR(conjunct->prepare(state, *_intermediate_row_desc)); } _num_probe_side_columns = child(0)->row_desc().num_materialized_slots(); _num_build_side_columns = child(1)->row_desc().num_materialized_slots(); @@ -551,66 +555,15 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(Block* block, size_t build_block_idx = _current_build_pos == 0 ? _build_blocks.size() - 1 : _current_build_pos - 1; size_t processed_blocks_num = _offset_stack.size(); - if (LIKELY(_vjoin_conjunct_ptr != nullptr && block->rows() > 0)) { - DCHECK((*_vjoin_conjunct_ptr) != nullptr); - int result_column_id = -1; - RETURN_IF_ERROR((*_vjoin_conjunct_ptr)->execute(block, &result_column_id)); - const auto& filter_column = block->get_by_position(result_column_id).column; - if (auto* nullable_column = check_and_get_column(*filter_column)) { - const auto& nested_column = nullable_column->get_nested_column_ptr(); + if (LIKELY(!_join_conjuncts.empty() && block->rows() > 0)) { + IColumn::Filter filter(block->rows(), 1); + bool can_filter_all = false; + RETURN_IF_ERROR(VExprContext::execute_conjuncts(_join_conjuncts, nullptr, IgnoreNull, block, + &filter, &can_filter_all)); - MutableColumnPtr mutable_holder = - nested_column->use_count() == 1 - ? nested_column->assume_mutable() - : nested_column->clone_resized(nested_column->size()); - - ColumnUInt8* concrete_column = assert_cast(mutable_holder.get()); - auto* __restrict null_map = nullable_column->get_null_map_data().data(); - IColumn::Filter& filter = concrete_column->get_data(); - auto* __restrict filter_data = filter.data(); - - const size_t size = filter.size(); - if constexpr (IgnoreNull) { - for (size_t i = 0; i < size; ++i) { - filter_data[i] |= null_map[i]; - } - } else { - for (size_t i = 0; i < size; ++i) { - filter_data[i] &= !null_map[i]; - } - } - _do_filtering_and_update_visited_flags_impl( - block, column_to_keep, build_block_idx, processed_blocks_num, materialize, - filter); - } else if (auto* const_column = check_and_get_column(*filter_column)) { - bool ret = const_column->get_bool(0); - if (ret) { - if constexpr (SetBuildSideFlag) { - for (size_t i = 0; i < processed_blocks_num; i++) { - auto& build_side_flag = - assert_cast( - _build_side_visited_flags[build_block_idx].get()) - ->get_data(); - auto* __restrict build_side_flag_data = build_side_flag.data(); - auto cur_sz = build_side_flag.size(); - _offset_stack.pop(); - memset(reinterpret_cast(build_side_flag_data), 1, cur_sz); - build_block_idx = build_block_idx == 0 ? _build_blocks.size() - 1 - : build_block_idx - 1; - } - } - if constexpr (SetProbeSideFlag) { - _cur_probe_row_visited_flags |= ret; - } - } - if (!materialize || !ret) { - CLEAR_BLOCK - } + if (can_filter_all) { + CLEAR_BLOCK } else { - const IColumn::Filter& filter = - assert_cast&>(*filter_column) - .get_data(); _do_filtering_and_update_visited_flags_impl( block, column_to_keep, build_block_idx, processed_blocks_num, materialize, @@ -643,8 +596,8 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(Block* block, Status VNestedLoopJoinNode::alloc_resource(doris::RuntimeState* state) { RETURN_IF_ERROR(VJoinNodeBase::alloc_resource(state)); - if (_vjoin_conjunct_ptr) { - RETURN_IF_ERROR((*_vjoin_conjunct_ptr)->open(state)); + for (auto& conjunct : _join_conjuncts) { + RETURN_IF_ERROR(conjunct->open(state)); } return VExpr::open(_filter_src_expr_ctxs, state); } @@ -697,8 +650,8 @@ Status VNestedLoopJoinNode::pull(RuntimeState* state, vectorized::Block* block, _add_tuple_is_null_column(&tmp_block); { SCOPED_TIMER(_join_filter_timer); - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, &tmp_block, - tmp_block.columns())); + RETURN_IF_ERROR( + VExprContext::filter_block(_conjuncts, &tmp_block, tmp_block.columns())); } RETURN_IF_ERROR(_build_output_block(&tmp_block, block)); _reset_tuple_is_null_column(); @@ -729,7 +682,9 @@ bool VNestedLoopJoinNode::need_more_input_data() const { void VNestedLoopJoinNode::release_resource(doris::RuntimeState* state) { VJoinNodeBase::release_resource(state); VExpr::close(_filter_src_expr_ctxs, state); - if (_vjoin_conjunct_ptr) (*_vjoin_conjunct_ptr)->close(state); + for (auto& conjunct : _join_conjuncts) { + conjunct->close(state); + } } } // namespace doris::vectorized diff --git a/be/src/vec/exec/join/vnested_loop_join_node.h b/be/src/vec/exec/join/vnested_loop_join_node.h index 4249af9266..4bd66798d9 100644 --- a/be/src/vec/exec/join/vnested_loop_join_node.h +++ b/be/src/vec/exec/join/vnested_loop_join_node.h @@ -246,11 +246,11 @@ private: MutableColumns _dst_columns; std::vector _runtime_filter_descs; - std::vector _filter_src_expr_ctxs; + VExprContextSPtrs _filter_src_expr_ctxs; bool _is_output_left_side_only = false; bool _need_more_input_data = true; std::stack _offset_stack; - std::unique_ptr _vjoin_conjunct_ptr; + VExprContextSPtrs _join_conjuncts; friend struct RuntimeFilterBuild; }; diff --git a/be/src/vec/exec/scan/new_es_scan_node.cpp b/be/src/vec/exec/scan/new_es_scan_node.cpp index a037761278..088784b330 100644 --- a/be/src/vec/exec/scan/new_es_scan_node.cpp +++ b/be/src/vec/exec/scan/new_es_scan_node.cpp @@ -167,7 +167,7 @@ Status NewEsScanNode::_init_scanners(std::list* scanners) { _state, this, _limit_per_scanner, _tuple_id, properties, _docvalue_context, doc_value_mode, _state->runtime_profile()); - RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr)); + RETURN_IF_ERROR(scanner->prepare(_state, _conjuncts)); scanners->push_back(scanner); } return Status::OK(); diff --git a/be/src/vec/exec/scan/new_es_scanner.cpp b/be/src/vec/exec/scan/new_es_scanner.cpp index d65ad50615..0bd492b79c 100644 --- a/be/src/vec/exec/scan/new_es_scanner.cpp +++ b/be/src/vec/exec/scan/new_es_scanner.cpp @@ -58,9 +58,9 @@ NewEsScanner::NewEsScanner(RuntimeState* state, NewEsScanNode* parent, int64_t l _docvalue_context(docvalue_context), _doc_value_mode(doc_value_mode) {} -Status NewEsScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { +Status NewEsScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) { VLOG_CRITICAL << NEW_SCANNER_TYPE << "::prepare"; - RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); + RETURN_IF_ERROR(VScanner::prepare(_state, conjuncts)); if (_is_init) { return Status::OK(); diff --git a/be/src/vec/exec/scan/new_es_scanner.h b/be/src/vec/exec/scan/new_es_scanner.h index 28a9872cd5..90b61344de 100644 --- a/be/src/vec/exec/scan/new_es_scanner.h +++ b/be/src/vec/exec/scan/new_es_scanner.h @@ -60,7 +60,7 @@ public: Status close(RuntimeState* state) override; public: - Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eof) override; diff --git a/be/src/vec/exec/scan/new_file_scan_node.cpp b/be/src/vec/exec/scan/new_file_scan_node.cpp index ed8b80ab57..127539d26e 100644 --- a/be/src/vec/exec/scan/new_file_scan_node.cpp +++ b/be/src/vec/exec/scan/new_file_scan_node.cpp @@ -111,8 +111,8 @@ Status NewFileScanNode::_init_scanners(std::list* scanners) { VFileScanner::create_unique(_state, this, _limit_per_scanner, scan_range.scan_range.ext_scan_range.file_scan_range, runtime_profile(), _kv_cache.get()); - RETURN_IF_ERROR(scanner->prepare(_vconjunct_ctx_ptr, &_colname_to_value_range, - &_colname_to_slot_id)); + RETURN_IF_ERROR( + scanner->prepare(_conjuncts, &_colname_to_value_range, &_colname_to_slot_id)); scanners->push_back(std::move(scanner)); } diff --git a/be/src/vec/exec/scan/new_jdbc_scan_node.cpp b/be/src/vec/exec/scan/new_jdbc_scan_node.cpp index 8b240ef9c6..f8219b4337 100644 --- a/be/src/vec/exec/scan/new_jdbc_scan_node.cpp +++ b/be/src/vec/exec/scan/new_jdbc_scan_node.cpp @@ -68,7 +68,7 @@ Status NewJdbcScanNode::_init_scanners(std::list* scanners) { std::unique_ptr scanner = NewJdbcScanner::create_unique(_state, this, _limit_per_scanner, _tuple_id, _query_string, _table_type, _state->runtime_profile()); - RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr)); + RETURN_IF_ERROR(scanner->prepare(_state, _conjuncts)); scanners->push_back(std::move(scanner)); return Status::OK(); } diff --git a/be/src/vec/exec/scan/new_jdbc_scanner.cpp b/be/src/vec/exec/scan/new_jdbc_scanner.cpp index 150203035b..bc8bf6e044 100644 --- a/be/src/vec/exec/scan/new_jdbc_scanner.cpp +++ b/be/src/vec/exec/scan/new_jdbc_scanner.cpp @@ -54,12 +54,9 @@ NewJdbcScanner::NewJdbcScanner(RuntimeState* state, NewJdbcScanNode* parent, int _connector_close_timer = ADD_TIMER(get_parent()->_scanner_profile, "ConnectorCloseTime"); } -Status NewJdbcScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { +Status NewJdbcScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) { VLOG_CRITICAL << "NewJdbcScanner::Prepare"; - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR(vconjunct_ctx_ptr->clone(state, &_vconjunct_ctx)); - } + RETURN_IF_ERROR(VScanner::prepare(state, conjuncts)); if (_is_init) { return Status::OK(); diff --git a/be/src/vec/exec/scan/new_jdbc_scanner.h b/be/src/vec/exec/scan/new_jdbc_scanner.h index 8db47fb95f..1fb1b64874 100644 --- a/be/src/vec/exec/scan/new_jdbc_scanner.h +++ b/be/src/vec/exec/scan/new_jdbc_scanner.h @@ -52,7 +52,7 @@ public: Status open(RuntimeState* state) override; Status close(RuntimeState* state) override; - Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eos) override; diff --git a/be/src/vec/exec/scan/new_odbc_scan_node.cpp b/be/src/vec/exec/scan/new_odbc_scan_node.cpp index 856a5f8de5..eafad16596 100644 --- a/be/src/vec/exec/scan/new_odbc_scan_node.cpp +++ b/be/src/vec/exec/scan/new_odbc_scan_node.cpp @@ -67,7 +67,7 @@ Status NewOdbcScanNode::_init_scanners(std::list* scanners) { } std::shared_ptr scanner = NewOdbcScanner::create_shared( _state, this, _limit_per_scanner, _odbc_scan_node, _state->runtime_profile()); - RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr)); + RETURN_IF_ERROR(scanner->prepare(_state, _conjuncts)); scanners->push_back(scanner); return Status::OK(); } diff --git a/be/src/vec/exec/scan/new_odbc_scanner.cpp b/be/src/vec/exec/scan/new_odbc_scanner.cpp index 1022be3c83..2b371bbea7 100644 --- a/be/src/vec/exec/scan/new_odbc_scanner.cpp +++ b/be/src/vec/exec/scan/new_odbc_scanner.cpp @@ -60,12 +60,9 @@ NewOdbcScanner::NewOdbcScanner(RuntimeState* state, NewOdbcScanNode* parent, int _tuple_id(odbc_scan_node.tuple_id), _tuple_desc(nullptr) {} -Status NewOdbcScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { +Status NewOdbcScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) { VLOG_CRITICAL << NEW_SCANNER_TYPE << "::prepare"; - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR(vconjunct_ctx_ptr->clone(state, &_vconjunct_ctx)); - } + RETURN_IF_ERROR(VScanner::prepare(state, conjuncts)); if (_is_init) { return Status::OK(); diff --git a/be/src/vec/exec/scan/new_odbc_scanner.h b/be/src/vec/exec/scan/new_odbc_scanner.h index 0fbafd0c5c..e75b6465a0 100644 --- a/be/src/vec/exec/scan/new_odbc_scanner.h +++ b/be/src/vec/exec/scan/new_odbc_scanner.h @@ -56,7 +56,7 @@ public: Status close(RuntimeState* state) override; public: - Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eos) override; diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp index e713dde642..b656444329 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.cpp +++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp @@ -431,9 +431,17 @@ Status NewOlapScanNode::_init_scanners(std::list* scanners) { SCOPED_TIMER(_scanner_init_timer); auto span = opentelemetry::trace::Tracer::GetCurrentSpan(); - if (_vconjunct_ctx_ptr && _vconjunct_ctx_ptr->root()) { - _runtime_profile->add_info_string("RemainedDownPredicates", - _vconjunct_ctx_ptr->root()->debug_string()); + if (!_conjuncts.empty()) { + std::string message; + for (auto& conjunct : _conjuncts) { + if (conjunct->root()) { + if (!message.empty()) { + message += ", "; + } + message += conjunct->root()->debug_string(); + } + } + _runtime_profile->add_info_string("RemainedDownPredicates", message); } if (!_olap_scan_node.output_column_unique_ids.empty()) { diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 450753b2a5..605688bd0d 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -103,11 +103,12 @@ static std::string read_columns_to_string(TabletSchemaSPtr tablet_schema, Status NewOlapScanner::init() { _is_init = true; auto parent = static_cast(_parent); - RETURN_IF_ERROR(VScanner::prepare(_state, parent->_vconjunct_ctx_ptr)); - if (parent->_common_vexpr_ctxs_pushdown != nullptr) { - // Copy common_vexpr_ctxs_pushdown from scan node to this scanner's _common_vexpr_ctxs_pushdown, just necessary. - RETURN_IF_ERROR( - parent->_common_vexpr_ctxs_pushdown->clone(_state, &_common_vexpr_ctxs_pushdown)); + RETURN_IF_ERROR(VScanner::prepare(_state, parent->_conjuncts)); + + for (auto& ctx : parent->_common_expr_ctxs_push_down) { + VExprContextSPtr context; + RETURN_IF_ERROR(ctx->clone(_state, context)); + _common_expr_ctxs_push_down.emplace_back(context); } // set limit to reduce end of rowset and segment mem use @@ -264,14 +265,19 @@ Status NewOlapScanner::_init_tablet_reader_params( real_parent->_olap_scan_node.push_down_agg_type_opt; } _tablet_reader_params.version = Version(0, _version); - // TODO: If a new runtime filter arrives after `_vconjunct_ctx` move to `_common_vexpr_ctxs_pushdown`, - // `_vconjunct_ctx` and `_common_vexpr_ctxs_pushdown` will have values at the same time, - // and the root() of `_vconjunct_ctx` and `_common_vexpr_ctxs_pushdown` should be merged as `remaining_vconjunct_root` - _tablet_reader_params.remaining_vconjunct_root = - (_common_vexpr_ctxs_pushdown == nullptr) - ? (_vconjunct_ctx == nullptr ? nullptr : _vconjunct_ctx->root()) - : _common_vexpr_ctxs_pushdown->root(); - _tablet_reader_params.common_vexpr_ctxs_pushdown = _common_vexpr_ctxs_pushdown; + + // TODO: If a new runtime filter arrives after `_conjuncts` move to `_common_expr_ctxs_push_down`, + if (_common_expr_ctxs_push_down.empty()) { + for (auto& conjunct : _conjuncts) { + _tablet_reader_params.remaining_conjunct_roots.emplace_back(conjunct->root()); + } + } else { + for (auto& ctx : _common_expr_ctxs_push_down) { + _tablet_reader_params.remaining_conjunct_roots.emplace_back(ctx->root()); + } + } + + _tablet_reader_params.common_expr_ctxs_push_down = _common_expr_ctxs_push_down; _tablet_reader_params.output_columns = ((NewOlapScanNode*)_parent)->_maybe_read_column_ids; // Condition @@ -403,7 +409,7 @@ Status NewOlapScanner::_init_tablet_reader_params( _tablet_reader_params.read_orderby_key_num_prefix_columns = olap_scan_node.sort_info.is_asc_order.size(); _tablet_reader_params.read_orderby_key_limit = _limit; - _tablet_reader_params.filter_block_vconjunct_ctx_ptr = &_vconjunct_ctx; + _tablet_reader_params.filter_block_conjuncts = _conjuncts; } // runtime predicate push down optimization for topn diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index f4a9265d62..767106b936 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -97,10 +97,10 @@ VFileScanner::VFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t } Status VFileScanner::prepare( - VExprContext* vconjunct_ctx_ptr, + const VExprContextSPtrs& conjuncts, std::unordered_map* colname_to_value_range, const std::unordered_map* colname_to_slot_id) { - RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); + RETURN_IF_ERROR(VScanner::prepare(_state, conjuncts)); _colname_to_value_range = colname_to_value_range; _col_name_to_slot_id = colname_to_slot_id; @@ -126,11 +126,19 @@ Status VFileScanner::prepare( std::vector({_input_tuple_desc->id()}), std::vector({false}))); // prepare pre filters - if (_params.__isset.pre_filter_exprs) { - RETURN_IF_ERROR(doris::vectorized::VExpr::create_expr_tree( - _state->obj_pool(), _params.pre_filter_exprs, &_pre_conjunct_ctx_ptr)); - RETURN_IF_ERROR(_pre_conjunct_ctx_ptr->prepare(_state, *_src_row_desc)); - RETURN_IF_ERROR(_pre_conjunct_ctx_ptr->open(_state)); + if (_params.__isset.pre_filter_exprs_list) { + RETURN_IF_ERROR(doris::vectorized::VExpr::create_expr_trees( + _params.pre_filter_exprs_list, _pre_conjunct_ctxs)); + } else if (_params.__isset.pre_filter_exprs) { + VExprContextSPtr context; + RETURN_IF_ERROR( + doris::vectorized::VExpr::create_expr_tree(_params.pre_filter_exprs, context)); + _pre_conjunct_ctxs.emplace_back(context); + } + + for (auto& conjunct : _pre_conjunct_ctxs) { + RETURN_IF_ERROR(conjunct->prepare(_state, *_src_row_desc)); + RETURN_IF_ERROR(conjunct->open(_state)); } } @@ -141,21 +149,27 @@ Status VFileScanner::prepare( return Status::OK(); } -Status VFileScanner::_split_conjuncts(VExpr* conjunct_expr_root) { - static constexpr auto is_leaf = [](VExpr* expr) { return !expr->is_and_expr(); }; - if (conjunct_expr_root != nullptr) { +Status VFileScanner::_split_conjuncts() { + for (auto& conjunct : _conjuncts) { + RETURN_IF_ERROR(_split_conjuncts_expr(conjunct, conjunct->root())); + } + return Status::OK(); +} +Status VFileScanner::_split_conjuncts_expr(const VExprContextSPtr& context, + const VExprSPtr& conjunct_expr_root) { + static constexpr auto is_leaf = [](const auto& expr) { return !expr->is_and_expr(); }; + if (conjunct_expr_root) { if (is_leaf(conjunct_expr_root)) { auto impl = conjunct_expr_root->get_impl(); // If impl is not null, which means this a conjuncts from runtime filter. - VExpr* cur_expr = impl ? const_cast(impl) : conjunct_expr_root; - VExprContext* new_ctx = - _state->obj_pool()->add(VExprContext::create_unique(cur_expr).release()); - _vconjunct_ctx->clone_fn_contexts(new_ctx); + auto cur_expr = impl ? impl : conjunct_expr_root; + VExprContextSPtr new_ctx = VExprContext::create_shared(cur_expr); + context->clone_fn_contexts(new_ctx.get()); RETURN_IF_ERROR(new_ctx->prepare(_state, *_default_val_row_desc)); RETURN_IF_ERROR(new_ctx->open(_state)); std::vector slot_ids; - _get_slot_ids(cur_expr, &slot_ids); + _get_slot_ids(cur_expr.get(), &slot_ids); if (slot_ids.size() == 0) { _not_single_slot_filter_conjuncts.emplace_back(new_ctx); return Status::OK(); @@ -169,29 +183,25 @@ Status VFileScanner::_split_conjuncts(VExpr* conjunct_expr_root) { } if (single_slot) { SlotId slot_id = slot_ids[0]; - if (_slot_id_to_filter_conjuncts.find(slot_id) == - _slot_id_to_filter_conjuncts.end()) { - _slot_id_to_filter_conjuncts.insert({slot_id, std::vector()}); - } _slot_id_to_filter_conjuncts[slot_id].emplace_back(new_ctx); } else { _not_single_slot_filter_conjuncts.emplace_back(new_ctx); } } else { - RETURN_IF_ERROR(_split_conjuncts(conjunct_expr_root->children()[0])); - RETURN_IF_ERROR(_split_conjuncts(conjunct_expr_root->children()[1])); + RETURN_IF_ERROR(_split_conjuncts_expr(context, conjunct_expr_root->children()[0])); + RETURN_IF_ERROR(_split_conjuncts_expr(context, conjunct_expr_root->children()[1])); } } return Status::OK(); } void VFileScanner::_get_slot_ids(VExpr* expr, std::vector* slot_ids) { - for (VExpr* child_expr : expr->children()) { + for (auto& child_expr : expr->children()) { if (child_expr->is_slot_ref()) { - VSlotRef* slot_ref = reinterpret_cast(child_expr); + VSlotRef* slot_ref = reinterpret_cast(child_expr.get()); slot_ids->emplace_back(slot_ref->slot_id()); } - _get_slot_ids(child_expr, slot_ids); + _get_slot_ids(child_expr.get(), slot_ids); } } @@ -255,7 +265,7 @@ Status VFileScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eo // Fill columns not exist in file with null or default value RETURN_IF_ERROR(_fill_missing_columns(read_rows)); } - // Apply _pre_conjunct_ctx_ptr to filter src block. + // Apply _pre_conjunct_ctxs to filter src block. RETURN_IF_ERROR(_pre_filter_src_block()); // Convert src block to output block (dest block), string to dest data type and apply filters. RETURN_IF_ERROR(_convert_to_output_block(block)); @@ -409,7 +419,7 @@ Status VFileScanner::_fill_missing_columns(size_t rows) { nullable_column->insert_many_defaults(rows); } else { // fill with default value - auto* ctx = it->second; + auto& ctx = it->second; auto origin_column_num = _src_block_ptr->columns(); int result_column_id = -1; // PT1 => dest primitive type @@ -441,12 +451,12 @@ Status VFileScanner::_pre_filter_src_block() { if (!_is_load) { return Status::OK(); } - if (_pre_conjunct_ctx_ptr) { + if (!_pre_conjunct_ctxs.empty()) { SCOPED_TIMER(_pre_filter_timer); auto origin_column_num = _src_block_ptr->columns(); auto old_rows = _src_block_ptr->rows(); - RETURN_IF_ERROR(vectorized::VExprContext::filter_block(_pre_conjunct_ctx_ptr, - _src_block_ptr, origin_column_num)); + RETURN_IF_ERROR(vectorized::VExprContext::filter_block(_pre_conjunct_ctxs, _src_block_ptr, + origin_column_num)); _counter.num_rows_unselected += old_rows - _src_block.rows(); } return Status::OK(); @@ -476,7 +486,7 @@ Status VFileScanner::_convert_to_output_block(Block* block) { int dest_index = ctx_idx++; vectorized::ColumnPtr column_ptr; - auto* ctx = _dest_vexpr_ctx[dest_index]; + auto& ctx = _dest_vexpr_ctx[dest_index]; int result_column_id = -1; // PT1 => dest primitive type RETURN_IF_ERROR(ctx->execute(&_src_block, &result_column_id)); @@ -583,8 +593,11 @@ Status VFileScanner::_get_next_reader() { const_cast(&_state->timezone_obj()), _io_ctx.get(), _state, _kv_cache, _state->query_options().enable_parquet_lazy_mat); RETURN_IF_ERROR(parquet_reader->open()); - if (!_is_load && _push_down_expr == nullptr && _vconjunct_ctx != nullptr) { - RETURN_IF_ERROR(_vconjunct_ctx->clone(_state, &_push_down_expr)); + if (!_is_load && _push_down_conjuncts.empty() && !_conjuncts.empty()) { + _push_down_conjuncts.resize(_conjuncts.size()); + for (size_t i = 0; i != _conjuncts.size(); ++i) { + RETURN_IF_ERROR(_conjuncts[i]->clone(_state, _push_down_conjuncts[i])); + } _discard_conjuncts(); } if (range.__isset.table_format_params && @@ -594,24 +607,29 @@ Status VFileScanner::_get_next_reader() { _state, _params, range, _kv_cache, _io_ctx.get()); init_status = iceberg_reader->init_reader( - _file_col_names, _col_id_name_map, _colname_to_value_range, _push_down_expr, - _real_tuple_desc, _default_val_row_desc.get(), _col_name_to_slot_id, - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); + _file_col_names, _col_id_name_map, _colname_to_value_range, + _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), + _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts); RETURN_IF_ERROR(iceberg_reader->init_row_filters(range)); _cur_reader = std::move(iceberg_reader); } else { std::vector place_holder; init_status = parquet_reader->init_reader( - _file_col_names, place_holder, _colname_to_value_range, _push_down_expr, - _real_tuple_desc, _default_val_row_desc.get(), _col_name_to_slot_id, - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); + _file_col_names, place_holder, _colname_to_value_range, + _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), + _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts); _cur_reader = std::move(parquet_reader); } break; } case TFileFormatType::FORMAT_ORC: { - if (!_is_load && _push_down_expr == nullptr && _vconjunct_ctx != nullptr) { - RETURN_IF_ERROR(_vconjunct_ctx->clone(_state, &_push_down_expr)); + if (!_is_load && _push_down_conjuncts.empty() && !_conjuncts.empty()) { + _push_down_conjuncts.resize(_conjuncts.size()); + for (size_t i = 0; i != _conjuncts.size(); ++i) { + RETURN_IF_ERROR(_conjuncts[i]->clone(_state, _push_down_conjuncts[i])); + } _discard_conjuncts(); } _cur_reader = OrcReader::create_unique( @@ -619,7 +637,7 @@ Status VFileScanner::_get_next_reader() { _state->query_options().batch_size, _state->timezone(), _io_ctx.get(), _state->query_options().enable_orc_lazy_mat); init_status = ((OrcReader*)(_cur_reader.get())) - ->init_reader(_colname_to_value_range, _push_down_expr); + ->init_reader(_colname_to_value_range, _push_down_conjuncts); break; } case TFileFormatType::FORMAT_CSV_PLAIN: @@ -679,7 +697,7 @@ Status VFileScanner::_get_next_reader() { Status VFileScanner::_generate_fill_columns() { std::unordered_map> partition_columns; - std::unordered_map missing_columns; + std::unordered_map missing_columns; const TFileRangeDesc& range = _ranges.at(_next_range - 1); if (range.__isset.columns_from_path && !_partition_slot_descs.empty()) { @@ -777,12 +795,11 @@ Status VFileScanner::_init_expr_ctxes() { if (!slot_desc->is_materialized()) { continue; } - vectorized::VExprContext* ctx = nullptr; + vectorized::VExprContextSPtr ctx; auto it = _params.default_value_of_src_slot.find(slot_desc->id()); if (it != std::end(_params.default_value_of_src_slot)) { if (!it->second.nodes.empty()) { - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_tree(_state->obj_pool(), it->second, &ctx)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(it->second, ctx)); RETURN_IF_ERROR(ctx->prepare(_state, *_default_val_row_desc)); RETURN_IF_ERROR(ctx->open(_state)); } @@ -805,10 +822,9 @@ Status VFileScanner::_init_expr_ctxes() { slot_desc->id(), slot_desc->col_name()); } - vectorized::VExprContext* ctx = nullptr; + vectorized::VExprContextSPtr ctx; if (!it->second.nodes.empty()) { - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_tree(_state->obj_pool(), it->second, &ctx)); + RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(it->second, ctx)); RETURN_IF_ERROR(ctx->prepare(_state, *_src_row_desc)); RETURN_IF_ERROR(ctx->open(_state)); } @@ -837,8 +853,8 @@ Status VFileScanner::_init_expr_ctxes() { _output_tuple_desc && _output_tuple_desc->slots().back()->type().is_variant_type(); // TODO: It should can move to scan node to process. - if (_vconjunct_ctx && _vconjunct_ctx->root()) { - _split_conjuncts(_vconjunct_ctx->root()); + if (!_conjuncts.empty()) { + _split_conjuncts(); } return Status::OK(); } @@ -854,18 +870,18 @@ Status VFileScanner::close(RuntimeState* state) { } } - for (auto it : _col_default_value_ctx) { + for (auto& it : _col_default_value_ctx) { if (it.second != nullptr) { it.second->close(state); } } - if (_pre_conjunct_ctx_ptr) { - _pre_conjunct_ctx_ptr->close(state); + for (auto& conjunct : _pre_conjunct_ctxs) { + conjunct->close(state); } - if (_push_down_expr) { - _push_down_expr->close(state); + for (auto& conjunct : _push_down_conjuncts) { + conjunct->close(state); } for (auto& [k, v] : _slot_id_to_filter_conjuncts) { @@ -876,7 +892,7 @@ Status VFileScanner::close(RuntimeState* state) { } } - for (auto* ctx : _not_single_slot_filter_conjuncts) { + for (auto ctx : _not_single_slot_filter_conjuncts) { if (ctx != nullptr) { ctx->close(state); } diff --git a/be/src/vec/exec/scan/vfile_scanner.h b/be/src/vec/exec/scan/vfile_scanner.h index 54a1df77d3..162be02bda 100644 --- a/be/src/vec/exec/scan/vfile_scanner.h +++ b/be/src/vec/exec/scan/vfile_scanner.h @@ -71,7 +71,7 @@ public: Status close(RuntimeState* state) override; - Status prepare(VExprContext* vconjunct_ctx_ptr, + Status prepare(const VExprContextSPtrs& conjuncts, std::unordered_map* colname_to_value_range, const std::unordered_map* colname_to_slot_id); @@ -108,11 +108,11 @@ protected: // created from param.expr_of_dest_slot // For query, it saves default value expr of all dest columns, or nullptr for NULL. // For load, it saves conversion expr/default value of all dest columns. - std::vector _dest_vexpr_ctx; + VExprContextSPtrs _dest_vexpr_ctx; // dest slot name to index in _dest_vexpr_ctx; std::unordered_map _dest_slot_name_to_idx; // col name to default value expr - std::unordered_map _col_default_value_ctx; + std::unordered_map _col_default_value_ctx; // the map values of dest slot id to src slot desc // if there is not key of dest slot id in dest_sid_to_src_sid_without_trans, it will be set to nullptr std::vector _src_slot_descs_order_by_dest; @@ -128,7 +128,7 @@ protected: std::unordered_set _missing_cols; // For load task - doris::vectorized::VExprContext* _pre_conjunct_ctx_ptr = nullptr; + vectorized::VExprContextSPtrs _pre_conjunct_ctxs; std::unique_ptr _src_row_desc; // row desc for default exprs std::unique_ptr _default_val_row_desc; @@ -146,7 +146,7 @@ protected: Block* _src_block_ptr; Block _src_block; - VExprContext* _push_down_expr = nullptr; + VExprContextSPtrs _push_down_conjuncts; bool _is_dynamic_schema = false; // for tracing dynamic schema std::unique_ptr _full_base_schema_view; @@ -165,9 +165,9 @@ private: const std::unordered_map* _col_name_to_slot_id; // single slot filter conjuncts - std::unordered_map> _slot_id_to_filter_conjuncts; + std::unordered_map _slot_id_to_filter_conjuncts; // not single(zero or multi) slot filter conjuncts - std::vector _not_single_slot_filter_conjuncts; + VExprContextSPtrs _not_single_slot_filter_conjuncts; private: Status _init_expr_ctxes(); @@ -179,7 +179,9 @@ private: Status _convert_to_output_block(Block* block); Status _generate_fill_columns(); Status _handle_dynamic_block(Block* block); - Status _split_conjuncts(VExpr* conjunct_expr_root); + Status _split_conjuncts(); + Status _split_conjuncts_expr(const VExprContextSPtr& context, + const VExprSPtr& conjunct_expr_root); void _get_slot_ids(VExpr* expr, std::vector* slot_ids); void _reset_counter() { diff --git a/be/src/vec/exec/scan/vmeta_scan_node.cpp b/be/src/vec/exec/scan/vmeta_scan_node.cpp index 3dec257dca..b94049697d 100644 --- a/be/src/vec/exec/scan/vmeta_scan_node.cpp +++ b/be/src/vec/exec/scan/vmeta_scan_node.cpp @@ -65,7 +65,7 @@ Status VMetaScanNode::_init_scanners(std::list* scanners) { for (auto& scan_range : _scan_ranges) { std::shared_ptr scanner = VMetaScanner::create_shared( _state, this, _tuple_id, scan_range, _limit_per_scanner, runtime_profile()); - RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr)); + RETURN_IF_ERROR(scanner->prepare(_state, _conjuncts)); scanners->push_back(scanner); } return Status::OK(); diff --git a/be/src/vec/exec/scan/vmeta_scanner.cpp b/be/src/vec/exec/scan/vmeta_scanner.cpp index 4a59804e6a..41114eba76 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.cpp +++ b/be/src/vec/exec/scan/vmeta_scanner.cpp @@ -70,9 +70,9 @@ Status VMetaScanner::open(RuntimeState* state) { return Status::OK(); } -Status VMetaScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { +Status VMetaScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) { VLOG_CRITICAL << "VMetaScanner::prepare"; - RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); + RETURN_IF_ERROR(VScanner::prepare(_state, conjuncts)); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); RETURN_IF_ERROR(_fetch_metadata(_scan_range.meta_scan_range)); return Status::OK(); diff --git a/be/src/vec/exec/scan/vmeta_scanner.h b/be/src/vec/exec/scan/vmeta_scanner.h index 3cac485cb2..de6e5cbd41 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.h +++ b/be/src/vec/exec/scan/vmeta_scanner.h @@ -55,7 +55,7 @@ public: Status open(RuntimeState* state) override; Status close(RuntimeState* state) override; - Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eos) override; diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index 4c0241f9c2..007b197897 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -353,7 +353,7 @@ bool VScanNode::runtime_filters_are_ready_or_timeout() { Status VScanNode::_acquire_runtime_filter(bool wait) { SCOPED_TIMER(_acquire_runtime_filter_timer); - std::vector vexprs; + VExprSPtrs vexprs; for (size_t i = 0; i < _runtime_filter_descs.size(); ++i) { IRuntimeFilter* runtime_filter = _runtime_filter_ctxs[i].runtime_filter; bool ready = runtime_filter->is_ready(); @@ -380,59 +380,19 @@ Status VScanNode::_acquire_runtime_filter(bool wait) { return Status::OK(); } -Status VScanNode::_append_rf_into_conjuncts(std::vector& vexprs) { +Status VScanNode::_append_rf_into_conjuncts(const VExprSPtrs& vexprs) { if (vexprs.empty()) { return Status::OK(); } - VExpr* last_expr = nullptr; - if (_vconjunct_ctx_ptr != nullptr) { - last_expr = _vconjunct_ctx_ptr->root(); - } else { - DCHECK(_rf_vexpr_set.find(vexprs[0]) == _rf_vexpr_set.end()); - last_expr = vexprs[0]; - _rf_vexpr_set.insert(vexprs[0]); + for (auto& expr : vexprs) { + VExprContextSPtr conjunct = VExprContext::create_shared(expr); + RETURN_IF_ERROR(conjunct->prepare(_state, _row_descriptor)); + RETURN_IF_ERROR(conjunct->open(_state)); + _rf_vexpr_set.insert(expr); + _conjuncts.emplace_back(conjunct); } - for (size_t j = _vconjunct_ctx_ptr ? 0 : 1; j < vexprs.size(); j++) { - if (_rf_vexpr_set.find(vexprs[j]) != _rf_vexpr_set.end()) { - continue; - } - TFunction fn; - TFunctionName fn_name; - fn_name.__set_db_name(""); - fn_name.__set_function_name("and"); - fn.__set_name(fn_name); - fn.__set_binary_type(TFunctionBinaryType::BUILTIN); - std::vector arg_types; - arg_types.push_back(create_type_desc(PrimitiveType::TYPE_BOOLEAN)); - arg_types.push_back(create_type_desc(PrimitiveType::TYPE_BOOLEAN)); - fn.__set_arg_types(arg_types); - fn.__set_ret_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN)); - fn.__set_has_var_args(false); - TExprNode texpr_node; - texpr_node.__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN)); - texpr_node.__set_node_type(TExprNodeType::COMPOUND_PRED); - texpr_node.__set_opcode(TExprOpcode::COMPOUND_AND); - texpr_node.__set_fn(fn); - texpr_node.__set_is_nullable(last_expr->is_nullable() || vexprs[j]->is_nullable()); - VExpr* new_node = _pool->add(VcompoundPred::create_unique(texpr_node).release()); - new_node->add_child(last_expr); - DCHECK((vexprs[j])->get_impl() != nullptr); - new_node->add_child(vexprs[j]); - last_expr = new_node; - _rf_vexpr_set.insert(vexprs[j]); - } - auto new_vconjunct_ctx_ptr = _pool->add(VExprContext::create_unique(last_expr).release()); - if (_vconjunct_ctx_ptr) { - _vconjunct_ctx_ptr->clone_fn_contexts(new_vconjunct_ctx_ptr); - } - RETURN_IF_ERROR(new_vconjunct_ctx_ptr->prepare(_state, _row_descriptor)); - RETURN_IF_ERROR(new_vconjunct_ctx_ptr->open(_state)); - if (_vconjunct_ctx_ptr) { - _stale_vexpr_ctxs.push_back(_vconjunct_ctx_ptr); - } - _vconjunct_ctx_ptr = new_vconjunct_ctx_ptr; return Status::OK(); } @@ -459,11 +419,12 @@ void VScanNode::release_resource(RuntimeState* state) { runtime_filter->consumer_close(); } - for (auto& ctx : _stale_vexpr_ctxs) { + for (auto& ctx : _stale_expr_ctxs) { ctx->close(state); } - if (_common_vexpr_ctxs_pushdown) { - _common_vexpr_ctxs_pushdown->close(state); + + for (auto& ctx : _common_expr_ctxs_push_down) { + ctx->close(state); } ExecNode::release_resource(state); @@ -529,21 +490,26 @@ Status VScanNode::_normalize_conjuncts() { } } } - if (_vconjunct_ctx_ptr) { - if (_vconjunct_ctx_ptr->root()) { - VExpr* new_root; - RETURN_IF_ERROR(_normalize_predicate(_vconjunct_ctx_ptr->root(), &new_root)); + + for (auto it = _conjuncts.begin(); it != _conjuncts.end();) { + auto& conjunct = *it; + if (conjunct->root()) { + VExprSPtr new_root; + RETURN_IF_ERROR(_normalize_predicate(conjunct->root(), conjunct.get(), new_root)); if (new_root) { - _vconjunct_ctx_ptr->set_root(new_root); + conjunct->set_root(new_root); if (_should_push_down_common_expr()) { - _common_vexpr_ctxs_pushdown = _vconjunct_ctx_ptr; - _vconjunct_ctx_ptr = nullptr; + _common_expr_ctxs_push_down.emplace_back(conjunct); + it = _conjuncts.erase(it); + continue; } - } else { // All conjucts are pushed down as predicate column - _stale_vexpr_ctxs.push_back(_vconjunct_ctx_ptr); - _vconjunct_ctx_ptr = nullptr; + } else { // All conjuncts are pushed down as predicate column + _stale_expr_ctxs.emplace_back(conjunct); + it = _conjuncts.erase(it); + continue; } } + ++it; } for (auto& it : _slot_id_to_value_range) { std::visit( @@ -559,28 +525,30 @@ Status VScanNode::_normalize_conjuncts() { return Status::OK(); } -Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output_expr) { - static constexpr auto is_leaf = [](VExpr* expr) { return !expr->is_and_expr(); }; - auto in_predicate_checker = [](const std::vector& children, const VSlotRef** slot, - VExpr** child_contains_slot) { +Status VScanNode::_normalize_predicate(const VExprSPtr& conjunct_expr_root, VExprContext* context, + VExprSPtr& output_expr) { + static constexpr auto is_leaf = [](auto&& expr) { return !expr->is_and_expr(); }; + auto in_predicate_checker = [](const VExprSPtrs& children, std::shared_ptr& slot, + VExprSPtr& child_contains_slot) { if (children.empty() || VExpr::expr_without_cast(children[0])->node_type() != TExprNodeType::SLOT_REF) { // not a slot ref(column) return false; } - *slot = reinterpret_cast(VExpr::expr_without_cast(children[0])); - *child_contains_slot = children[0]; + slot = std::dynamic_pointer_cast(VExpr::expr_without_cast(children[0])); + child_contains_slot = children[0]; return true; }; - auto eq_predicate_checker = [](const std::vector& children, const VSlotRef** slot, - VExpr** child_contains_slot) { - for (const VExpr* child : children) { + auto eq_predicate_checker = [](const VExprSPtrs& children, std::shared_ptr& slot, + VExprSPtr& child_contains_slot) { + for (const auto& child : children) { if (VExpr::expr_without_cast(child)->node_type() != TExprNodeType::SLOT_REF) { // not a slot ref(column) continue; } - *slot = reinterpret_cast(VExpr::expr_without_cast(child)); - *child_contains_slot = const_cast(child); + slot = std::dynamic_pointer_cast(VExpr::expr_without_cast(child)); + CHECK(slot != nullptr); + child_contains_slot = child; return true; } return false; @@ -590,15 +558,15 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output if (is_leaf(conjunct_expr_root)) { auto impl = conjunct_expr_root->get_impl(); // If impl is not null, which means this a conjuncts from runtime filter. - VExpr* cur_expr = impl ? const_cast(impl) : conjunct_expr_root; - bool is_runtimer_filter_predicate = + auto cur_expr = impl ? impl.get() : conjunct_expr_root.get(); + bool _is_runtime_filter_predicate = _rf_vexpr_set.find(conjunct_expr_root) != _rf_vexpr_set.end(); SlotDescriptor* slot = nullptr; ColumnValueRangeType* range = nullptr; PushDownType pdt = PushDownType::UNACCEPTABLE; - RETURN_IF_ERROR(_eval_const_conjuncts(cur_expr, _vconjunct_ctx_ptr, &pdt)); + RETURN_IF_ERROR(_eval_const_conjuncts(cur_expr, context, &pdt)); if (pdt == PushDownType::ACCEPTABLE) { - *output_expr = nullptr; + output_expr = nullptr; return Status::OK(); } if (_is_predicate_acting_on_slot(cur_expr, in_predicate_checker, &slot, &range) || @@ -607,26 +575,26 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output [&](auto& value_range) { Defer mark_runtime_filter_flag {[&]() { value_range.mark_runtime_filter_predicate( - is_runtimer_filter_predicate); + _is_runtime_filter_predicate); }}; RETURN_IF_PUSH_DOWN(_normalize_in_and_eq_predicate( - cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); + cur_expr, context, slot, value_range, &pdt)); RETURN_IF_PUSH_DOWN(_normalize_not_in_and_not_eq_predicate( - cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); + cur_expr, context, slot, value_range, &pdt)); RETURN_IF_PUSH_DOWN(_normalize_is_null_predicate( - cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); + cur_expr, context, slot, value_range, &pdt)); RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate( - cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); - RETURN_IF_PUSH_DOWN(_normalize_match_predicate( - cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); + cur_expr, context, slot, value_range, &pdt)); + RETURN_IF_PUSH_DOWN(_normalize_match_predicate(cur_expr, context, slot, + value_range, &pdt)); if (_is_key_column(slot->col_name())) { - RETURN_IF_PUSH_DOWN(_normalize_bitmap_filter( - cur_expr, _vconjunct_ctx_ptr, slot, &pdt)); - RETURN_IF_PUSH_DOWN(_normalize_bloom_filter( - cur_expr, _vconjunct_ctx_ptr, slot, &pdt)); + RETURN_IF_PUSH_DOWN( + _normalize_bitmap_filter(cur_expr, context, slot, &pdt)); + RETURN_IF_PUSH_DOWN( + _normalize_bloom_filter(cur_expr, context, slot, &pdt)); if (_state->enable_function_pushdown()) { RETURN_IF_PUSH_DOWN(_normalize_function_filters( - cur_expr, _vconjunct_ctx_ptr, slot, &pdt)); + cur_expr, context, slot, &pdt)); } } }, @@ -635,54 +603,52 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output if (pdt == PushDownType::UNACCEPTABLE && TExprNodeType::COMPOUND_PRED == cur_expr->node_type()) { - _normalize_compound_predicate(cur_expr, _vconjunct_ctx_ptr, &pdt, - is_runtimer_filter_predicate, in_predicate_checker, - eq_predicate_checker); - *output_expr = conjunct_expr_root; // remaining in conjunct tree + _normalize_compound_predicate(cur_expr, context, &pdt, _is_runtime_filter_predicate, + in_predicate_checker, eq_predicate_checker); + output_expr = conjunct_expr_root; // remaining in conjunct tree return Status::OK(); } if (pdt == PushDownType::ACCEPTABLE && _is_key_column(slot->col_name())) { - *output_expr = nullptr; + output_expr = nullptr; return Status::OK(); } else { // for PARTIAL_ACCEPTABLE and UNACCEPTABLE, do not remove expr from the tree - *output_expr = conjunct_expr_root; + output_expr = conjunct_expr_root; return Status::OK(); } } else { - VExpr* left_child; - RETURN_IF_ERROR(_normalize_predicate(conjunct_expr_root->children()[0], &left_child)); - VExpr* right_child; - RETURN_IF_ERROR(_normalize_predicate(conjunct_expr_root->children()[1], &right_child)); + VExprSPtr left_child; + RETURN_IF_ERROR( + _normalize_predicate(conjunct_expr_root->children()[0], context, left_child)); + VExprSPtr right_child; + RETURN_IF_ERROR( + _normalize_predicate(conjunct_expr_root->children()[1], context, right_child)); if (left_child != nullptr && right_child != nullptr) { conjunct_expr_root->set_children({left_child, right_child}); - *output_expr = conjunct_expr_root; + output_expr = conjunct_expr_root; return Status::OK(); } else { if (left_child == nullptr) { - conjunct_expr_root->children()[0]->close( - _state, _vconjunct_ctx_ptr, - _vconjunct_ctx_ptr->get_function_state_scope()); + conjunct_expr_root->children()[0]->close(_state, context, + context->get_function_state_scope()); } if (right_child == nullptr) { - conjunct_expr_root->children()[1]->close( - _state, _vconjunct_ctx_ptr, - _vconjunct_ctx_ptr->get_function_state_scope()); + conjunct_expr_root->children()[1]->close(_state, context, + context->get_function_state_scope()); } // here only close the and expr self, do not close the child conjunct_expr_root->set_children({}); - conjunct_expr_root->close(_state, _vconjunct_ctx_ptr, - _vconjunct_ctx_ptr->get_function_state_scope()); + conjunct_expr_root->close(_state, context, context->get_function_state_scope()); } // here do not close VExpr* now - *output_expr = left_child != nullptr ? left_child : right_child; + output_expr = left_child != nullptr ? left_child : right_child; return Status::OK(); } } - *output_expr = conjunct_expr_root; + output_expr = conjunct_expr_root; return Status::OK(); } @@ -720,7 +686,7 @@ Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext* expr_ct VExpr* fn_expr = expr; if (TExprNodeType::COMPOUND_PRED == expr->node_type() && expr->fn().name.function_name == "not") { - fn_expr = fn_expr->children()[0]; + fn_expr = fn_expr->children()[0].get(); opposite = true; } @@ -741,11 +707,12 @@ Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext* expr_ct bool VScanNode::_is_predicate_acting_on_slot( VExpr* expr, - const std::function&, const VSlotRef**, VExpr**)>& checker, + const std::function&, VExprSPtr&)>& + checker, SlotDescriptor** slot_desc, ColumnValueRangeType** range) { - const VSlotRef* slot_ref = nullptr; - VExpr* child_contains_slot = nullptr; - if (!checker(expr->children(), &slot_ref, &child_contains_slot)) { + std::shared_ptr slot_ref; + VExprSPtr child_contains_slot; + if (!checker(expr->children(), slot_ref, child_contains_slot)) { // not a slot ref(column) return false; } @@ -759,7 +726,7 @@ bool VScanNode::_is_predicate_acting_on_slot( if (child_contains_slot->type().type != (*slot_desc)->type().type || child_contains_slot->type().precision != (*slot_desc)->type().precision || child_contains_slot->type().scale != (*slot_desc)->type().scale) { - if (!ignore_cast(*slot_desc, child_contains_slot)) { + if (!ignore_cast(*slot_desc, child_contains_slot.get())) { // the type of predicate not match the slot's type return false; } @@ -781,7 +748,7 @@ Status VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, Pu if (const ColumnConst* const_column = check_and_get_column(const_col_wrapper->column_ptr)) { constant_val = const_cast(const_column->get_data_at(0).data); - if (constant_val == nullptr || *reinterpret_cast(constant_val) == false) { + if (constant_val == nullptr || !*reinterpret_cast(constant_val)) { *pdt = PushDownType::ACCEPTABLE; _eos = true; } @@ -798,7 +765,7 @@ Status VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, Pu DCHECK_EQ(bool_column->size(), 1); if (bool_column->size() == 1) { constant_val = const_cast(bool_column->get_data_at(0).data); - if (constant_val == nullptr || *reinterpret_cast(constant_val) == false) { + if (constant_val == nullptr || !*reinterpret_cast(constant_val)) { *pdt = PushDownType::ACCEPTABLE; _eos = true; } @@ -1081,16 +1048,16 @@ Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* e Status VScanNode::_normalize_compound_predicate( vectorized::VExpr* expr, VExprContext* expr_ctx, PushDownType* pdt, - bool is_runtimer_filter_predicate, - const std::function&, const VSlotRef**, VExpr**)>& + bool _is_runtime_filter_predicate, + const std::function&, VExprSPtr&)>& in_predicate_checker, - const std::function&, const VSlotRef**, VExpr**)>& + const std::function&, VExprSPtr&)>& eq_predicate_checker) { if (TExprNodeType::COMPOUND_PRED == expr->node_type()) { auto compound_fn_name = expr->fn().name.function_name; auto children_num = expr->children().size(); for (auto i = 0; i < children_num; ++i) { - VExpr* child_expr = expr->children()[i]; + auto child_expr = expr->children()[i].get(); if (TExprNodeType::BINARY_PRED == child_expr->node_type()) { SlotDescriptor* slot = nullptr; ColumnValueRangeType* range_on_slot = nullptr; @@ -1104,7 +1071,7 @@ Status VScanNode::_normalize_compound_predicate( [&](auto& value_range) { Defer mark_runtime_filter_flag {[&]() { value_range.mark_runtime_filter_predicate( - is_runtimer_filter_predicate); + _is_runtime_filter_predicate); }}; _normalize_binary_in_compound_predicate(child_expr, expr_ctx, slot, value_range, pdt); @@ -1126,7 +1093,7 @@ Status VScanNode::_normalize_compound_predicate( [&](auto& value_range) { Defer mark_runtime_filter_flag {[&]() { value_range.mark_runtime_filter_predicate( - is_runtimer_filter_predicate); + _is_runtime_filter_predicate); }}; _normalize_match_in_compound_predicate(child_expr, expr_ctx, slot, value_range, pdt); @@ -1137,7 +1104,7 @@ Status VScanNode::_normalize_compound_predicate( } } else if (TExprNodeType::COMPOUND_PRED == child_expr->node_type()) { _normalize_compound_predicate(child_expr, expr_ctx, pdt, - is_runtimer_filter_predicate, in_predicate_checker, + _is_runtime_filter_predicate, in_predicate_checker, eq_predicate_checker); } } @@ -1329,22 +1296,22 @@ Status VScanNode::try_append_late_arrival_runtime_filter(int* arrived_rf_num) { } // 1. Check if are runtime filter ready but not applied. - std::vector vexprs; + VExprSPtrs exprs; int current_arrived_rf_num = 0; for (size_t i = 0; i < _runtime_filter_descs.size(); ++i) { if (_runtime_filter_ctxs[i].apply_mark) { ++current_arrived_rf_num; continue; } else if (_runtime_filter_ctxs[i].runtime_filter->is_ready()) { - _runtime_filter_ctxs[i].runtime_filter->get_prepared_vexprs(&vexprs, _row_descriptor, - _state); + _runtime_filter_ctxs[i].runtime_filter->get_prepared_exprs(&exprs, _row_descriptor, + _state); ++current_arrived_rf_num; _runtime_filter_ctxs[i].apply_mark = true; } } // 2. Append unapplied runtime filters to vconjunct_ctx_ptr - if (!vexprs.empty()) { - RETURN_IF_ERROR(_append_rf_into_conjuncts(vexprs)); + if (!exprs.empty()) { + RETURN_IF_ERROR(_append_rf_into_conjuncts(exprs)); } if (current_arrived_rf_num == _runtime_filter_descs.size()) { _is_all_rf_applied = true; @@ -1354,10 +1321,13 @@ Status VScanNode::try_append_late_arrival_runtime_filter(int* arrived_rf_num) { return Status::OK(); } -Status VScanNode::clone_vconjunct_ctx(VExprContext** _vconjunct_ctx) { - if (_vconjunct_ctx_ptr) { +Status VScanNode::clone_conjunct_ctxs(VExprContextSPtrs& conjuncts) { + if (!_conjuncts.empty()) { std::unique_lock l(_rf_locks); - return _vconjunct_ctx_ptr->clone(_state, _vconjunct_ctx); + conjuncts.resize(_conjuncts.size()); + for (size_t i = 0; i != _conjuncts.size(); ++i) { + RETURN_IF_ERROR(_conjuncts[i]->clone(_state, conjuncts[i])); + } } return Status::OK(); } diff --git a/be/src/vec/exec/scan/vscan_node.h b/be/src/vec/exec/scan/vscan_node.h index 9eafb61289..e4c2fb6118 100644 --- a/be/src/vec/exec/scan/vscan_node.h +++ b/be/src/vec/exec/scan/vscan_node.h @@ -144,8 +144,8 @@ public: // Return num of filters which are applied already. Status try_append_late_arrival_runtime_filter(int* arrived_rf_num); - // Clone current vconjunct_ctx to _vconjunct_ctx, if exists. - Status clone_vconjunct_ctx(VExprContext** _vconjunct_ctx); + // Clone current _conjuncts to conjuncts, if exists. + Status clone_conjunct_ctxs(VExprContextSPtrs& conjuncts); int runtime_filter_num() const { return (int)_runtime_filter_ctxs.size(); } @@ -269,7 +269,7 @@ protected: // Set to true if the runtime filter is ready. std::vector _runtime_filter_ready_flag; doris::Mutex _rf_locks; - phmap::flat_hash_set _rf_vexpr_set; + phmap::flat_hash_set _rf_vexpr_set; // True means all runtime filters are applied to scanners bool _is_all_rf_applied = true; @@ -321,8 +321,8 @@ protected: // Every time vconjunct_ctx_ptr is updated, the old ctx will be stored in this vector // so that it will be destroyed uniformly at the end of the query. - std::vector _stale_vexpr_ctxs; - VExprContext* _common_vexpr_ctxs_pushdown = nullptr; + VExprContextSPtrs _stale_expr_ctxs; + VExprContextSPtrs _common_expr_ctxs_push_down; // If sort info is set, push limit to each scanner; int64_t _limit_per_scanner = -1; @@ -373,10 +373,11 @@ private: // Get all arrived runtime filters at Open phase. Status _acquire_runtime_filter(bool wait = true); // Append late-arrival runtime filters to the vconjunct_ctx. - Status _append_rf_into_conjuncts(std::vector& vexprs); + Status _append_rf_into_conjuncts(const VExprSPtrs& vexprs); Status _normalize_conjuncts(); - Status _normalize_predicate(VExpr* conjunct_expr_root, VExpr** output_expr); + Status _normalize_predicate(const VExprSPtr& conjunct_expr_root, VExprContext* context, + VExprSPtr& output_expr); Status _eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, PushDownType* pdt); Status _normalize_bloom_filter(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, @@ -388,10 +389,11 @@ private: Status _normalize_function_filters(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, PushDownType* pdt); - bool _is_predicate_acting_on_slot(VExpr* expr, - const std::function&, - const VSlotRef**, VExpr**)>& checker, - SlotDescriptor** slot_desc, ColumnValueRangeType** range); + bool _is_predicate_acting_on_slot( + VExpr* expr, + const std::function&, VExprSPtr&)>& + checker, + SlotDescriptor** slot_desc, ColumnValueRangeType** range); template Status _normalize_in_and_eq_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, @@ -410,9 +412,9 @@ private: Status _normalize_compound_predicate( vectorized::VExpr* expr, VExprContext* expr_ctx, PushDownType* pdt, bool is_runtimer_filter_predicate, - const std::function&, const VSlotRef**, VExpr**)>& + const std::function&, VExprSPtr&)>& in_predicate_checker, - const std::function&, const VSlotRef**, VExpr**)>& + const std::function&, VExprSPtr&)>& eq_predicate_checker); template diff --git a/be/src/vec/exec/scan/vscanner.cpp b/be/src/vec/exec/scan/vscanner.cpp index f3144c8c62..d7c6d20aa0 100644 --- a/be/src/vec/exec/scan/vscanner.cpp +++ b/be/src/vec/exec/scan/vscanner.cpp @@ -40,11 +40,14 @@ VScanner::VScanner(RuntimeState* state, VScanNode* parent, int64_t limit, Runtim _is_load = (_input_tuple_desc != nullptr); } -Status VScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR(vconjunct_ctx_ptr->clone(_state, &_vconjunct_ctx)); +Status VScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) { + if (!conjuncts.empty()) { + _conjuncts.resize(conjuncts.size()); + for (size_t i = 0; i != conjuncts.size(); ++i) { + RETURN_IF_ERROR(conjuncts[i]->clone(state, _conjuncts[i])); + } } + return Status::OK(); } @@ -107,7 +110,7 @@ Status VScanner::get_block(RuntimeState* state, Block* block, bool* eof) { Status VScanner::_filter_output_block(Block* block) { auto old_rows = block->rows(); - Status st = VExprContext::filter_block(_vconjunct_ctx, block, block->columns()); + Status st = VExprContext::filter_block(_conjuncts, block, block->columns()); _counter.num_rows_unselected += old_rows - block->rows(); return st; } @@ -127,13 +130,13 @@ Status VScanner::try_append_late_arrival_runtime_filter() { } // There are newly arrived runtime filters, - // renew the vconjunct_ctx_ptr - if (_vconjunct_ctx) { + // renew the _conjuncts + if (!_conjuncts.empty()) { _discard_conjuncts(); } // Notice that the number of runtime filters may be larger than _applied_rf_num. // But it is ok because it will be updated at next time. - RETURN_IF_ERROR(_parent->clone_vconjunct_ctx(&_vconjunct_ctx)); + RETURN_IF_ERROR(_parent->clone_conjunct_ctxs(_conjuncts)); _applied_rf_num = arrived_rf_num; return Status::OK(); } @@ -142,14 +145,16 @@ Status VScanner::close(RuntimeState* state) { if (_is_closed) { return Status::OK(); } - for (auto& ctx : _stale_vexpr_ctxs) { + for (auto& ctx : _stale_expr_ctxs) { ctx->close(state); } - if (_vconjunct_ctx) { - _vconjunct_ctx->close(state); + + for (auto& conjunct : _conjuncts) { + conjunct->close(state); } - if (_common_vexpr_ctxs_pushdown) { - _common_vexpr_ctxs_pushdown->close(state); + + for (auto& ctx : _common_expr_ctxs_push_down) { + ctx->close(state); } COUNTER_UPDATE(_parent->_scanner_wait_worker_timer, _scanner_wait_worker_timer); diff --git a/be/src/vec/exec/scan/vscanner.h b/be/src/vec/exec/scan/vscanner.h index 4fcc019fdc..bba93dec47 100644 --- a/be/src/vec/exec/scan/vscanner.h +++ b/be/src/vec/exec/scan/vscanner.h @@ -77,7 +77,7 @@ protected: Status _filter_output_block(Block* block); // Not virtual, all child will call this method explictly - Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); public: VScanNode* get_parent() { return _parent; } @@ -126,8 +126,6 @@ public: void set_status_on_failure(const Status& st) { _status = st; } - VExprContext** vconjunct_ctx_ptr() { return &_vconjunct_ctx; } - // return false if _is_counted_down is already true, // otherwise, set _is_counted_down to true and return true. bool set_counted_down() { @@ -140,10 +138,10 @@ public: protected: void _discard_conjuncts() { - if (_vconjunct_ctx) { - _stale_vexpr_ctxs.push_back(_vconjunct_ctx); - _vconjunct_ctx = nullptr; + for (auto& conjunct : _conjuncts) { + _stale_expr_ctxs.emplace_back(conjunct); } + _conjuncts.clear(); } protected: @@ -171,14 +169,15 @@ protected: // means all runtime filters are arrived and applied. int _applied_rf_num = 0; int _total_rf_num = 0; - // Cloned from _vconjunct_ctx of scan node. + // Cloned from _conjuncts of scan node. // It includes predicate in SQL and runtime filters. - VExprContext* _vconjunct_ctx = nullptr; - VExprContext* _common_vexpr_ctxs_pushdown = nullptr; - // Late arriving runtime filters will update _vconjunct_ctx. - // The old _vconjunct_ctx will be temporarily placed in _stale_vexpr_ctxs + VExprContextSPtrs _conjuncts; + + VExprContextSPtrs _common_expr_ctxs_push_down; + // Late arriving runtime filters will update _conjuncts. + // The old _conjuncts will be temporarily placed in _stale_expr_ctxs // and will be destroyed at the end. - std::vector _stale_vexpr_ctxs; + VExprContextSPtrs _stale_expr_ctxs; // num of rows read from scanner int64_t _num_rows_read = 0; diff --git a/be/src/vec/exec/vaggregation_node.cpp b/be/src/vec/exec/vaggregation_node.cpp index 1ebb0721b7..e4f2074dce 100644 --- a/be/src/vec/exec/vaggregation_node.cpp +++ b/be/src/vec/exec/vaggregation_node.cpp @@ -147,8 +147,7 @@ AggregationNode::~AggregationNode() = default; Status AggregationNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); // ignore return status for now , so we need to introduce ExecNode::init() - RETURN_IF_ERROR( - VExpr::create_expr_trees(_pool, tnode.agg_node.grouping_exprs, &_probe_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(tnode.agg_node.grouping_exprs, _probe_expr_ctxs)); // init aggregate functions _aggregate_evaluators.reserve(tnode.agg_node.aggregate_functions.size()); @@ -181,7 +180,7 @@ Status AggregationNode::init(const TPlanNode& tnode, RuntimeState* state) { return Status::OK(); } -void AggregationNode::_init_hash_method(std::vector& probe_exprs) { +void AggregationNode::_init_hash_method(const VExprContextSPtrs& probe_exprs) { DCHECK(probe_exprs.size() >= 1); if (probe_exprs.size() == 1) { auto is_nullable = probe_exprs[0]->root()->is_nullable(); @@ -262,8 +261,8 @@ void AggregationNode::_init_hash_method(std::vector& probe_exprs) _probe_key_sz.resize(_probe_expr_ctxs.size()); for (int i = 0; i < _probe_expr_ctxs.size(); ++i) { - const auto vexpr = _probe_expr_ctxs[i]->root(); - const auto& data_type = vexpr->data_type(); + const auto& expr = _probe_expr_ctxs[i]->root(); + const auto& data_type = expr->data_type(); if (!data_type->have_maximum_size_of_value()) { use_fixed_key = false; @@ -473,9 +472,9 @@ Status AggregationNode::prepare_profile(RuntimeState* state) { std::bind(&AggregationNode::_update_memusage_with_serialized_key, this); _executor.close = std::bind(&AggregationNode::_close_with_serialized_key, this); - _should_limit_output = _limit != -1 && // has limit - _vconjunct_ctx_ptr == nullptr && // no having conjunct - _needs_finalize; // agg's finalize step + _should_limit_output = _limit != -1 && // has limit + _conjuncts.empty() && // no having conjunct + _needs_finalize; // agg's finalize step } return Status::OK(); @@ -578,7 +577,7 @@ Status AggregationNode::pull(doris::RuntimeState* state, vectorized::Block* bloc RETURN_IF_ERROR(_executor.get_result(state, block, eos)); _make_nullable_output_key(block); // dispose the having clause, should not be execute in prestreaming agg - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, block, block->columns())); reached_limit(block, eos); return Status::OK(); diff --git a/be/src/vec/exec/vaggregation_node.h b/be/src/vec/exec/vaggregation_node.h index 4a62131e23..a0f95dd05c 100644 --- a/be/src/vec/exec/vaggregation_node.h +++ b/be/src/vec/exec/vaggregation_node.h @@ -889,7 +889,7 @@ private: friend class pipeline::AggSourceOperator; friend class pipeline::StreamingAggSourceOperator; // group by k1,k2 - std::vector _probe_expr_ctxs; + VExprContextSPtrs _probe_expr_ctxs; // left / full join will change the key nullable make output/input solt // nullable diff. so we need make nullable of it. std::vector _make_nullable_keys; @@ -999,7 +999,7 @@ private: Status _merge_with_serialized_key(Block* block); void _update_memusage_with_serialized_key(); void _close_with_serialized_key(); - void _init_hash_method(std::vector& probe_exprs); + void _init_hash_method(const VExprContextSPtrs& probe_exprs); template void _pre_serialize_key_if_need(AggState& state, AggMethod& agg_method, @@ -1072,7 +1072,7 @@ private: CHECK(ctxs.size() == 1 && ctxs[0]->root()->is_slot_ref()) << "input_exprs_ctxs is invalid, input_exprs_ctx[0]=" << ctxs[0]->root()->debug_string(); - return ((VSlotRef*)ctxs[0]->root())->column_id(); + return ((VSlotRef*)ctxs[0]->root().get())->column_id(); } template diff --git a/be/src/vec/exec/vanalytic_eval_node.cpp b/be/src/vec/exec/vanalytic_eval_node.cpp index 528a624432..cef242e688 100644 --- a/be/src/vec/exec/vanalytic_eval_node.cpp +++ b/be/src/vec/exec/vanalytic_eval_node.cpp @@ -138,10 +138,9 @@ Status VAnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) { _agg_intput_columns[i].resize(desc.nodes[0].num_children); for (int j = 0; j < desc.nodes[0].num_children; ++j) { ++node_idx; - VExpr* expr = nullptr; - VExprContext* ctx = nullptr; - RETURN_IF_ERROR( - VExpr::create_tree_from_thrift(_pool, desc.nodes, &node_idx, &expr, &ctx)); + VExprSPtr expr; + VExprContextSPtr ctx; + RETURN_IF_ERROR(VExpr::create_tree_from_thrift(desc.nodes, &node_idx, expr, ctx)); _agg_expr_ctxs[i].emplace_back(ctx); } @@ -154,10 +153,9 @@ Status VAnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) { } } - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, analytic_node.partition_exprs, - &_partition_by_eq_expr_ctxs)); RETURN_IF_ERROR( - VExpr::create_expr_trees(_pool, analytic_node.order_by_exprs, &_order_by_eq_expr_ctxs)); + VExpr::create_expr_trees(analytic_node.partition_exprs, _partition_by_eq_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(analytic_node.order_by_exprs, _order_by_eq_expr_ctxs)); _partition_by_column_idxs.resize(_partition_by_eq_expr_ctxs.size()); _ordey_by_column_idxs.resize(_order_by_eq_expr_ctxs.size()); _agg_functions_size = _agg_functions.size(); @@ -286,8 +284,7 @@ Status VAnalyticEvalNode::pull(doris::RuntimeState* /*state*/, vectorized::Block } } RETURN_IF_ERROR(_output_current_block(output_block)); - RETURN_IF_ERROR( - VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, output_block, output_block->columns())); reached_limit(output_block, eos); return Status::OK(); } @@ -345,7 +342,7 @@ Status VAnalyticEvalNode::get_next(RuntimeState* state, vectorized::Block* block } } RETURN_IF_ERROR(_output_current_block(block)); - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, block, block->columns())); reached_limit(block, eos); return Status::OK(); } @@ -602,8 +599,9 @@ Status VAnalyticEvalNode::sink(doris::RuntimeState* /*state*/, vectorized::Block return Status::OK(); } -Status VAnalyticEvalNode::_insert_range_column(vectorized::Block* block, VExprContext* expr, - IColumn* dst_column, size_t length) { +Status VAnalyticEvalNode::_insert_range_column(vectorized::Block* block, + const VExprContextSPtr& expr, IColumn* dst_column, + size_t length) { int result_col_id = -1; RETURN_IF_ERROR(expr->execute(block, &result_col_id)); DCHECK_GE(result_col_id, 0); diff --git a/be/src/vec/exec/vanalytic_eval_node.h b/be/src/vec/exec/vanalytic_eval_node.h index ef344fdcc8..bd08ef2dca 100644 --- a/be/src/vec/exec/vanalytic_eval_node.h +++ b/be/src/vec/exec/vanalytic_eval_node.h @@ -37,6 +37,7 @@ #include "vec/common/arena.h" #include "vec/core/block.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class DescriptorTbl; @@ -44,9 +45,6 @@ class ObjectPool; class RuntimeState; class TupleDescriptor; -namespace vectorized { -class VExprContext; -} // namespace vectorized } // namespace doris namespace doris::vectorized { @@ -102,8 +100,8 @@ private: Status _init_result_columns(); Status _create_agg_status(); Status _destroy_agg_status(); - Status _insert_range_column(vectorized::Block* block, VExprContext* expr, IColumn* dst_column, - size_t length); + Status _insert_range_column(vectorized::Block* block, const VExprContextSPtr& expr, + IColumn* dst_column, size_t length); void _update_order_by_range(); bool _init_next_partition(BlockRowPos found_partition_end); @@ -140,9 +138,9 @@ private: std::vector _input_blocks; std::vector input_block_first_row_positions; std::vector _agg_functions; - std::vector> _agg_expr_ctxs; - std::vector _partition_by_eq_expr_ctxs; - std::vector _order_by_eq_expr_ctxs; + std::vector _agg_expr_ctxs; + VExprContextSPtrs _partition_by_eq_expr_ctxs; + VExprContextSPtrs _order_by_eq_expr_ctxs; std::vector> _agg_intput_columns; std::vector _result_window_columns; diff --git a/be/src/vec/exec/vdata_gen_scan_node.cpp b/be/src/vec/exec/vdata_gen_scan_node.cpp index 9fccc16aaf..367bdb8499 100644 --- a/be/src/vec/exec/vdata_gen_scan_node.cpp +++ b/be/src/vec/exec/vdata_gen_scan_node.cpp @@ -102,7 +102,7 @@ Status VDataGenFunctionScanNode::get_next(RuntimeState* state, vectorized::Block } RETURN_IF_CANCELLED(state); Status res = _table_func->get_next(state, block, eos); - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, block, block->columns())); reached_limit(block, eos); return res; } diff --git a/be/src/vec/exec/vjdbc_connector.cpp b/be/src/vec/exec/vjdbc_connector.cpp index d8e5405395..613c76f8f8 100644 --- a/be/src/vec/exec/vjdbc_connector.cpp +++ b/be/src/vec/exec/vjdbc_connector.cpp @@ -715,8 +715,7 @@ Status JdbcConnector::exec_write_sql(const std::u16string& insert_stmt, return Status::OK(); } -Status JdbcConnector::exec_stmt_write( - Block* block, const std::vector& output_vexpr_ctxs) { +Status JdbcConnector::exec_stmt_write(Block* block, const VExprContextSPtrs& output_vexpr_ctxs) { SCOPED_TIMER(_result_send_timer); JNIEnv* env = nullptr; RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); diff --git a/be/src/vec/exec/vjdbc_connector.h b/be/src/vec/exec/vjdbc_connector.h index d113cceb6e..38c71d0143 100644 --- a/be/src/vec/exec/vjdbc_connector.h +++ b/be/src/vec/exec/vjdbc_connector.h @@ -78,8 +78,7 @@ public: Status exec_write_sql(const std::u16string& insert_stmt, const fmt::memory_buffer& insert_stmt_buffer) override; - Status exec_stmt_write(Block* block, - const std::vector& output_vexpr_ctxs); + Status exec_stmt_write(Block* block, const VExprContextSPtrs& output_vexpr_ctxs); Status get_next(bool* eos, std::vector& columns, Block* block, int batch_size); diff --git a/be/src/vec/exec/vpartition_sort_node.cpp b/be/src/vec/exec/vpartition_sort_node.cpp index cb3b199285..f52e8cb678 100644 --- a/be/src/vec/exec/vpartition_sort_node.cpp +++ b/be/src/vec/exec/vpartition_sort_node.cpp @@ -55,8 +55,8 @@ Status VPartitionSortNode::init(const TPlanNode& tnode, RuntimeState* state) { } //partition by key if (tnode.partition_sort_node.__isset.partition_exprs) { - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, tnode.partition_sort_node.partition_exprs, - &_partition_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(tnode.partition_sort_node.partition_exprs, + _partition_expr_ctxs)); _partition_exprs_num = _partition_expr_ctxs.size(); _partition_columns.resize(_partition_exprs_num); } diff --git a/be/src/vec/exec/vpartition_sort_node.h b/be/src/vec/exec/vpartition_sort_node.h index 0b24ce8378..4aae4a7acb 100644 --- a/be/src/vec/exec/vpartition_sort_node.h +++ b/be/src/vec/exec/vpartition_sort_node.h @@ -353,7 +353,7 @@ private: std::unique_ptr _agg_arena_pool; // partition by k1,k2 int _partition_exprs_num = 0; - std::vector _partition_expr_ctxs; + VExprContextSPtrs _partition_expr_ctxs; std::vector _partition_columns; std::vector _partition_key_sz; std::vector _hash_values; diff --git a/be/src/vec/exec/vrepeat_node.cpp b/be/src/vec/exec/vrepeat_node.cpp index cb6a2278e6..8a67e5a90d 100644 --- a/be/src/vec/exec/vrepeat_node.cpp +++ b/be/src/vec/exec/vrepeat_node.cpp @@ -57,7 +57,7 @@ VRepeatNode::VRepeatNode(ObjectPool* pool, const TPlanNode& tnode, const Descrip Status VRepeatNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, tnode.repeat_node.exprs, &_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(tnode.repeat_node.exprs, _expr_ctxs)); return Status::OK(); } @@ -224,7 +224,7 @@ Status VRepeatNode::push(RuntimeState* state, vectorized::Block* input_block, bo if (input_block->rows() > 0) { _intermediate_block = Block::create_unique(); - for (auto expr : _expr_ctxs) { + for (auto& expr : _expr_ctxs) { int result_column_id = -1; RETURN_IF_ERROR(expr->execute(input_block, &result_column_id)); DCHECK(result_column_id != -1); diff --git a/be/src/vec/exec/vrepeat_node.h b/be/src/vec/exec/vrepeat_node.h index 4a2c242e6c..837b4c8aca 100644 --- a/be/src/vec/exec/vrepeat_node.h +++ b/be/src/vec/exec/vrepeat_node.h @@ -27,6 +27,7 @@ #include "common/global_types.h" #include "exec/exec_node.h" #include "vec/core/block.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { @@ -39,7 +40,6 @@ class SlotDescriptor; class TupleDescriptor; namespace vectorized { -class VExprContext; class VRepeatNode : public ExecNode { public: @@ -79,7 +79,7 @@ private: std::vector _output_slots; - std::vector _expr_ctxs; + VExprContextSPtrs _expr_ctxs; bool _child_eos; int _repeat_id_idx; }; diff --git a/be/src/vec/exec/vschema_scan_node.cpp b/be/src/vec/exec/vschema_scan_node.cpp index fd06f48ba6..ecc0fdb888 100644 --- a/be/src/vec/exec/vschema_scan_node.cpp +++ b/be/src/vec/exec/vschema_scan_node.cpp @@ -268,7 +268,7 @@ Status VSchemaScanNode::get_next(RuntimeState* state, vectorized::Block* block, *src_block.get_by_name(dest_slot_desc->col_name()).column, 0, src_block.rows()); } - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, block, _dest_tuple_desc->slots().size())); VLOG_ROW << "VSchemaScanNode output rows: " << src_block.rows(); src_block.clear(); diff --git a/be/src/vec/exec/vselect_node.cpp b/be/src/vec/exec/vselect_node.cpp index c8b61ec94a..ee1628cd19 100644 --- a/be/src/vec/exec/vselect_node.cpp +++ b/be/src/vec/exec/vselect_node.cpp @@ -75,8 +75,7 @@ Status VSelectNode::get_next(RuntimeState* state, vectorized::Block* block, bool Status VSelectNode::pull(RuntimeState* state, vectorized::Block* output_block, bool* eos) { RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR( - VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, output_block, output_block->columns())); reached_limit(output_block, eos); return Status::OK(); diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp index d18ee55931..58b59c1b77 100644 --- a/be/src/vec/exec/vset_operation_node.cpp +++ b/be/src/vec/exec/vset_operation_node.cpp @@ -205,8 +205,8 @@ Status VSetOperationNode::init(const TPlanNode& tnode, RuntimeStat } for (auto& texprs : result_texpr_lists) { - std::vector ctxs; - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, texprs, &ctxs)); + VExprContextSPtrs ctxs; + RETURN_IF_ERROR(VExpr::create_expr_trees(texprs, ctxs)); _child_expr_lists.push_back(ctxs); } @@ -218,7 +218,7 @@ Status VSetOperationNode::init(const TPlanNode& tnode, RuntimeStat template Status VSetOperationNode::alloc_resource(RuntimeState* state) { // open result expr lists. - for (const std::vector& exprs : _child_expr_lists) { + for (const VExprContextSPtrs& exprs : _child_expr_lists) { RETURN_IF_ERROR(VExpr::open(exprs, state)); } _probe_columns.resize(_child_expr_lists[1].size()); @@ -431,8 +431,7 @@ Status VSetOperationNode::pull(RuntimeState* state, Block* output_ }, *_hash_table_variants); RETURN_IF_ERROR(st); - RETURN_IF_ERROR( - VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, output_block, output_block->columns())); reached_limit(output_block, eos); return Status::OK(); } diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h index f4c9e0c870..39a74f899e 100644 --- a/be/src/vec/exec/vset_operation_node.h +++ b/be/src/vec/exec/vset_operation_node.h @@ -104,7 +104,7 @@ private: int64_t _valid_element_in_hash_tbl; //The i-th result expr list refers to the i-th child. - std::vector> _child_expr_lists; + std::vector _child_expr_lists; //record build column type DataTypes _left_table_data_types; //first:column_id, could point to origin column or cast column diff --git a/be/src/vec/exec/vtable_function_node.cpp b/be/src/vec/exec/vtable_function_node.cpp index ce80dbb931..d3e967afcc 100644 --- a/be/src/vec/exec/vtable_function_node.cpp +++ b/be/src/vec/exec/vtable_function_node.cpp @@ -48,15 +48,15 @@ Status VTableFunctionNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); for (const TExpr& texpr : tnode.table_function_node.fnCallExprList) { - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, texpr, &ctx)); + VExprContextSPtr ctx; + RETURN_IF_ERROR(VExpr::create_expr_tree(texpr, ctx)); _vfn_ctxs.push_back(ctx); - VExpr* root = ctx->root(); + auto root = ctx->root(); const std::string& tf_name = root->fn().name.function_name; TableFunction* fn = nullptr; RETURN_IF_ERROR(TableFunctionFactory::get_fn(tf_name, _pool, &fn)); - fn->set_vexpr_context(ctx); + fn->set_expr_context(ctx); _fns.push_back(fn); } _fn_num = _fns.size(); @@ -236,8 +236,7 @@ Status VTableFunctionNode::_get_expanded_block(RuntimeState* state, Block* outpu } // 3. eval conjuncts - RETURN_IF_ERROR( - VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, output_block, output_block->columns())); *eos = _child_eos && _cur_child_offset == -1; return Status::OK(); diff --git a/be/src/vec/exec/vtable_function_node.h b/be/src/vec/exec/vtable_function_node.h index 1a4f1438c3..d2ca9589c2 100644 --- a/be/src/vec/exec/vtable_function_node.h +++ b/be/src/vec/exec/vtable_function_node.h @@ -145,7 +145,7 @@ private: std::vector _output_slots; int64_t _cur_child_offset = 0; - std::vector _vfn_ctxs; + VExprContextSPtrs _vfn_ctxs; std::vector _fns; int _fn_num = 0; diff --git a/be/src/vec/exec/vunion_node.cpp b/be/src/vec/exec/vunion_node.cpp index 6bee8ec6a5..a2fef62076 100644 --- a/be/src/vec/exec/vunion_node.cpp +++ b/be/src/vec/exec/vunion_node.cpp @@ -63,15 +63,15 @@ Status VUnionNode::init(const TPlanNode& tnode, RuntimeState* state) { // Create const_expr_ctx_lists_ from thrift exprs. auto& const_texpr_lists = tnode.union_node.const_expr_lists; for (auto& texprs : const_texpr_lists) { - std::vector ctxs; - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, texprs, &ctxs)); + VExprContextSPtrs ctxs; + RETURN_IF_ERROR(VExpr::create_expr_trees(texprs, ctxs)); _const_expr_lists.push_back(ctxs); } // Create result_expr_ctx_lists_ from thrift exprs. auto& result_texpr_lists = tnode.union_node.result_expr_lists; for (auto& texprs : result_texpr_lists) { - std::vector ctxs; - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, texprs, &ctxs)); + VExprContextSPtrs ctxs; + RETURN_IF_ERROR(VExpr::create_expr_trees(texprs, ctxs)); _child_expr_lists.push_back(ctxs); } return Status::OK(); @@ -83,7 +83,7 @@ Status VUnionNode::prepare(RuntimeState* state) { _materialize_exprs_evaluate_timer = ADD_TIMER(_runtime_profile, "MaterializeExprsEvaluateTimer"); // Prepare const expr lists. - for (const std::vector& exprs : _const_expr_lists) { + for (const VExprContextSPtrs& exprs : _const_expr_lists) { RETURN_IF_ERROR(VExpr::prepare(exprs, state, _row_descriptor)); } @@ -107,11 +107,11 @@ Status VUnionNode::open(RuntimeState* state) { Status VUnionNode::alloc_resource(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); // open const expr lists. - for (const std::vector& exprs : _const_expr_lists) { + for (const auto& exprs : _const_expr_lists) { RETURN_IF_ERROR(VExpr::open(exprs, state)); } // open result expr lists. - for (const std::vector& exprs : _child_expr_lists) { + for (const auto& exprs : _child_expr_lists) { RETURN_IF_ERROR(VExpr::open(exprs, state)); } return ExecNode::alloc_resource(state); @@ -297,7 +297,7 @@ Status VUnionNode::get_next(RuntimeState* state, Block* block, bool* eos) { } else if (has_more_const(state)) { RETURN_IF_ERROR(get_next_const(state, block)); } - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns())); + RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, block, block->columns())); *eos = (!has_more_passthrough() && !has_more_materialized() && !has_more_const(state)); reached_limit(block, eos); @@ -339,7 +339,7 @@ void VUnionNode::debug_string(int indentation_level, std::stringstream* out) con } Status VUnionNode::materialize_block(Block* src_block, int child_idx, Block* res_block) { - const std::vector& child_exprs = _child_expr_lists[child_idx]; + const auto& child_exprs = _child_expr_lists[child_idx]; ColumnsWithTypeAndName colunms; for (size_t i = 0; i < child_exprs.size(); ++i) { int result_column_id = -1; diff --git a/be/src/vec/exec/vunion_node.h b/be/src/vec/exec/vunion_node.h index 79ef72106c..c25bb07102 100644 --- a/be/src/vec/exec/vunion_node.h +++ b/be/src/vec/exec/vunion_node.h @@ -28,6 +28,7 @@ #include "runtime/runtime_state.h" #include "util/runtime_profile.h" #include "vec/core/block.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class DescriptorTbl; @@ -35,7 +36,6 @@ class ObjectPool; class TPlanNode; namespace vectorized { -class VExprContext; class VUnionNode final : public ExecNode { public: @@ -67,10 +67,10 @@ public: private: /// Const exprs materialized by this node. These exprs don't refer to any children. /// Only materialized by the first fragment instance to avoid duplication. - std::vector> _const_expr_lists; + std::vector _const_expr_lists; /// Exprs materialized by this node. The i-th result expr list refers to the i-th child. - std::vector> _child_expr_lists; + std::vector _child_expr_lists; /// Index of the first non-passthrough child; i.e. a child that needs materialization. /// 0 when all children are materialized, '_children.size()' when no children are /// materialized. @@ -106,7 +106,7 @@ private: /// have been consumed from the current child block. Updates '_child_row_idx'. Status materialize_block(Block* dst_block, int child_idx, Block* res_block); - Status get_error_msg(const std::vector& exprs); + Status get_error_msg(const VExprContextSPtrs& exprs); /// Returns true if the child at 'child_idx' can be passed through. bool is_child_passthrough(int child_idx) const { diff --git a/be/src/vec/exprs/lambda_function/lambda_function.h b/be/src/vec/exprs/lambda_function/lambda_function.h index a7f2fb8f69..184b4c2cc3 100644 --- a/be/src/vec/exprs/lambda_function/lambda_function.h +++ b/be/src/vec/exprs/lambda_function/lambda_function.h @@ -33,7 +33,7 @@ public: virtual doris::Status execute(VExprContext* context, doris::vectorized::Block* block, int* result_column_id, const DataTypePtr& result_type, - const std::vector& children) = 0; + const VExprSPtrs& children) = 0; }; using LambdaFunctionPtr = std::shared_ptr; diff --git a/be/src/vec/exprs/lambda_function/varray_filter_function.cpp b/be/src/vec/exprs/lambda_function/varray_filter_function.cpp index 9fae6ed634..0a77ebd673 100644 --- a/be/src/vec/exprs/lambda_function/varray_filter_function.cpp +++ b/be/src/vec/exprs/lambda_function/varray_filter_function.cpp @@ -61,7 +61,7 @@ public: doris::Status execute(VExprContext* context, doris::vectorized::Block* block, int* result_column_id, const DataTypePtr& result_type, - const std::vector& children) override { + const VExprSPtrs& children) override { ///* array_filter(array, array) */// //1. child[0:end]->execute(src_block) diff --git a/be/src/vec/exprs/lambda_function/varray_map_function.cpp b/be/src/vec/exprs/lambda_function/varray_map_function.cpp index 2f57ed8976..67bd6bf4aa 100644 --- a/be/src/vec/exprs/lambda_function/varray_map_function.cpp +++ b/be/src/vec/exprs/lambda_function/varray_map_function.cpp @@ -61,7 +61,7 @@ public: doris::Status execute(VExprContext* context, doris::vectorized::Block* block, int* result_column_id, const DataTypePtr& result_type, - const std::vector& children) override { + const VExprSPtrs& children) override { ///* array_map(lambda,arg1,arg2,.....) */// //1. child[1:end]->execute(src_block) diff --git a/be/src/vec/exprs/table_function/table_function.h b/be/src/vec/exprs/table_function/table_function.h index 7fff88899b..4b31a681b1 100644 --- a/be/src/vec/exprs/table_function/table_function.h +++ b/be/src/vec/exprs/table_function/table_function.h @@ -81,7 +81,7 @@ public: std::string name() const { return _fn_name; } bool eos() const { return _eos; } - void set_vexpr_context(VExprContext* vexpr_context) { _vexpr_context = vexpr_context; } + void set_expr_context(const VExprContextSPtr& expr_context) { _expr_context = expr_context; } void set_nullable() { _is_nullable = true; } bool is_outer() const { return _is_outer; } @@ -97,7 +97,7 @@ public: protected: std::string _fn_name; - VExprContext* _vexpr_context = nullptr; + VExprContextSPtr _expr_context = nullptr; // true if there is no more data can be read from this function. bool _eos = false; // the position of current cursor diff --git a/be/src/vec/exprs/table_function/vexplode.cpp b/be/src/vec/exprs/table_function/vexplode.cpp index 8c17ceeb25..1340ff74bb 100644 --- a/be/src/vec/exprs/table_function/vexplode.cpp +++ b/be/src/vec/exprs/table_function/vexplode.cpp @@ -37,13 +37,13 @@ VExplodeTableFunction::VExplodeTableFunction() { } Status VExplodeTableFunction::process_init(Block* block) { - CHECK(_vexpr_context->root()->children().size() == 1) + CHECK(_expr_context->root()->children().size() == 1) << "VExplodeTableFunction only support 1 child but has " - << _vexpr_context->root()->children().size(); + << _expr_context->root()->children().size(); int value_column_idx = -1; - RETURN_IF_ERROR(_vexpr_context->root()->children()[0]->execute(_vexpr_context, block, - &value_column_idx)); + RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, + &value_column_idx)); _array_column = block->get_by_position(value_column_idx).column->convert_to_full_column_if_const(); diff --git a/be/src/vec/exprs/table_function/vexplode_bitmap.cpp b/be/src/vec/exprs/table_function/vexplode_bitmap.cpp index 2f717b7413..152566d00b 100644 --- a/be/src/vec/exprs/table_function/vexplode_bitmap.cpp +++ b/be/src/vec/exprs/table_function/vexplode_bitmap.cpp @@ -41,13 +41,13 @@ VExplodeBitmapTableFunction::VExplodeBitmapTableFunction() { } Status VExplodeBitmapTableFunction::process_init(Block* block) { - CHECK(_vexpr_context->root()->children().size() == 1) + CHECK(_expr_context->root()->children().size() == 1) << "VExplodeNumbersTableFunction must be have 1 children but have " - << _vexpr_context->root()->children().size(); + << _expr_context->root()->children().size(); int value_column_idx = -1; - RETURN_IF_ERROR(_vexpr_context->root()->children()[0]->execute(_vexpr_context, block, - &value_column_idx)); + RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, + &value_column_idx)); _value_column = block->get_by_position(value_column_idx).column; return Status::OK(); diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp b/be/src/vec/exprs/table_function/vexplode_json_array.cpp index 96e6b10065..16f6c39bf7 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp +++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp @@ -138,12 +138,12 @@ VExplodeJsonArrayTableFunction::VExplodeJsonArrayTableFunction(ExplodeJsonArrayT } Status VExplodeJsonArrayTableFunction::process_init(Block* block) { - CHECK(_vexpr_context->root()->children().size() == 1) - << _vexpr_context->root()->children().size(); + CHECK(_expr_context->root()->children().size() == 1) + << _expr_context->root()->children().size(); int text_column_idx = -1; - RETURN_IF_ERROR(_vexpr_context->root()->children()[0]->execute(_vexpr_context, block, - &text_column_idx)); + RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, + &text_column_idx)); _text_column = block->get_by_position(text_column_idx).column; return Status::OK(); diff --git a/be/src/vec/exprs/table_function/vexplode_numbers.cpp b/be/src/vec/exprs/table_function/vexplode_numbers.cpp index 022c0f13a5..fe450c8096 100644 --- a/be/src/vec/exprs/table_function/vexplode_numbers.cpp +++ b/be/src/vec/exprs/table_function/vexplode_numbers.cpp @@ -41,13 +41,13 @@ VExplodeNumbersTableFunction::VExplodeNumbersTableFunction() { } Status VExplodeNumbersTableFunction::process_init(Block* block) { - CHECK(_vexpr_context->root()->children().size() == 1) + CHECK(_expr_context->root()->children().size() == 1) << "VExplodeSplitTableFunction must be have 1 children but have " - << _vexpr_context->root()->children().size(); + << _expr_context->root()->children().size(); int value_column_idx = -1; - RETURN_IF_ERROR(_vexpr_context->root()->children()[0]->execute(_vexpr_context, block, - &value_column_idx)); + RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, + &value_column_idx)); _value_column = block->get_by_position(value_column_idx).column; if (is_column_const(*_value_column)) { _cur_size = 0; diff --git a/be/src/vec/exprs/table_function/vexplode_split.cpp b/be/src/vec/exprs/table_function/vexplode_split.cpp index e832fc43c1..7350419436 100644 --- a/be/src/vec/exprs/table_function/vexplode_split.cpp +++ b/be/src/vec/exprs/table_function/vexplode_split.cpp @@ -44,17 +44,17 @@ Status VExplodeSplitTableFunction::open() { } Status VExplodeSplitTableFunction::process_init(Block* block) { - CHECK(_vexpr_context->root()->children().size() == 2) + CHECK(_expr_context->root()->children().size() == 2) << "VExplodeSplitTableFunction must be have 2 children but have " - << _vexpr_context->root()->children().size(); + << _expr_context->root()->children().size(); int text_column_idx = -1; int delimiter_column_idx = -1; - RETURN_IF_ERROR(_vexpr_context->root()->children()[0]->execute(_vexpr_context, block, - &text_column_idx)); - RETURN_IF_ERROR(_vexpr_context->root()->children()[1]->execute(_vexpr_context, block, - &delimiter_column_idx)); + RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, + &text_column_idx)); + RETURN_IF_ERROR(_expr_context->root()->children()[1]->execute(_expr_context.get(), block, + &delimiter_column_idx)); // dispose test column _text_column = diff --git a/be/src/vec/exprs/varray_literal.cpp b/be/src/vec/exprs/varray_literal.cpp index 233c3538ab..a9c1a08e1a 100644 --- a/be/src/vec/exprs/varray_literal.cpp +++ b/be/src/vec/exprs/varray_literal.cpp @@ -47,9 +47,9 @@ Status VArrayLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context)); bool is_null = (_node_type == TExprNodeType::NULL_LITERAL); Field array = is_null ? Field() : Array(); - for (const auto child : _children) { + for (auto& child : _children) { Field item; - auto child_literal = dynamic_cast(child); + auto child_literal = std::dynamic_pointer_cast(child); child_literal->get_column_ptr()->get(0, item); array.get().push_back(item); } diff --git a/be/src/vec/exprs/vbitmap_predicate.h b/be/src/vec/exprs/vbitmap_predicate.h index 366a44ce46..8b4e6e00b8 100644 --- a/be/src/vec/exprs/vbitmap_predicate.h +++ b/be/src/vec/exprs/vbitmap_predicate.h @@ -61,9 +61,7 @@ public: void close(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VBitmapPredicate::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VBitmapPredicate::create_shared(*this); } const std::string& expr_name() const override; diff --git a/be/src/vec/exprs/vbloom_predicate.h b/be/src/vec/exprs/vbloom_predicate.h index d3de55596b..aaef48a098 100644 --- a/be/src/vec/exprs/vbloom_predicate.h +++ b/be/src/vec/exprs/vbloom_predicate.h @@ -51,9 +51,7 @@ public: FunctionContext::FunctionStateScope scope) override; void close(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VBloomPredicate::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VBloomPredicate::create_shared(*this); } const std::string& expr_name() const override; void set_filter(std::shared_ptr& filter); diff --git a/be/src/vec/exprs/vcase_expr.cpp b/be/src/vec/exprs/vcase_expr.cpp index 5dc9eec6e8..f5f4172b6a 100644 --- a/be/src/vec/exprs/vcase_expr.cpp +++ b/be/src/vec/exprs/vcase_expr.cpp @@ -118,7 +118,7 @@ std::string VCaseExpr::debug_string() const { out << "CaseExpr(has_case_expr=" << _has_case_expr << " has_else_expr=" << _has_else_expr << " function=" << _function_name << "){"; bool first = true; - for (VExpr* input_expr : children()) { + for (auto& input_expr : children()) { if (first) { first = false; } else { diff --git a/be/src/vec/exprs/vcase_expr.h b/be/src/vec/exprs/vcase_expr.h index c34e8d782e..ece370b736 100644 --- a/be/src/vec/exprs/vcase_expr.h +++ b/be/src/vec/exprs/vcase_expr.h @@ -32,7 +32,6 @@ class TExprNode; namespace vectorized { class Block; -class VExprContext; } // namespace vectorized } // namespace doris @@ -52,9 +51,7 @@ public: FunctionContext::FunctionStateScope scope) override; virtual void close(RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - virtual VExpr* clone(ObjectPool* pool) const override { - return pool->add(VCaseExpr::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VCaseExpr::create_shared(*this); } virtual const std::string& expr_name() const override; virtual std::string debug_string() const override; diff --git a/be/src/vec/exprs/vcast_expr.cpp b/be/src/vec/exprs/vcast_expr.cpp index 65fa18e89b..57fc993608 100644 --- a/be/src/vec/exprs/vcast_expr.cpp +++ b/be/src/vec/exprs/vcast_expr.cpp @@ -118,7 +118,7 @@ std::string VCastExpr::debug_string() const { out << "CastExpr(CAST " << _cast_param_data_type->get_name() << " to " << _target_data_type->get_name() << "){"; bool first = true; - for (VExpr* input_expr : children()) { + for (auto& input_expr : children()) { if (first) { first = false; } else { diff --git a/be/src/vec/exprs/vcast_expr.h b/be/src/vec/exprs/vcast_expr.h index f4e59d1581..7589ab38d5 100644 --- a/be/src/vec/exprs/vcast_expr.h +++ b/be/src/vec/exprs/vcast_expr.h @@ -51,9 +51,7 @@ public: FunctionContext::FunctionStateScope scope) override; virtual void close(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - virtual VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VCastExpr::create_unique(*this).release()); - } + virtual VExprSPtr clone() const override { return VCastExpr::create_shared(*this); } virtual const std::string& expr_name() const override; virtual std::string debug_string() const override; diff --git a/be/src/vec/exprs/vcolumn_ref.h b/be/src/vec/exprs/vcolumn_ref.h index 4ea582839d..3e1a6252a3 100644 --- a/be/src/vec/exprs/vcolumn_ref.h +++ b/be/src/vec/exprs/vcolumn_ref.h @@ -53,9 +53,7 @@ public: return Status::OK(); } - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VColumnRef::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VColumnRef::create_shared(*this); } bool is_constant() const override { return false; } diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 47f12fa13e..56e21ad082 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -38,19 +38,17 @@ inline std::string compound_operator_to_string(TExprOpcode::type op) { } } -class VcompoundPred : public VectorizedFnCall { - ENABLE_FACTORY_CREATOR(VcompoundPred); +class VCompoundPred : public VectorizedFnCall { + ENABLE_FACTORY_CREATOR(VCompoundPred); public: - VcompoundPred(const TExprNode& node) : VectorizedFnCall(node) { + VCompoundPred(const TExprNode& node) : VectorizedFnCall(node) { _op = node.opcode; _fn.name.function_name = compound_operator_to_string(_op); _expr_name = "VCompoundPredicate (" + _fn.name.function_name + ")"; } - VExpr* clone(ObjectPool* pool) const override { - return pool->add(VcompoundPred::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VCompoundPred::create_shared(*this); } const std::string& expr_name() const override { return _expr_name; } diff --git a/be/src/vec/exprs/vdirect_in_predicate.h b/be/src/vec/exprs/vdirect_in_predicate.h index 219dda17fc..2110c03324 100644 --- a/be/src/vec/exprs/vdirect_in_predicate.h +++ b/be/src/vec/exprs/vdirect_in_predicate.h @@ -63,9 +63,7 @@ public: return Status::OK(); } - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VDirectInPredicate::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VDirectInPredicate::create_shared(*this); } const std::string& expr_name() const override { return _expr_name; } diff --git a/be/src/vec/exprs/vectorized_agg_fn.cpp b/be/src/vec/exprs/vectorized_agg_fn.cpp index 6576fd1048..d13138fc56 100644 --- a/be/src/vec/exprs/vectorized_agg_fn.cpp +++ b/be/src/vec/exprs/vectorized_agg_fn.cpp @@ -82,9 +82,9 @@ Status AggFnEvaluator::create(ObjectPool* pool, const TExpr& desc, const TSortIn int node_idx = 0; for (int i = 0; i < desc.nodes[0].num_children; ++i) { ++node_idx; - VExpr* expr = nullptr; - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(VExpr::create_tree_from_thrift(pool, desc.nodes, &node_idx, &expr, &ctx)); + VExprSPtr expr; + VExprContextSPtr ctx; + RETURN_IF_ERROR(VExpr::create_tree_from_thrift(desc.nodes, &node_idx, expr, ctx)); agg_fn_evaluator->_input_exprs_ctxs.push_back(ctx); } diff --git a/be/src/vec/exprs/vectorized_agg_fn.h b/be/src/vec/exprs/vectorized_agg_fn.h index bb2b354280..55d11b6925 100644 --- a/be/src/vec/exprs/vectorized_agg_fn.h +++ b/be/src/vec/exprs/vectorized_agg_fn.h @@ -28,6 +28,7 @@ #include "vec/aggregate_functions/aggregate_function.h" #include "vec/core/sort_description.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class RuntimeState; @@ -43,7 +44,6 @@ class Arena; class Block; class BufferWritable; class IColumn; -class VExprContext; class AggFnEvaluator { ENABLE_FACTORY_CREATOR(AggFnEvaluator); @@ -99,7 +99,7 @@ public: static std::string debug_string(const std::vector& exprs); std::string debug_string() const; bool is_merge() const { return _is_merge; } - const std::vector& input_exprs_ctxs() const { return _input_exprs_ctxs; } + const VExprContextSPtrs& input_exprs_ctxs() const { return _input_exprs_ctxs; } private: const TFunction _fn; @@ -123,7 +123,7 @@ private: RuntimeProfile::Counter* _expr_timer; // input context - std::vector _input_exprs_ctxs; + VExprContextSPtrs _input_exprs_ctxs; SortDescription _sort_description; diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index e3ea8a466c..636be579e7 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -176,7 +176,7 @@ std::string VectorizedFnCall::debug_string() const { out << _expr_name; out << "]{"; bool first = true; - for (VExpr* input_expr : children()) { + for (auto& input_expr : children()) { if (first) { first = false; } else { diff --git a/be/src/vec/exprs/vectorized_fn_call.h b/be/src/vec/exprs/vectorized_fn_call.h index 9fcd3563c9..2d64f9a341 100644 --- a/be/src/vec/exprs/vectorized_fn_call.h +++ b/be/src/vec/exprs/vectorized_fn_call.h @@ -51,9 +51,7 @@ public: FunctionContext::FunctionStateScope scope) override; void close(RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - VExpr* clone(ObjectPool* pool) const override { - return pool->add(VectorizedFnCall::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VectorizedFnCall::create_shared(*this); } const std::string& expr_name() const override; std::string debug_string() const override; bool is_constant() const override { diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 9f66bba50f..593715cd62 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -127,9 +127,9 @@ void VExpr::close(RuntimeState* state, VExprContext* context, } } -Status VExpr::create_expr(ObjectPool* pool, const TExprNode& texpr_node, VExpr** expr) { +Status VExpr::create_expr(const doris::TExprNode& expr_node, VExprSPtr& expr) { try { - switch (texpr_node.node_type) { + switch (expr_node.node_type) { case TExprNodeType::BOOL_LITERAL: case TExprNodeType::INT_LITERAL: case TExprNodeType::LARGE_INT_LITERAL: @@ -139,90 +139,90 @@ Status VExpr::create_expr(ObjectPool* pool, const TExprNode& texpr_node, VExpr** case TExprNodeType::STRING_LITERAL: case TExprNodeType::JSON_LITERAL: case TExprNodeType::NULL_LITERAL: { - *expr = pool->add(VLiteral::create_unique(texpr_node).release()); + expr = VLiteral::create_shared(expr_node); break; } case TExprNodeType::ARRAY_LITERAL: { - *expr = pool->add(VArrayLiteral::create_unique(texpr_node).release()); + expr = VArrayLiteral::create_shared(expr_node); break; } case TExprNodeType::MAP_LITERAL: { - *expr = pool->add(VMapLiteral::create_unique(texpr_node).release()); + expr = VMapLiteral::create_shared(expr_node); break; } case TExprNodeType::STRUCT_LITERAL: { - *expr = pool->add(VStructLiteral::create_unique(texpr_node).release()); + expr = VStructLiteral::create_shared(expr_node); break; } - case TExprNodeType::SLOT_REF: { - *expr = pool->add(VSlotRef::create_unique(texpr_node).release()); + case doris::TExprNodeType::SLOT_REF: { + expr = VSlotRef::create_shared(expr_node); break; } - case TExprNodeType::COLUMN_REF: { - *expr = pool->add(VColumnRef::create_unique(texpr_node).release()); + case doris::TExprNodeType::COLUMN_REF: { + expr = VColumnRef::create_shared(expr_node); break; } - case TExprNodeType::COMPOUND_PRED: { - *expr = pool->add(VcompoundPred::create_unique(texpr_node).release()); + case doris::TExprNodeType::COMPOUND_PRED: { + expr = VCompoundPred::create_shared(expr_node); break; } - case TExprNodeType::LAMBDA_FUNCTION_EXPR: { - *expr = pool->add(VLambdaFunctionExpr::create_unique(texpr_node).release()); + case doris::TExprNodeType::LAMBDA_FUNCTION_EXPR: { + expr = VLambdaFunctionExpr::create_shared(expr_node); break; } - case TExprNodeType::LAMBDA_FUNCTION_CALL_EXPR: { - *expr = pool->add(VLambdaFunctionCallExpr::create_unique(texpr_node).release()); + case doris::TExprNodeType::LAMBDA_FUNCTION_CALL_EXPR: { + expr = VLambdaFunctionCallExpr::create_shared(expr_node); break; } - case TExprNodeType::ARITHMETIC_EXPR: - case TExprNodeType::BINARY_PRED: - case TExprNodeType::FUNCTION_CALL: - case TExprNodeType::COMPUTE_FUNCTION_CALL: - case TExprNodeType::MATCH_PRED: { - *expr = pool->add(VectorizedFnCall::create_unique(texpr_node).release()); + case doris::TExprNodeType::ARITHMETIC_EXPR: + case doris::TExprNodeType::BINARY_PRED: + case doris::TExprNodeType::FUNCTION_CALL: + case doris::TExprNodeType::COMPUTE_FUNCTION_CALL: + case doris::TExprNodeType::MATCH_PRED: { + expr = VectorizedFnCall::create_shared(expr_node); break; } - case TExprNodeType::CAST_EXPR: { - *expr = pool->add(VCastExpr::create_unique(texpr_node).release()); + case doris::TExprNodeType::CAST_EXPR: { + expr = VCastExpr::create_shared(expr_node); break; } - case TExprNodeType::IN_PRED: { - *expr = pool->add(VInPredicate::create_unique(texpr_node).release()); + case doris::TExprNodeType::IN_PRED: { + expr = VInPredicate::create_shared(expr_node); break; } - case TExprNodeType::CASE_EXPR: { - if (!texpr_node.__isset.case_expr) { + case doris::TExprNodeType::CASE_EXPR: { + if (!expr_node.__isset.case_expr) { return Status::InternalError("Case expression not set in thrift node"); } - *expr = pool->add(VCaseExpr::create_unique(texpr_node).release()); + expr = VCaseExpr::create_shared(expr_node); break; } case TExprNodeType::INFO_FUNC: { - *expr = pool->add(VInfoFunc::create_unique(texpr_node).release()); + expr = VInfoFunc::create_shared(expr_node); break; } case TExprNodeType::TUPLE_IS_NULL_PRED: { - *expr = pool->add(VTupleIsNullPredicate::create_unique(texpr_node).release()); + expr = VTupleIsNullPredicate::create_shared(expr_node); break; } case TExprNodeType::SCHEMA_CHANGE_EXPR: { - *expr = pool->add(VSchemaChangeExpr::create_unique(texpr_node).release()); + expr = VSchemaChangeExpr::create_shared(expr_node); break; } default: - return Status::InternalError("Unknown expr node type: {}", texpr_node.node_type); + return Status::InternalError("Unknown expr node type: {}", expr_node.node_type); } } catch (const Exception& e) { return Status::Error(e.code(), e.to_string()); } - if (!(*expr)->data_type()) { - return Status::InvalidArgument("Unknown expr type: {}", texpr_node.node_type); + if (!expr->data_type()) { + return Status::InvalidArgument("Unknown expr type: {}", expr_node.node_type); } return Status::OK(); } -Status VExpr::create_tree_from_thrift(ObjectPool* pool, const std::vector& nodes, - int* node_idx, VExpr** root_expr, VExprContext** ctx) { +Status VExpr::create_tree_from_thrift(const std::vector& nodes, int* node_idx, + VExprSPtr& root_expr, VExprContextSPtr& ctx) { // propagate error case if (*node_idx >= nodes.size()) { return Status::InternalError("Failed to reconstruct expression tree from thrift."); @@ -230,21 +230,18 @@ Status VExpr::create_tree_from_thrift(ObjectPool* pool, const std::vector(root); // short path for leaf node if (root_children <= 0) { - *ctx = pool->add(VExprContext::create_unique(root).release()); return Status::OK(); } // non-recursive traversal - std::stack> s; + std::stack> s; s.push({root, root_children}); while (!s.empty()) { auto& parent = s.top(); @@ -257,8 +254,8 @@ Status VExpr::create_tree_from_thrift(ObjectPool* pool, const std::vector= nodes.size()) { return Status::InternalError("Failed to reconstruct expression tree from thrift."); } - VExpr* expr = nullptr; - RETURN_IF_ERROR(create_expr(pool, nodes[*node_idx], &expr)); + VExprSPtr expr; + RETURN_IF_ERROR(create_expr(nodes[*node_idx], expr)); DCHECK(expr != nullptr); parent.first->add_child(expr); int num_children = nodes[*node_idx].num_children; @@ -266,18 +263,17 @@ Status VExpr::create_tree_from_thrift(ObjectPool* pool, const std::vectoradd(VExprContext::create_unique(root).release()); return Status::OK(); } -Status VExpr::create_expr_tree(ObjectPool* pool, const TExpr& texpr, VExprContext** ctx) { +Status VExpr::create_expr_tree(const doris::TExpr& texpr, VExprContextSPtr& ctx) { if (texpr.nodes.size() == 0) { - *ctx = nullptr; + ctx = nullptr; return Status::OK(); } int node_idx = 0; - VExpr* e = nullptr; - Status status = create_tree_from_thrift(pool, texpr.nodes, &node_idx, &e, ctx); + VExprSPtr e; + Status status = create_tree_from_thrift(texpr.nodes, &node_idx, e, ctx); if (status.ok() && node_idx + 1 != texpr.nodes.size()) { status = Status::InternalError( "Expression tree only partially reconstructed. Not all thrift nodes were " @@ -291,18 +287,17 @@ Status VExpr::create_expr_tree(ObjectPool* pool, const TExpr& texpr, VExprContex return status; } -Status VExpr::create_expr_trees(ObjectPool* pool, const std::vector& texprs, - std::vector* ctxs) { - ctxs->clear(); +Status VExpr::create_expr_trees(const std::vector& texprs, VExprContextSPtrs& ctxs) { + ctxs.clear(); for (int i = 0; i < texprs.size(); ++i) { - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(create_expr_tree(pool, texprs[i], &ctx)); - ctxs->push_back(ctx); + VExprContextSPtr ctx; + RETURN_IF_ERROR(create_expr_tree(texprs[i], ctx)); + ctxs.push_back(ctx); } return Status::OK(); } -Status VExpr::prepare(const std::vector& ctxs, RuntimeState* state, +Status VExpr::prepare(const VExprContextSPtrs& ctxs, RuntimeState* state, const RowDescriptor& row_desc) { for (auto ctx : ctxs) { RETURN_IF_ERROR(ctx->prepare(state, row_desc)); @@ -310,33 +305,32 @@ Status VExpr::prepare(const std::vector& ctxs, RuntimeState* stat return Status::OK(); } -void VExpr::close(const std::vector& ctxs, RuntimeState* state) { +void VExpr::close(const VExprContextSPtrs& ctxs, RuntimeState* state) { for (auto ctx : ctxs) { ctx->close(state); } } -Status VExpr::open(const std::vector& ctxs, RuntimeState* state) { +Status VExpr::open(const VExprContextSPtrs& ctxs, RuntimeState* state) { for (int i = 0; i < ctxs.size(); ++i) { RETURN_IF_ERROR(ctxs[i]->open(state)); } return Status::OK(); } -Status VExpr::clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, - std::vector* new_ctxs) { - DCHECK(new_ctxs != nullptr); - if (!new_ctxs->empty()) { +Status VExpr::clone_if_not_exists(const VExprContextSPtrs& ctxs, RuntimeState* state, + VExprContextSPtrs& new_ctxs) { + if (!new_ctxs.empty()) { // 'ctxs' was already cloned into '*new_ctxs', nothing to do. - DCHECK_EQ(new_ctxs->size(), ctxs.size()); - for (int i = 0; i < new_ctxs->size(); ++i) { - DCHECK((*new_ctxs)[i]->_is_clone); + DCHECK_EQ(new_ctxs.size(), ctxs.size()); + for (int i = 0; i < new_ctxs.size(); ++i) { + DCHECK(new_ctxs[i]->_is_clone); } return Status::OK(); } - new_ctxs->resize(ctxs.size()); + new_ctxs.resize(ctxs.size()); for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->clone(state, &(*new_ctxs)[i])); + RETURN_IF_ERROR(ctxs[i]->clone(state, new_ctxs[i])); } return Status::OK(); } @@ -353,7 +347,7 @@ std::string VExpr::debug_string() const { return out.str(); } -std::string VExpr::debug_string(const std::vector& exprs) { +std::string VExpr::debug_string(const VExprSPtrs& exprs) { std::stringstream out; out << "["; @@ -365,8 +359,8 @@ std::string VExpr::debug_string(const std::vector& exprs) { return out.str(); } -std::string VExpr::debug_string(const std::vector& ctxs) { - std::vector exprs; +std::string VExpr::debug_string(const VExprContextSPtrs& ctxs) { + VExprSPtrs exprs; for (int i = 0; i < ctxs.size(); ++i) { exprs.push_back(ctxs[i]->root()); } diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 2eb2e78a32..072ba48af3 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -39,6 +39,7 @@ #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" #include "vec/functions/function.h" namespace doris { @@ -50,7 +51,6 @@ class RowDescriptor; class RuntimeState; namespace vectorized { -class VExprContext; #define RETURN_IF_ERROR_OR_PREPARED(stmt) \ if (_prepared) { \ @@ -81,7 +81,7 @@ public: VExpr() = default; virtual ~VExpr() = default; - virtual VExpr* clone(ObjectPool* pool) const = 0; + virtual VExprSPtr clone() const = 0; virtual const std::string& expr_name() const = 0; @@ -124,38 +124,38 @@ public: TExprOpcode::type op() const { return _opcode; } - void add_child(VExpr* expr) { _children.push_back(expr); } - VExpr* get_child(int i) const { return _children[i]; } + void add_child(const VExprSPtr& expr) { _children.push_back(expr); } + VExprSPtr get_child(int i) const { return _children[i]; } int get_num_children() const { return _children.size(); } - static Status create_expr_tree(ObjectPool* pool, const TExpr& texpr, VExprContext** ctx); + static Status create_expr_tree(const TExpr& texpr, VExprContextSPtr& ctx); - static Status create_expr_trees(ObjectPool* pool, const std::vector& texprs, - std::vector* ctxs); + static Status create_expr_trees(const std::vector& texprs, VExprContextSPtrs& ctxs); - static Status prepare(const std::vector& ctxs, RuntimeState* state, + static Status prepare(const VExprContextSPtrs& ctxs, RuntimeState* state, const RowDescriptor& row_desc); - static Status open(const std::vector& ctxs, RuntimeState* state); + static Status open(const VExprContextSPtrs& ctxs, RuntimeState* state); - static Status clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, - std::vector* new_ctxs); + static Status clone_if_not_exists(const VExprContextSPtrs& ctxs, RuntimeState* state, + VExprContextSPtrs& new_ctxs); - static void close(const std::vector& ctxs, RuntimeState* state); + static void close(const VExprContextSPtrs& ctxs, RuntimeState* state); bool is_nullable() const { return _data_type->is_nullable(); } PrimitiveType result_type() const { return _type.type; } - static Status create_expr(ObjectPool* pool, const TExprNode& texpr_node, VExpr** expr); + static Status create_expr(const TExprNode& expr_node, VExprSPtr& expr); - static Status create_tree_from_thrift(ObjectPool* pool, const std::vector& nodes, - int* node_idx, VExpr** root_expr, VExprContext** ctx); - virtual const std::vector& children() const { return _children; } - void set_children(std::vector children) { _children = children; } + static Status create_tree_from_thrift(const std::vector& nodes, int* node_idx, + VExprSPtr& root_expr, VExprContextSPtr& ctx); + virtual const VExprSPtrs& children() const { return _children; } + void set_children(const VExprSPtrs& children) { _children = children; } + void set_children(VExprSPtrs&& children) { _children = std::move(children); } virtual std::string debug_string() const; - static std::string debug_string(const std::vector& exprs); - static std::string debug_string(const std::vector& ctxs); + static std::string debug_string(const VExprSPtrs& exprs); + static std::string debug_string(const VExprContextSPtrs& ctxs); bool is_and_expr() const { return _fn.name.function_name == "and"; } @@ -176,15 +176,15 @@ public: int fn_context_index() const { return _fn_context_index; } - static const VExpr* expr_without_cast(const VExpr* expr) { - if (expr->node_type() == TExprNodeType::CAST_EXPR) { + static const VExprSPtr expr_without_cast(const VExprSPtr& expr) { + if (expr->node_type() == doris::TExprNodeType::CAST_EXPR) { return expr_without_cast(expr->_children[0]); } return expr; } // If this expr is a RuntimeFilterWrapper, this method will return an underlying rf expression - virtual const VExpr* get_impl() const { return nullptr; } + virtual const VExprSPtr get_impl() const { return {}; } // If this expr is a BloomPredicate, this method will return a BloomFilterFunc virtual std::shared_ptr get_bloom_filter_func() const { @@ -233,7 +233,7 @@ protected: TExprOpcode::type _opcode; TypeDescriptor _type; DataTypePtr _data_type; - std::vector _children; + VExprSPtrs _children; TFunction _fn; /// Index to pass to ExprContext::fn_context() to retrieve this expr's FunctionContext. @@ -247,8 +247,5 @@ protected: bool _prepared; }; -using VExprSPtr = std::shared_ptr; -using VExprUPtr = std::unique_ptr; - } // namespace vectorized } // namespace doris diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index 9844989d88..98af614797 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -40,7 +40,7 @@ class RowDescriptor; } // namespace doris namespace doris::vectorized { -VExprContext::VExprContext(VExpr* expr) +VExprContext::VExprContext(const VExprSPtr& expr) : _root(expr), _is_clone(false), _prepared(false), @@ -92,21 +92,21 @@ void VExprContext::close(doris::RuntimeState* state) { _closed = true; } -doris::Status VExprContext::clone(RuntimeState* state, VExprContext** new_ctx) { +doris::Status VExprContext::clone(RuntimeState* state, VExprContextSPtr& new_ctx) { DCHECK(_prepared) << "expr context not prepared"; DCHECK(_opened); - DCHECK(*new_ctx == nullptr); + DCHECK(new_ctx.get() == nullptr); - *new_ctx = state->obj_pool()->add(VExprContext::create_unique(_root).release()); + new_ctx = std::make_shared(_root); for (auto& _fn_context : _fn_contexts) { - (*new_ctx)->_fn_contexts.push_back(_fn_context->clone()); + new_ctx->_fn_contexts.push_back(_fn_context->clone()); } - (*new_ctx)->_is_clone = true; - (*new_ctx)->_prepared = true; - (*new_ctx)->_opened = true; + new_ctx->_is_clone = true; + new_ctx->_prepared = true; + new_ctx->_opened = true; - return _root->open(state, *new_ctx, FunctionContext::THREAD_LOCAL); + return _root->open(state, new_ctx.get(), FunctionContext::THREAD_LOCAL); } void VExprContext::clone_fn_contexts(VExprContext* other) { @@ -132,14 +132,34 @@ Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block, int col return Block::filter_block(block, result_column_id, column_to_keep); } -// TODO Performance Optimization -Status VExprContext::execute_conjuncts(const std::vector& ctxs, +Status VExprContext::filter_block(const VExprContextSPtrs& expr_contexts, Block* block, + int column_to_keep) { + if (expr_contexts.empty() || block->rows() == 0) { + return Status::OK(); + } + + std::vector columns_to_filter(column_to_keep); + std::iota(columns_to_filter.begin(), columns_to_filter.end(), 0); + + return execute_conjuncts_and_filter_block(expr_contexts, nullptr, block, columns_to_filter, + column_to_keep); +} + +Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs, const std::vector* filters, Block* block, IColumn::Filter* result_filter, bool* can_filter_all) { + return execute_conjuncts(ctxs, filters, false, block, result_filter, can_filter_all); +} + +// TODO Performance Optimization +Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs, + const std::vector* filters, + const bool accept_null, Block* block, + IColumn::Filter* result_filter, bool* can_filter_all) { DCHECK(result_filter->size() == block->rows()); *can_filter_all = false; auto* __restrict result_filter_data = result_filter->data(); - for (auto* ctx : ctxs) { + for (auto& ctx : ctxs) { int result_column_id = -1; RETURN_IF_ERROR(ctx->execute(block, &result_column_id)); ColumnPtr& filter_column = block->get_by_position(result_column_id).column; @@ -156,9 +176,16 @@ Status VExprContext::execute_conjuncts(const std::vector& ctxs, const size_t size = filter.size(); auto* __restrict null_map_data = nullable_column->get_null_map_data().data(); - for (size_t i = 0; i < size; ++i) { - result_filter_data[i] &= (!null_map_data[i]) & filter_data[i]; + if (accept_null) { + for (size_t i = 0; i < size; ++i) { + result_filter_data[i] &= (null_map_data[i]) || filter_data[i]; + } + } else { + for (size_t i = 0; i < size; ++i) { + result_filter_data[i] &= (!null_map_data[i]) & filter_data[i]; + } } + if (memchr(result_filter_data, 0x1, size) == nullptr) { *can_filter_all = true; return Status::OK(); @@ -201,11 +228,12 @@ Status VExprContext::execute_conjuncts(const std::vector& ctxs, // TODO Performance Optimization // need exception safety Status VExprContext::execute_conjuncts_and_filter_block( - const std::vector& ctxs, const std::vector* filters, - Block* block, std::vector& columns_to_filter, int column_to_keep) { + const VExprContextSPtrs& ctxs, const std::vector* filters, Block* block, + std::vector& columns_to_filter, int column_to_keep) { IColumn::Filter result_filter(block->rows(), 1); bool can_filter_all; - RETURN_IF_ERROR(execute_conjuncts(ctxs, filters, block, &result_filter, &can_filter_all)); + RETURN_IF_ERROR( + execute_conjuncts(ctxs, filters, false, block, &result_filter, &can_filter_all)); if (can_filter_all) { for (auto& col : columns_to_filter) { std::move(*block->get_by_position(col).column).assume_mutable()->clear(); @@ -218,12 +246,30 @@ Status VExprContext::execute_conjuncts_and_filter_block( return Status::OK(); } +Status VExprContext::execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block, + std::vector& columns_to_filter, + int column_to_keep, + IColumn::Filter& filter) { + filter.resize_fill(block->rows(), 1); + bool can_filter_all; + RETURN_IF_ERROR(execute_conjuncts(ctxs, nullptr, false, block, &filter, &can_filter_all)); + if (can_filter_all) { + for (auto& col : columns_to_filter) { + std::move(*block->get_by_position(col).column).assume_mutable()->clear(); + } + } else { + RETURN_IF_CATCH_EXCEPTION(Block::filter_block_internal(block, columns_to_filter, filter)); + } + + Block::erase_useless_column(block, column_to_keep); + return Status::OK(); +} + Status VExprContext::get_output_block_after_execute_exprs( - const std::vector& output_vexpr_ctxs, const Block& input_block, - Block* output_block) { + const VExprContextSPtrs& output_vexpr_ctxs, const Block& input_block, Block* output_block) { vectorized::Block tmp_block(input_block.get_columns_with_type_and_name()); vectorized::ColumnsWithTypeAndName result_columns; - for (auto vexpr_ctx : output_vexpr_ctxs) { + for (auto& vexpr_ctx : output_vexpr_ctxs) { int result_column_id = -1; RETURN_IF_ERROR(vexpr_ctx->execute(&tmp_block, &result_column_id)); DCHECK(result_column_id != -1); diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index e00d34559b..3332e6f816 100644 --- a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -27,6 +27,7 @@ #include "runtime/types.h" #include "udf/udf.h" #include "vec/core/block.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class RowDescriptor; @@ -34,22 +35,21 @@ class RuntimeState; } // namespace doris namespace doris::vectorized { -class VExpr; class VExprContext { ENABLE_FACTORY_CREATOR(VExprContext); public: - VExprContext(VExpr* expr); + VExprContext(const VExprSPtr& expr); ~VExprContext(); [[nodiscard]] Status prepare(RuntimeState* state, const RowDescriptor& row_desc); [[nodiscard]] Status open(RuntimeState* state); void close(RuntimeState* state); - [[nodiscard]] Status clone(RuntimeState* state, VExprContext** new_ctx); + [[nodiscard]] Status clone(RuntimeState* state, VExprContextSPtr& new_ctx); [[nodiscard]] Status execute(Block* block, int* result_column_id); - VExpr* root() { return _root; } - void set_root(VExpr* expr) { _root = expr; } + VExprSPtr root() { return _root; } + void set_root(const VExprSPtr& expr) { _root = expr; } /// Creates a FunctionContext, and returns the index that's passed to fn_context() to /// retrieve the created context. Exprs that need a FunctionContext should call this in @@ -68,16 +68,30 @@ public: [[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block, int column_to_keep); - [[nodiscard]] static Status execute_conjuncts(const std::vector& ctxs, + + [[nodiscard]] static Status filter_block(const VExprContextSPtrs& expr_contexts, Block* block, + int column_to_keep); + + [[nodiscard]] static Status execute_conjuncts(const VExprContextSPtrs& ctxs, const std::vector* filters, - Block* block, IColumn::Filter* result_filter, + const bool accept_null, Block* block, + IColumn::Filter* result_filter, bool* can_filter_all); + + static Status execute_conjuncts(const VExprContextSPtrs& ctxs, + const std::vector* filters, Block* block, + IColumn::Filter* result_filter, bool* can_filter_all); + [[nodiscard]] static Status execute_conjuncts_and_filter_block( - const std::vector& ctxs, const std::vector* filters, + const VExprContextSPtrs& ctxs, const std::vector* filters, Block* block, std::vector& columns_to_filter, int column_to_keep); - [[nodiscard]] static Status get_output_block_after_execute_exprs( - const std::vector&, const Block&, Block*); + static Status execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block, + std::vector& columns_to_filter, + int column_to_keep, IColumn::Filter& filter); + + [[nodiscard]] static Status get_output_block_after_execute_exprs(const VExprContextSPtrs&, + const Block&, Block*); int get_last_result_column_id() const { DCHECK(_last_result_column_id != -1); @@ -94,11 +108,44 @@ public: void set_force_materialize_slot() { _force_materialize_slot = true; } + VExprContext& operator=(const VExprContext& other) { + if (this == &other) { + return *this; + } + + _root = other._root; + _is_clone = other._is_clone; + _prepared = other._prepared; + _opened = other._opened; + _closed = other._closed; + + for (auto& fn : other._fn_contexts) { + _fn_contexts.emplace_back(fn->clone()); + } + + _last_result_column_id = other._last_result_column_id; + _depth_num = other._depth_num; + return *this; + } + + VExprContext& operator=(VExprContext&& other) { + _root = other._root; + other._root = nullptr; + _is_clone = other._is_clone; + _prepared = other._prepared; + _opened = other._opened; + _closed = other._closed; + _fn_contexts = std::move(other._fn_contexts); + _last_result_column_id = other._last_result_column_id; + _depth_num = other._depth_num; + return *this; + } + private: friend class VExpr; /// The expr tree this context is for. - VExpr* _root; + VExprSPtr _root; /// True if this context came from a Clone() call. Used to manage FunctionStateScope. bool _is_clone; diff --git a/be/src/vec/exprs/vexpr_fwd.h b/be/src/vec/exprs/vexpr_fwd.h new file mode 100644 index 0000000000..5f0f287509 --- /dev/null +++ b/be/src/vec/exprs/vexpr_fwd.h @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +namespace doris::vectorized { +class VExpr; +class VExprContext; + +using VExprSPtr = std::shared_ptr; +using VExprContextSPtr = std::shared_ptr; + +using VExprSPtrs = std::vector; +using VExprContextSPtrs = std::vector; + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exprs/vin_predicate.h b/be/src/vec/exprs/vin_predicate.h index 925f7b4ce0..4d047de6a2 100644 --- a/be/src/vec/exprs/vin_predicate.h +++ b/be/src/vec/exprs/vin_predicate.h @@ -50,9 +50,7 @@ public: FunctionContext::FunctionStateScope scope) override; void close(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VInPredicate::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VInPredicate::create_shared(*this); } const std::string& expr_name() const override; std::string debug_string() const override; diff --git a/be/src/vec/exprs/vinfo_func.h b/be/src/vec/exprs/vinfo_func.h index 6b2d9e90b9..54498272cf 100644 --- a/be/src/vec/exprs/vinfo_func.h +++ b/be/src/vec/exprs/vinfo_func.h @@ -38,9 +38,7 @@ public: VInfoFunc(const TExprNode& node); virtual ~VInfoFunc() {} - virtual VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VInfoFunc::create_unique(*this).release()); - } + virtual VExprSPtr clone() const override { return VInfoFunc::create_shared(*this); } virtual const std::string& expr_name() const override { return _expr_name; } virtual Status execute(VExprContext* context, vectorized::Block* block, int* result_column_id) override; diff --git a/be/src/vec/exprs/vlambda_function_call_expr.h b/be/src/vec/exprs/vlambda_function_call_expr.h index 302b2e8827..1a1ca7df7b 100644 --- a/be/src/vec/exprs/vlambda_function_call_expr.h +++ b/be/src/vec/exprs/vlambda_function_call_expr.h @@ -34,9 +34,7 @@ public: VLambdaFunctionCallExpr(const TExprNode& node) : VExpr(node) {} ~VLambdaFunctionCallExpr() override = default; - VExpr* clone(ObjectPool* pool) const override { - return pool->add(VLambdaFunctionCallExpr::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VLambdaFunctionCallExpr::create_shared(*this); } doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc, VExprContext* context) override { @@ -69,7 +67,7 @@ public: out << _expr_name; out << "]{"; bool first = true; - for (VExpr* input_expr : children()) { + for (auto& input_expr : children()) { if (first) { first = false; } else { diff --git a/be/src/vec/exprs/vlambda_function_expr.h b/be/src/vec/exprs/vlambda_function_expr.h index 490d17d01b..8a2e3275d7 100644 --- a/be/src/vec/exprs/vlambda_function_expr.h +++ b/be/src/vec/exprs/vlambda_function_expr.h @@ -33,9 +33,7 @@ public: return get_child(0)->execute(context, block, result_column_id); } - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VLambdaFunctionExpr::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VLambdaFunctionExpr::create_shared(*this); } const std::string& expr_name() const override { return _expr_name; } diff --git a/be/src/vec/exprs/vliteral.h b/be/src/vec/exprs/vliteral.h index 79d3b6d8da..e3ea84121a 100644 --- a/be/src/vec/exprs/vliteral.h +++ b/be/src/vec/exprs/vliteral.h @@ -44,9 +44,7 @@ public: } Status execute(VExprContext* context, vectorized::Block* block, int* result_column_id) override; const std::string& expr_name() const override { return _expr_name; } - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VLiteral::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VLiteral::create_shared(*this); } std::string debug_string() const override; std::string value() const; diff --git a/be/src/vec/exprs/vmap_literal.cpp b/be/src/vec/exprs/vmap_literal.cpp index 2beae8cf32..e75b5dc06c 100644 --- a/be/src/vec/exprs/vmap_literal.cpp +++ b/be/src/vec/exprs/vmap_literal.cpp @@ -52,10 +52,10 @@ Status VMapLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc, // each child is slot with key1, value1, key2, value2... for (int idx = 0; idx < _children.size() && idx + 1 < _children.size(); idx += 2) { Field kf, vf; - auto key_literal = dynamic_cast(_children[idx]); + auto key_literal = std::dynamic_pointer_cast(_children[idx]); key_literal->get_column_ptr()->get(0, kf); - auto val_literal = - dynamic_cast(VExpr::expr_without_cast(_children[idx + 1])); + auto val_literal = std::dynamic_pointer_cast( + VExpr::expr_without_cast(_children[idx + 1])); val_literal->get_column_ptr()->get(0, vf); keys.get().push_back(kf); diff --git a/be/src/vec/exprs/vmap_literal.h b/be/src/vec/exprs/vmap_literal.h index c107fe3e20..808516aaa0 100644 --- a/be/src/vec/exprs/vmap_literal.h +++ b/be/src/vec/exprs/vmap_literal.h @@ -25,7 +25,6 @@ class RuntimeState; class TExprNode; namespace vectorized { -class VExprContext; class VMapLiteral : public VLiteral { ENABLE_FACTORY_CREATOR(VMapLiteral); diff --git a/be/src/vec/exprs/vruntimefilter_wrapper.cpp b/be/src/vec/exprs/vruntimefilter_wrapper.cpp index 554a15231f..1383d98c60 100644 --- a/be/src/vec/exprs/vruntimefilter_wrapper.cpp +++ b/be/src/vec/exprs/vruntimefilter_wrapper.cpp @@ -44,7 +44,7 @@ class VExprContext; namespace doris::vectorized { -VRuntimeFilterWrapper::VRuntimeFilterWrapper(const TExprNode& node, VExpr* impl) +VRuntimeFilterWrapper::VRuntimeFilterWrapper(const TExprNode& node, const VExprSPtr& impl) : VExpr(node), _impl(impl), _always_true(false), _filtered_rows(0), _scan_rows(0) {} VRuntimeFilterWrapper::VRuntimeFilterWrapper(const VRuntimeFilterWrapper& vexpr) diff --git a/be/src/vec/exprs/vruntimefilter_wrapper.h b/be/src/vec/exprs/vruntimefilter_wrapper.h index 7e31513824..259484bd78 100644 --- a/be/src/vec/exprs/vruntimefilter_wrapper.h +++ b/be/src/vec/exprs/vruntimefilter_wrapper.h @@ -44,7 +44,7 @@ class VRuntimeFilterWrapper final : public VExpr { ENABLE_FACTORY_CREATOR(VRuntimeFilterWrapper); public: - VRuntimeFilterWrapper(const TExprNode& node, VExpr* impl); + VRuntimeFilterWrapper(const TExprNode& node, const VExprSPtr& impl); VRuntimeFilterWrapper(const VRuntimeFilterWrapper& vexpr); ~VRuntimeFilterWrapper() override = default; doris::Status execute(VExprContext* context, doris::vectorized::Block* block, @@ -57,13 +57,11 @@ public: bool is_constant() const override; void close(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VRuntimeFilterWrapper::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VRuntimeFilterWrapper::create_shared(*this); } const std::string& expr_name() const override; - const std::vector& children() const override { return _impl->children(); } + const VExprSPtrs& children() const override { return _impl->children(); } - const VExpr* get_impl() const override { return _impl; } + const VExprSPtr get_impl() const override { return _impl; } // if filter rate less than this, bloom filter will set always true constexpr static double EXPECTED_FILTER_RATE = 0.4; @@ -80,7 +78,7 @@ public: } private: - VExpr* _impl; + VExprSPtr _impl; bool _always_true; /// TODO: statistic filter rate in the profile diff --git a/be/src/vec/exprs/vschema_change_expr.h b/be/src/vec/exprs/vschema_change_expr.h index fb0dae796c..9dd43a0ae2 100644 --- a/be/src/vec/exprs/vschema_change_expr.h +++ b/be/src/vec/exprs/vschema_change_expr.h @@ -32,7 +32,6 @@ class RuntimeState; namespace vectorized { class Block; -class VExprContext; } // namespace vectorized } // namespace doris @@ -56,9 +55,7 @@ public: FunctionContext::FunctionStateScope scope) override; void close(doris::RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VSchemaChangeExpr::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VSchemaChangeExpr::create_shared(*this); } const std::string& expr_name() const override; std::string debug_string() const override; diff --git a/be/src/vec/exprs/vslot_ref.h b/be/src/vec/exprs/vslot_ref.h index 6db7238597..1f46c44ac8 100644 --- a/be/src/vec/exprs/vslot_ref.h +++ b/be/src/vec/exprs/vslot_ref.h @@ -42,9 +42,7 @@ public: int* result_column_id) override; virtual doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc, VExprContext* context) override; - virtual VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VSlotRef::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VSlotRef::create_shared(*this); } virtual const std::string& expr_name() const override; virtual std::string debug_string() const override; diff --git a/be/src/vec/exprs/vstruct_literal.cpp b/be/src/vec/exprs/vstruct_literal.cpp index 295d366b37..9b4e10f720 100644 --- a/be/src/vec/exprs/vstruct_literal.cpp +++ b/be/src/vec/exprs/vstruct_literal.cpp @@ -39,9 +39,9 @@ Status VStructLiteral::prepare(RuntimeState* state, const RowDescriptor& row_des VExprContext* context) { RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context)); Field struct_field = Tuple(); - for (const auto child : _children) { + for (const auto& child : _children) { Field item; - auto child_literal = dynamic_cast(child); + auto child_literal = std::dynamic_pointer_cast(child); child_literal->get_column_ptr()->get(0, item); struct_field.get().push_back(item); } diff --git a/be/src/vec/exprs/vtuple_is_null_predicate.h b/be/src/vec/exprs/vtuple_is_null_predicate.h index d927165ef6..a27eb71cff 100644 --- a/be/src/vec/exprs/vtuple_is_null_predicate.h +++ b/be/src/vec/exprs/vtuple_is_null_predicate.h @@ -47,9 +47,7 @@ public: doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc, VExprContext* context) override; - VExpr* clone(doris::ObjectPool* pool) const override { - return pool->add(VTupleIsNullPredicate::create_unique(*this).release()); - } + VExprSPtr clone() const override { return VTupleIsNullPredicate::create_shared(*this); } [[nodiscard]] bool is_constant() const override { return false; } diff --git a/be/src/vec/olap/vcollect_iterator.cpp b/be/src/vec/olap/vcollect_iterator.cpp index 113307bbc0..3fda650024 100644 --- a/be/src/vec/olap/vcollect_iterator.cpp +++ b/be/src/vec/olap/vcollect_iterator.cpp @@ -297,15 +297,15 @@ Status VCollectIterator::_topn_next(Block* block) { bool eof = false; while (read_rows < _topn_limit && !eof) { block->clear_column_data(); - auto res = rs_reader->next_block(block); - if (!res.ok()) { - if (res.is()) { + auto status = rs_reader->next_block(block); + if (!status.ok()) { + if (status.is()) { eof = true; if (block->rows() == 0) { break; } } else { - return res; + return status; } } @@ -313,8 +313,7 @@ Status VCollectIterator::_topn_next(Block* block) { // filter block RETURN_IF_ERROR(VExprContext::filter_block( - *(_reader->_reader_context.filter_block_vconjunct_ctx_ptr), block, - block->columns())); + _reader->_reader_context.filter_block_conjuncts, block, block->columns())); // update read rows read_rows += block->rows(); diff --git a/be/src/vec/runtime/vdata_stream_recvr.cpp b/be/src/vec/runtime/vdata_stream_recvr.cpp index ebee26783a..a59ac3c7fc 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.cpp +++ b/be/src/vec/runtime/vdata_stream_recvr.cpp @@ -338,7 +338,7 @@ VDataStreamRecvr::~VDataStreamRecvr() { DCHECK(_mgr == nullptr) << "Must call close()"; } -Status VDataStreamRecvr::create_merger(const std::vector& ordering_expr, +Status VDataStreamRecvr::create_merger(const VExprContextSPtrs& ordering_expr, const std::vector& is_asc_order, const std::vector& nulls_first, size_t batch_size, int64_t limit, size_t offset) { diff --git a/be/src/vec/runtime/vdata_stream_recvr.h b/be/src/vec/runtime/vdata_stream_recvr.h index 7478dc1eb9..c2374d23a7 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.h +++ b/be/src/vec/runtime/vdata_stream_recvr.h @@ -48,6 +48,7 @@ #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/materialize_block.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class MemTracker; @@ -59,7 +60,6 @@ class RuntimeState; namespace vectorized { class VDataStreamMgr; class VSortedRunMerger; -class VExprContext; class VDataStreamRecvr { public: @@ -70,7 +70,7 @@ public: virtual ~VDataStreamRecvr(); - Status create_merger(const std::vector& ordering_expr, + Status create_merger(const VExprContextSPtrs& ordering_expr, const std::vector& is_asc_order, const std::vector& nulls_first, size_t batch_size, int64_t limit, size_t offset); diff --git a/be/src/vec/runtime/vfile_result_writer.cpp b/be/src/vec/runtime/vfile_result_writer.cpp index 0c1071c041..ed408e5f7a 100644 --- a/be/src/vec/runtime/vfile_result_writer.cpp +++ b/be/src/vec/runtime/vfile_result_writer.cpp @@ -72,12 +72,13 @@ namespace doris::vectorized { const size_t VFileResultWriter::OUTSTREAM_BUFFER_SIZE_BYTES = 1024 * 1024; using doris::operator<<; -VFileResultWriter::VFileResultWriter( - const ResultFileOptions* file_opts, const TStorageBackendType::type storage_type, - const TUniqueId fragment_instance_id, - const std::vector& output_vexpr_ctxs, - RuntimeProfile* parent_profile, BufferControlBlock* sinker, Block* output_block, - bool output_object_data, const RowDescriptor& output_row_descriptor) +VFileResultWriter::VFileResultWriter(const ResultFileOptions* file_opts, + const TStorageBackendType::type storage_type, + const TUniqueId fragment_instance_id, + const VExprContextSPtrs& output_vexpr_ctxs, + RuntimeProfile* parent_profile, BufferControlBlock* sinker, + Block* output_block, bool output_object_data, + const RowDescriptor& output_row_descriptor) : _file_opts(file_opts), _storage_type(storage_type), _fragment_instance_id(fragment_instance_id), diff --git a/be/src/vec/runtime/vfile_result_writer.h b/be/src/vec/runtime/vfile_result_writer.h index d01ad5bf74..1b9d58144f 100644 --- a/be/src/vec/runtime/vfile_result_writer.h +++ b/be/src/vec/runtime/vfile_result_writer.h @@ -52,9 +52,8 @@ public: VFileResultWriter(const ResultFileOptions* file_option, const TStorageBackendType::type storage_type, const TUniqueId fragment_instance_id, - const std::vector& _output_vexpr_ctxs, - RuntimeProfile* parent_profile, BufferControlBlock* sinker, - Block* output_block, bool output_object_data, + const VExprContextSPtrs& _output_vexpr_ctxs, RuntimeProfile* parent_profile, + BufferControlBlock* sinker, Block* output_block, bool output_object_data, const RowDescriptor& output_row_descriptor); virtual ~VFileResultWriter() = default; @@ -101,7 +100,7 @@ private: const ResultFileOptions* _file_opts; TStorageBackendType::type _storage_type; TUniqueId _fragment_instance_id; - const std::vector& _output_vexpr_ctxs; + const VExprContextSPtrs& _output_vexpr_ctxs; // If the result file format is plain text, like CSV, this _file_writer is owned by this FileResultWriter. // If the result file format is Parquet, this _file_writer is owned by _parquet_writer. diff --git a/be/src/vec/runtime/vorc_writer.cpp b/be/src/vec/runtime/vorc_writer.cpp index f3804a6b62..47fc9242f8 100644 --- a/be/src/vec/runtime/vorc_writer.cpp +++ b/be/src/vec/runtime/vorc_writer.cpp @@ -85,7 +85,7 @@ void VOrcOutputStream::set_written_len(int64_t written_len) { } VOrcWriterWrapper::VOrcWriterWrapper(doris::io::FileWriter* file_writer, - const std::vector& output_vexpr_ctxs, + const VExprContextSPtrs& output_vexpr_ctxs, const std::string& schema, bool output_object_data) : VFileWriterWrapper(output_vexpr_ctxs, output_object_data), _file_writer(file_writer), diff --git a/be/src/vec/runtime/vorc_writer.h b/be/src/vec/runtime/vorc_writer.h index cc66c65a6c..3a7b6c205f 100644 --- a/be/src/vec/runtime/vorc_writer.h +++ b/be/src/vec/runtime/vorc_writer.h @@ -75,8 +75,8 @@ private: class VOrcWriterWrapper final : public VFileWriterWrapper { public: VOrcWriterWrapper(doris::io::FileWriter* file_writer, - const std::vector& output_vexpr_ctxs, - const std::string& schema, bool output_object_data); + const VExprContextSPtrs& output_vexpr_ctxs, const std::string& schema, + bool output_object_data); ~VOrcWriterWrapper() = default; diff --git a/be/src/vec/runtime/vparquet_writer.cpp b/be/src/vec/runtime/vparquet_writer.cpp index c22f110519..13fd6d8b85 100644 --- a/be/src/vec/runtime/vparquet_writer.cpp +++ b/be/src/vec/runtime/vparquet_writer.cpp @@ -255,7 +255,7 @@ void ParquetBuildHelper::build_version(parquet::WriterProperties::Builder& build } VParquetWriterWrapper::VParquetWriterWrapper(doris::io::FileWriter* file_writer, - const std::vector& output_vexpr_ctxs, + const VExprContextSPtrs& output_vexpr_ctxs, const std::vector& parquet_schemas, const TParquetCompressionType::type& compression_type, const bool& parquet_disable_dictionary, diff --git a/be/src/vec/runtime/vparquet_writer.h b/be/src/vec/runtime/vparquet_writer.h index 7d28f35cfb..6e07aa0e44 100644 --- a/be/src/vec/runtime/vparquet_writer.h +++ b/be/src/vec/runtime/vparquet_writer.h @@ -31,14 +31,12 @@ #include "common/status.h" #include "vec/core/block.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { namespace io { class FileWriter; } // namespace io -namespace vectorized { -class VExprContext; -} // namespace vectorized } // namespace doris namespace parquet { namespace schema { @@ -93,7 +91,7 @@ public: class VFileWriterWrapper { public: - VFileWriterWrapper(const std::vector& output_vexpr_ctxs, bool output_object_data) + VFileWriterWrapper(const VExprContextSPtrs& output_vexpr_ctxs, bool output_object_data) : _output_vexpr_ctxs(output_vexpr_ctxs), _cur_written_rows(0), _output_object_data(output_object_data) {} @@ -109,7 +107,7 @@ public: virtual int64_t written_len() = 0; protected: - const std::vector& _output_vexpr_ctxs; + const VExprContextSPtrs& _output_vexpr_ctxs; int64_t _cur_written_rows; bool _output_object_data; }; @@ -118,7 +116,7 @@ protected: class VParquetWriterWrapper final : public VFileWriterWrapper { public: VParquetWriterWrapper(doris::io::FileWriter* file_writer, - const std::vector& output_vexpr_ctxs, + const VExprContextSPtrs& output_vexpr_ctxs, const std::vector& parquet_schemas, const TParquetCompressionType::type& compression_type, const bool& parquet_disable_dictionary, diff --git a/be/src/vec/runtime/vsorted_run_merger.cpp b/be/src/vec/runtime/vsorted_run_merger.cpp index 0010327d01..be7397ee0a 100644 --- a/be/src/vec/runtime/vsorted_run_merger.cpp +++ b/be/src/vec/runtime/vsorted_run_merger.cpp @@ -35,7 +35,7 @@ using std::vector; namespace doris::vectorized { -VSortedRunMerger::VSortedRunMerger(const std::vector& ordering_expr, +VSortedRunMerger::VSortedRunMerger(const VExprContextSPtrs& ordering_expr, const std::vector& is_asc_order, const std::vector& nulls_first, const size_t batch_size, int64_t limit, size_t offset, RuntimeProfile* profile) diff --git a/be/src/vec/runtime/vsorted_run_merger.h b/be/src/vec/runtime/vsorted_run_merger.h index e48552eaba..1f6f566526 100644 --- a/be/src/vec/runtime/vsorted_run_merger.h +++ b/be/src/vec/runtime/vsorted_run_merger.h @@ -28,11 +28,11 @@ #include "vec/core/block.h" #include "vec/core/sort_cursor.h" #include "vec/core/sort_description.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { namespace vectorized { -class VExprContext; // VSortedRunMerger is used to merge multiple sorted runs of blocks. A run is a sorted // sequence of blocks, which are fetched from a BlockSupplier function object. @@ -45,10 +45,9 @@ public: // Function that returns the next block of rows from an input sorted run. The batch // is owned by the supplier (i.e. not VSortedRunMerger). eos is indicated by an NULL // batch being returned. - VSortedRunMerger(const std::vector& ordering_expr, - const std::vector& _is_asc_order, const std::vector& _nulls_first, - const size_t batch_size, int64_t limit, size_t offset, - RuntimeProfile* profile); + VSortedRunMerger(const VExprContextSPtrs& ordering_expr, const std::vector& _is_asc_order, + const std::vector& _nulls_first, const size_t batch_size, int64_t limit, + size_t offset, RuntimeProfile* profile); VSortedRunMerger(const SortDescription& desc, const size_t batch_size, int64_t limit, size_t offset, RuntimeProfile* profile); @@ -64,7 +63,7 @@ public: Status get_next(Block* output_block, bool* eos); protected: - const std::vector _ordering_expr; + const VExprContextSPtrs _ordering_expr; SortDescription _desc; const std::vector _is_asc_order; const std::vector _nulls_first; diff --git a/be/src/vec/sink/vdata_stream_sender.cpp b/be/src/vec/sink/vdata_stream_sender.cpp index 6b03753c84..5886f82dad 100644 --- a/be/src/vec/sink/vdata_stream_sender.cpp +++ b/be/src/vec/sink/vdata_stream_sender.cpp @@ -431,8 +431,8 @@ Status VDataStreamSender::init(const TDataSink& tsink) { const TDataStreamSink& t_stream_sink = tsink.stream_sink; if (_part_type == TPartitionType::HASH_PARTITIONED || _part_type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED) { - RETURN_IF_ERROR(VExpr::create_expr_trees( - _pool, t_stream_sink.output_partition.partition_exprs, &_partition_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(t_stream_sink.output_partition.partition_exprs, + _partition_expr_ctxs)); } else if (_part_type == TPartitionType::RANGE_PARTITIONED) { return Status::InternalError("TPartitionType::RANGE_PARTITIONED should not be used"); } else { diff --git a/be/src/vec/sink/vdata_stream_sender.h b/be/src/vec/sink/vdata_stream_sender.h index 3ce04915b7..86d15782ae 100644 --- a/be/src/vec/sink/vdata_stream_sender.h +++ b/be/src/vec/sink/vdata_stream_sender.h @@ -196,7 +196,7 @@ protected: int _broadcast_pb_block_idx; // compute per-row partition values - std::vector _partition_expr_ctxs; + VExprContextSPtrs _partition_expr_ctxs; std::vector _channels; std::vector> _channel_shared_ptrs; diff --git a/be/src/vec/sink/vmemory_scratch_sink.cpp b/be/src/vec/sink/vmemory_scratch_sink.cpp index 480ad10e13..f54465c0ad 100644 --- a/be/src/vec/sink/vmemory_scratch_sink.cpp +++ b/be/src/vec/sink/vmemory_scratch_sink.cpp @@ -45,15 +45,14 @@ class TMemoryScratchSink; namespace doris::vectorized { MemoryScratchSink::MemoryScratchSink(const RowDescriptor& row_desc, - const std::vector& t_output_expr, - const TMemoryScratchSink& sink, ObjectPool* pool) - : _row_desc(row_desc), _t_output_expr(t_output_expr), _pool(pool) { + const std::vector& t_output_expr) + : _row_desc(row_desc), _t_output_expr(t_output_expr) { _name = "VMemoryScratchSink"; } Status MemoryScratchSink::_prepare_vexpr(RuntimeState* state) { // From the thrift expressions create the real exprs. - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, _t_output_expr, &_output_vexpr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(_t_output_expr, _output_vexpr_ctxs)); // Prepare the exprs to run. RETURN_IF_ERROR(VExpr::prepare(_output_vexpr_ctxs, state, _row_desc)); // generate the arrow schema diff --git a/be/src/vec/sink/vmemory_scratch_sink.h b/be/src/vec/sink/vmemory_scratch_sink.h index 47a9984b8f..b8352fed5d 100644 --- a/be/src/vec/sink/vmemory_scratch_sink.h +++ b/be/src/vec/sink/vmemory_scratch_sink.h @@ -23,6 +23,7 @@ #include "common/status.h" #include "exec/data_sink.h" #include "runtime/result_queue_mgr.h" +#include "vec/exprs/vexpr_fwd.h" namespace arrow { @@ -40,14 +41,12 @@ class TExpr; class TMemoryScratchSink; namespace vectorized { -class VExprContext; class Block; // used to push data to blocking queue class MemoryScratchSink final : public DataSink { public: - MemoryScratchSink(const RowDescriptor& row_desc, const std::vector& t_output_expr, - const TMemoryScratchSink& sink, ObjectPool* pool); + MemoryScratchSink(const RowDescriptor& row_desc, const std::vector& t_output_expr); ~MemoryScratchSink() override = default; @@ -76,9 +75,7 @@ private: // Owned by the RuntimeState. const std::vector& _t_output_expr; - std::vector _output_vexpr_ctxs; - - ObjectPool* _pool; + VExprContextSPtrs _output_vexpr_ctxs; }; } // namespace vectorized } // namespace doris diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp index cc7b39aa5e..b001a10526 100644 --- a/be/src/vec/sink/vmysql_result_writer.cpp +++ b/be/src/vec/sink/vmysql_result_writer.cpp @@ -74,9 +74,9 @@ namespace doris { namespace vectorized { template -VMysqlResultWriter::VMysqlResultWriter( - BufferControlBlock* sinker, const std::vector& output_vexpr_ctxs, - RuntimeProfile* parent_profile) +VMysqlResultWriter::VMysqlResultWriter(BufferControlBlock* sinker, + const VExprContextSPtrs& output_vexpr_ctxs, + RuntimeProfile* parent_profile) : VResultWriter(), _sinker(sinker), _output_vexpr_ctxs(output_vexpr_ctxs), diff --git a/be/src/vec/sink/vmysql_result_writer.h b/be/src/vec/sink/vmysql_result_writer.h index 9626edfcd2..0e0b4d9313 100644 --- a/be/src/vec/sink/vmysql_result_writer.h +++ b/be/src/vec/sink/vmysql_result_writer.h @@ -27,6 +27,7 @@ #include "util/mysql_row_buffer.h" #include "util/runtime_profile.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" #include "vec/sink/vresult_writer.h" namespace doris { @@ -34,7 +35,6 @@ class BufferControlBlock; class RuntimeState; namespace vectorized { -class VExprContext; class Block; template @@ -42,8 +42,7 @@ class VMysqlResultWriter final : public VResultWriter { public: using ResultList = std::vector>; - VMysqlResultWriter(BufferControlBlock* sinker, - const std::vector& output_vexpr_ctxs, + VMysqlResultWriter(BufferControlBlock* sinker, const VExprContextSPtrs& output_vexpr_ctxs, RuntimeProfile* parent_profile); Status init(RuntimeState* state) override; @@ -69,7 +68,7 @@ private: BufferControlBlock* _sinker; - const std::vector& _output_vexpr_ctxs; + const VExprContextSPtrs& _output_vexpr_ctxs; RuntimeProfile* _parent_profile; // parent profile from result sink. not owned // total time cost on append batch operation diff --git a/be/src/vec/sink/vmysql_table_writer.cpp b/be/src/vec/sink/vmysql_table_writer.cpp index c4c7e48393..af1f920e4a 100644 --- a/be/src/vec/sink/vmysql_table_writer.cpp +++ b/be/src/vec/sink/vmysql_table_writer.cpp @@ -59,7 +59,7 @@ std::string MysqlConnInfo::debug_string() const { return ss.str(); } -VMysqlTableWriter::VMysqlTableWriter(const std::vector& output_expr_ctxs) +VMysqlTableWriter::VMysqlTableWriter(const VExprContextSPtrs& output_expr_ctxs) : _vec_output_expr_ctxs(output_expr_ctxs) {} VMysqlTableWriter::~VMysqlTableWriter() { diff --git a/be/src/vec/sink/vmysql_table_writer.h b/be/src/vec/sink/vmysql_table_writer.h index 54a8b470c9..51f62a4db5 100644 --- a/be/src/vec/sink/vmysql_table_writer.h +++ b/be/src/vec/sink/vmysql_table_writer.h @@ -25,6 +25,7 @@ #include #include "common/status.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { namespace vectorized { @@ -40,12 +41,11 @@ struct MysqlConnInfo { std::string debug_string() const; }; -class VExprContext; class Block; class VMysqlTableWriter { public: - VMysqlTableWriter(const std::vector& output_exprs); + VMysqlTableWriter(const VExprContextSPtrs& output_exprs); ~VMysqlTableWriter(); // connect to mysql server @@ -61,7 +61,7 @@ public: private: Status insert_row(vectorized::Block& block, size_t row); - const std::vector& _vec_output_expr_ctxs; + const VExprContextSPtrs& _vec_output_expr_ctxs; fmt::memory_buffer _insert_stmt_buffer; std::string _mysql_tbl; MYSQL* _mysql_conn; diff --git a/be/src/vec/sink/vresult_file_sink.cpp b/be/src/vec/sink/vresult_file_sink.cpp index 92b396d189..a65bda5af2 100644 --- a/be/src/vec/sink/vresult_file_sink.cpp +++ b/be/src/vec/sink/vresult_file_sink.cpp @@ -97,8 +97,7 @@ Status VResultFileSink::init(const TDataSink& tsink) { Status VResultFileSink::prepare_exprs(RuntimeState* state) { // From the thrift expressions create the real exprs. - RETURN_IF_ERROR( - VExpr::create_expr_trees(state->obj_pool(), _t_output_expr, &_output_vexpr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(_t_output_expr, _output_vexpr_ctxs)); // Prepare the exprs to run. RETURN_IF_ERROR(VExpr::prepare(_output_vexpr_ctxs, state, _row_desc)); return Status::OK(); diff --git a/be/src/vec/sink/vresult_file_sink.h b/be/src/vec/sink/vresult_file_sink.h index ed62b3c60a..eaea995584 100644 --- a/be/src/vec/sink/vresult_file_sink.h +++ b/be/src/vec/sink/vresult_file_sink.h @@ -78,7 +78,7 @@ private: // Owned by the RuntimeState. const std::vector& _t_output_expr; - std::vector _output_vexpr_ctxs; + VExprContextSPtrs _output_vexpr_ctxs; RowDescriptor _output_row_descriptor; std::unique_ptr _output_block = nullptr; diff --git a/be/src/vec/sink/vresult_sink.cpp b/be/src/vec/sink/vresult_sink.cpp index 297f201790..445d342a5b 100644 --- a/be/src/vec/sink/vresult_sink.cpp +++ b/be/src/vec/sink/vresult_sink.cpp @@ -64,10 +64,9 @@ VResultSink::~VResultSink() = default; Status VResultSink::prepare_exprs(RuntimeState* state) { // From the thrift expressions create the real exprs. - RETURN_IF_ERROR( - VExpr::create_expr_trees(state->obj_pool(), _t_output_expr, &_output_vexpr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(_t_output_expr, _output_vexpr_ctxs)); if (_fetch_option.use_two_phase_fetch) { - for (VExprContext* expr_ctx : _output_vexpr_ctxs) { + for (auto& expr_ctx : _output_vexpr_ctxs) { // Must materialize if it a slot, or the slot column id will be -1 expr_ctx->set_force_materialize_slot(); } diff --git a/be/src/vec/sink/vresult_sink.h b/be/src/vec/sink/vresult_sink.h index 97916544bf..93394930fb 100644 --- a/be/src/vec/sink/vresult_sink.h +++ b/be/src/vec/sink/vresult_sink.h @@ -28,6 +28,7 @@ #include "common/status.h" #include "exec/data_sink.h" +#include "vec/exprs/vexpr_fwd.h" #include "vec/sink/vresult_writer.h" namespace doris { @@ -42,7 +43,6 @@ namespace pipeline { class ResultSinkOperator; } namespace vectorized { -class VExprContext; class Block; class VResultWriter; @@ -152,7 +152,7 @@ private: // Owned by the RuntimeState. const std::vector& _t_output_expr; - std::vector _output_vexpr_ctxs; + VExprContextSPtrs _output_vexpr_ctxs; std::shared_ptr _sender; std::shared_ptr _writer; diff --git a/be/src/vec/sink/vtable_sink.cpp b/be/src/vec/sink/vtable_sink.cpp index 02259f0cfa..9640b3b4ec 100644 --- a/be/src/vec/sink/vtable_sink.cpp +++ b/be/src/vec/sink/vtable_sink.cpp @@ -41,7 +41,7 @@ VTableSink::VTableSink(ObjectPool* pool, const RowDescriptor& row_desc, Status VTableSink::init(const TDataSink& t_sink) { RETURN_IF_ERROR(DataSink::init(t_sink)); // From the thrift expressions create the real exprs. - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, _t_output_expr, &_output_vexpr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(_t_output_expr, _output_vexpr_ctxs)); return Status::OK(); } diff --git a/be/src/vec/sink/vtable_sink.h b/be/src/vec/sink/vtable_sink.h index 325d44566c..0c45d567f3 100644 --- a/be/src/vec/sink/vtable_sink.h +++ b/be/src/vec/sink/vtable_sink.h @@ -21,6 +21,7 @@ #include "common/status.h" #include "exec/data_sink.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { @@ -33,7 +34,6 @@ class TDataSink; namespace vectorized { class Block; -class VExprContext; class VTableSink : public DataSink { public: @@ -59,7 +59,7 @@ protected: ObjectPool* _pool; const RowDescriptor& _row_desc; const std::vector& _t_output_expr; - std::vector _output_vexpr_ctxs; + VExprContextSPtrs _output_vexpr_ctxs; RuntimeProfile* _profile; std::string _table_name; // whether use transaction diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp index 62954a97b4..a934ce86cf 100644 --- a/be/src/vec/sink/vtablet_sink.cpp +++ b/be/src/vec/sink/vtablet_sink.cpp @@ -966,7 +966,7 @@ VOlapTableSink::VOlapTableSink(ObjectPool* pool, const RowDescriptor& row_desc, const std::vector& texprs, Status* status) : _pool(pool), _input_row_desc(row_desc), _filter_bitmap(1024) { // From the thrift expressions create the real exprs. - *status = vectorized::VExpr::create_expr_trees(pool, texprs, &_output_vexpr_ctxs); + *status = vectorized::VExpr::create_expr_trees(texprs, _output_vexpr_ctxs); _name = "VOlapTableSink"; _transfer_large_data_by_brpc = config::transfer_large_data_by_brpc; } diff --git a/be/src/vec/sink/vtablet_sink.h b/be/src/vec/sink/vtablet_sink.h index ae2b497165..d6ac07068e 100644 --- a/be/src/vec/sink/vtablet_sink.h +++ b/be/src/vec/sink/vtablet_sink.h @@ -66,6 +66,7 @@ #include "vec/common/allocator.h" #include "vec/core/block.h" #include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class ObjectPool; @@ -79,10 +80,6 @@ class TupleDescriptor; template class RefCountClosure; -namespace vectorized { -class VExprContext; -} - namespace stream_load { class OpenPartitionClosure; @@ -358,7 +355,8 @@ protected: class IndexChannel { public: - IndexChannel(VOlapTableSink* parent, int64_t index_id, vectorized::VExprContext* where_clause) + IndexChannel(VOlapTableSink* parent, int64_t index_id, + const vectorized::VExprContextSPtr& where_clause) : _parent(parent), _index_id(index_id), _where_clause(where_clause) { _index_channel_tracker = std::make_unique("IndexChannel:indexID=" + std::to_string(_index_id)); @@ -397,7 +395,7 @@ public: // check whether the rows num written by different replicas is consistent Status check_tablet_received_rows_consistency(); - vectorized::VExprContext* get_where_clause() { return _where_clause; } + vectorized::VExprContextSPtr get_where_clause() { return _where_clause; } private: friend class VNodeChannel; @@ -405,7 +403,7 @@ private: VOlapTableSink* _parent; int64_t _index_id; - vectorized::VExprContext* _where_clause; + vectorized::VExprContextSPtr _where_clause; // from backend channel to tablet_id // ATTN: must be placed before `_node_channels` and `_channels_by_tablet`. @@ -607,7 +605,7 @@ private: FindTabletMode findTabletMode = FindTabletMode::FIND_TABLET_EVERY_ROW; VOlapTablePartitionParam* _vpartition = nullptr; - std::vector _output_vexpr_ctxs; + vectorized::VExprContextSPtrs _output_vexpr_ctxs; RuntimeState* _state = nullptr; diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp index 6530a66212..416987c31d 100644 --- a/be/src/vec/utils/util.hpp +++ b/be/src/vec/utils/util.hpp @@ -92,35 +92,6 @@ public: return data_types; } - static VExpr* dfs_peel_conjunct(RuntimeState* state, VExprContext* context, VExpr* expr, - int& leaf_index, std::function checker) { - static constexpr auto is_leaf = [](VExpr* expr) { return !expr->is_and_expr(); }; - - if (is_leaf(expr)) { - if (checker(leaf_index++)) { - expr->close(state, context, context->get_function_state_scope()); - return nullptr; - } - return expr; - } else { - VExpr* left_child = - dfs_peel_conjunct(state, context, expr->children()[0], leaf_index, checker); - VExpr* right_child = - dfs_peel_conjunct(state, context, expr->children()[1], leaf_index, checker); - - if (left_child != nullptr && right_child != nullptr) { - expr->set_children({left_child, right_child}); - return expr; - } else { - // here only close the and expr self, do not close the child - expr->set_children({}); - expr->close(state, context, context->get_function_state_scope()); - } - - return left_child != nullptr ? left_child : right_child; - } - } - static bool all_arguments_are_constant(const Block& block, const ColumnNumbers& args) { for (const auto& arg : args) { if (!is_column_const(*block.get_by_position(arg).column)) { diff --git a/be/test/exprs/mock_vexpr.h b/be/test/exprs/mock_vexpr.h index 569a6b1d43..b8260dba65 100644 --- a/be/test/exprs/mock_vexpr.h +++ b/be/test/exprs/mock_vexpr.h @@ -27,7 +27,7 @@ namespace vectorized { class MockVExpr : public VExpr { public: - MOCK_CONST_METHOD1(clone, VExpr*(ObjectPool* pool)); + MOCK_CONST_METHOD0(clone, VExprSPtr()); MOCK_CONST_METHOD0(expr_name, const std::string&()); MOCK_METHOD3(execute, Status(VExprContext* context, vectorized::Block* block, int* result_column_id)); diff --git a/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp b/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp index bffbc57cbb..8decfe1a09 100644 --- a/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp @@ -79,7 +79,7 @@ void serialize_and_deserialize_mysql_test() { {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}}; int row_num = 7; // make desc and generate block - std::vector _output_vexpr_ctxs; + vectorized::VExprContextSPtrs _output_vexpr_ctxs; _output_vexpr_ctxs.resize(cols.size()); doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(), doris::TQueryGlobals(), nullptr); @@ -256,8 +256,8 @@ void serialize_and_deserialize_mysql_test() { nodes[0].__set_type(create_type_desc(std::get<3>(t), type_desc.precision, type_desc.scale)); TExpr texpr; texpr.__set_nodes(nodes); - VExprContext* ctx = nullptr; - Status st = VExpr::create_expr_tree(&object_pool, texpr, &ctx); + VExprContextSPtr ctx = nullptr; + Status st = VExpr::create_expr_tree(texpr, ctx); std::cout << st.to_string() << std::endl; doris::DescriptorTblBuilder builder(&object_pool); builder.declare_tuple() << type_desc; diff --git a/be/test/vec/exec/parquet/parquet_reader_test.cpp b/be/test/vec/exec/parquet/parquet_reader_test.cpp index bae98ad9ee..20f4f4150c 100644 --- a/be/test/vec/exec/parquet/parquet_reader_test.cpp +++ b/be/test/vec/exec/parquet/parquet_reader_test.cpp @@ -140,11 +140,11 @@ TEST_F(ParquetReaderTest, normal) { std::unordered_map colname_to_value_range; p_reader->open(); - p_reader->init_reader(column_names, missing_column_names, nullptr, nullptr, nullptr, nullptr, + p_reader->init_reader(column_names, missing_column_names, nullptr, {}, nullptr, nullptr, nullptr, nullptr, nullptr); std::unordered_map> partition_columns; - std::unordered_map missing_columns; + std::unordered_map missing_columns; p_reader->set_fill_columns(partition_columns, missing_columns); BlockUPtr block = Block::create_unique(); for (const auto& slot_desc : tuple_desc->slots()) { diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp index a97206741d..01546ff758 100644 --- a/be/test/vec/exprs/vexpr_test.cpp +++ b/be/test/vec/exprs/vexpr_test.cpp @@ -60,8 +60,8 @@ TEST(TEST_VEXPR, ABSTEST) { std::string expr_json = R"|({"1":{"lst":["rec",2,{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"abs"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"5":{"tf":0},"7":{"str":"abs(INT)"},"9":{"rec":{"1":{"str":"_ZN5doris13MathFunctions3absEPN9doris_udf15FunctionContextERKNS1_6IntValE"}}},"11":{"i64":0}}}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0}}},"20":{"i32":-1},"23":{"i32":-1}}]}})|"; doris::TExpr exprx = apache::thrift::from_json_string(expr_json); - doris::vectorized::VExprContext* context = nullptr; - doris::vectorized::VExpr::create_expr_tree(&object_pool, exprx, &context); + doris::vectorized::VExprContextSPtr context; + doris::vectorized::VExpr::create_expr_tree(exprx, context); doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(), doris::TQueryGlobals(), nullptr); @@ -154,8 +154,8 @@ TEST(TEST_VEXPR, ABSTEST2) { R"|({"1":{"lst":["rec",2,{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"abs"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"5":{"tf":0},"7":{"str":"abs(INT)"},"9":{"rec":{"1":{"str":"_ZN5doris13MathFunctions3absEPN9doris_udf15FunctionContextERKNS1_6IntValE"}}},"11":{"i64":0}}}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0}}},"20":{"i32":-1},"23":{"i32":-1}}]}})|"; TExpr exprx = apache::thrift::from_json_string(expr_json); - doris::vectorized::VExprContext* context = nullptr; - doris::vectorized::VExpr::create_expr_tree(&object_pool, exprx, &context); + doris::vectorized::VExprContextSPtr context; + doris::vectorized::VExpr::create_expr_tree(exprx, context); doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(), doris::TQueryGlobals(), nullptr); diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp index a659c39538..2d08eadc81 100644 --- a/be/test/vec/function/table_function_test.cpp +++ b/be/test/vec/function/table_function_test.cpp @@ -59,21 +59,21 @@ protected: void init_expr_context(int child_num) { clear(); - _root = std::make_unique(); + _root = std::make_shared(); for (int i = 0; i < child_num; ++i) { _column_ids.push_back(i); - _children.push_back(std::make_unique()); + _children.push_back(std::make_shared()); EXPECT_CALL(*_children[i], execute(_, _, _)) .WillRepeatedly(DoAll(SetArgPointee<2>(_column_ids[i]), Return(Status::OK()))); - _root->add_child(_children[i].get()); + _root->add_child(_children[i]); } - _ctx = std::make_unique(_root.get()); + _ctx = std::make_shared(_root); } private: - std::unique_ptr _ctx; - std::unique_ptr _root; - std::vector> _children; + VExprContextSPtr _ctx; + std::shared_ptr _root; + std::vector> _children; std::vector _column_ids; }; @@ -81,7 +81,7 @@ TEST_F(TableFunctionTest, vexplode_outer) { init_expr_context(1); VExplodeTableFunction explode_outer; explode_outer.set_outer(); - explode_outer.set_vexpr_context(_ctx.get()); + explode_outer.set_expr_context(_ctx); // explode_outer(Array) { @@ -128,7 +128,7 @@ TEST_F(TableFunctionTest, vexplode_outer) { TEST_F(TableFunctionTest, vexplode) { init_expr_context(1); VExplodeTableFunction explode; - explode.set_vexpr_context(_ctx.get()); + explode.set_expr_context(_ctx); // explode(Array) { @@ -171,7 +171,7 @@ TEST_F(TableFunctionTest, vexplode) { TEST_F(TableFunctionTest, vexplode_numbers) { init_expr_context(1); VExplodeNumbersTableFunction tfn; - tfn.set_vexpr_context(_ctx.get()); + tfn.set_expr_context(_ctx); { InputTypeSet input_types = {TypeIndex::Int32}; @@ -187,7 +187,7 @@ TEST_F(TableFunctionTest, vexplode_numbers) { TEST_F(TableFunctionTest, vexplode_split) { init_expr_context(2); VExplodeSplitTableFunction tfn; - tfn.set_vexpr_context(_ctx.get()); + tfn.set_expr_context(_ctx); { // Case 1: explode_split(null) --- null diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CompoundPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CompoundPredicate.java index b98b982eb6..670cd7c9a8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CompoundPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CompoundPredicate.java @@ -291,7 +291,7 @@ public class CompoundPredicate extends Predicate { } @Override - public Expr replaceSubPredicate(Expr subExpr) throws AnalysisException { + public Expr replaceSubPredicate(Expr subExpr) { if (op.equals(Operator.AND)) { Expr lhs = children.get(0); Expr rhs = children.get(1); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index 18b99065f4..8e8c213e9b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -2285,7 +2285,7 @@ public abstract class Expr extends TreeNode implements ParseNode, Cloneabl return false; } - public Expr replaceSubPredicate(Expr subExpr) throws AnalysisException { + public Expr replaceSubPredicate(Expr subExpr) { if (toSqlWithoutTbl().equals(subExpr.toSqlWithoutTbl())) { return null; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java index 2daed4e237..cd5f13cf97 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java @@ -166,7 +166,6 @@ public class LoadingTaskPlanner { fileStatusesList, filesAdded, strictMode, loadParallelism, userInfo); scanNode.init(analyzer); scanNode.finalize(analyzer); - scanNode.convertToVectorized(); scanNodes.add(scanNode); descTable.computeStatAndMemLayout(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index ac5c05eb26..8be56ffdca 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -291,7 +291,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor outputExprs.add(context.findSlotRef(exprId))); rootFragment.setOutputExprs(outputExprs); } - rootFragment.getPlanRoot().convertToVectorized(); for (PlanFragment fragment : context.getPlanFragments()) { fragment.finalize(null); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/FileLoadScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/FileLoadScanNode.java index 9215d409ae..ef581280c0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/FileLoadScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/FileLoadScanNode.java @@ -302,10 +302,8 @@ public class FileLoadScanNode extends FileScanNode { // Need re compute memory layout after set some slot descriptor to nullable srcTupleDesc.computeStatAndMemLayout(); - if (!preFilterConjuncts.isEmpty()) { - Expr vPreFilterExpr = convertConjunctsToAndCompoundPredicate(preFilterConjuncts); - initCompoundPredicate(vPreFilterExpr); - params.setPreFilterExprs(vPreFilterExpr.treeToThrift()); + for (Expr conjunct : preFilterConjuncts) { + params.addToPreFilterExprsList(conjunct.treeToThrift()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java index 1bbdae9d09..04a541ee4f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java @@ -53,7 +53,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; @@ -73,9 +72,7 @@ public class HashJoinNode extends JoinNodeBase { private List eqJoinConjuncts = Lists.newArrayList(); // join conjuncts from the JOIN clause that aren't equi-join predicates private List otherJoinConjuncts; - // join conjunct from the JOIN clause that aren't equi-join predicates, only use in - // vec exec engine - private Expr votherJoinConjunct = null; + private DistributionMode distrMode; private boolean isColocate = false; //the flag for colocate join private String colocateReason = ""; // if can not do colocate join, set reason here @@ -258,11 +255,6 @@ public class HashJoinNode extends JoinNodeBase { @Override protected void computeOtherConjuncts(Analyzer analyzer, ExprSubstitutionMap originToIntermediateSmap) { otherJoinConjuncts = Expr.substituteList(otherJoinConjuncts, originToIntermediateSmap, analyzer, false); - if (votherJoinConjunct != null) { - votherJoinConjunct = - Expr.substituteList(Arrays.asList(votherJoinConjunct), originToIntermediateSmap, analyzer, false) - .get(0); - } } @Override @@ -717,10 +709,6 @@ public class HashJoinNode extends JoinNodeBase { msg.hash_join_node.addToOtherJoinConjuncts(e.treeToThrift()); } - // use in vec exec engine to replace otherJoinConjuncts - if (votherJoinConjunct != null) { - msg.hash_join_node.setVotherJoinConjunct(votherJoinConjunct.treeToThrift()); - } if (hashOutputSlotIds != null) { for (SlotId slotId : hashOutputSlotIds) { msg.hash_join_node.addToHashOutputSlotIds(slotId.asInt()); @@ -829,15 +817,6 @@ public class HashJoinNode extends JoinNodeBase { } } - @Override - public void convertToVectorized() { - if (!otherJoinConjuncts.isEmpty()) { - votherJoinConjunct = convertConjunctsToAndCompoundPredicate(otherJoinConjuncts); - initCompoundPredicate(votherJoinConjunct); - } - super.convertToVectorized(); - } - /** * Used by nereids. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java b/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java index 14aafb4a84..11b9c7c4a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java @@ -45,7 +45,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -430,9 +429,6 @@ public abstract class JoinNodeBase extends PlanNode { // 4. replace other conjuncts and conjuncts computeOtherConjuncts(analyzer, originToIntermediateSmap); conjuncts = Expr.substituteList(conjuncts, originToIntermediateSmap, analyzer, false); - if (vconjunct != null) { - vconjunct = Expr.substituteList(Arrays.asList(vconjunct), originToIntermediateSmap, analyzer, false).get(0); - } // 5. replace tuple is null expr TupleIsNullPredicate.substitueListForTupleIsNull(vSrcToOutputSMap.getLhs(), originTidsToIntermediateTidMap); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java index e26aac6653..375357e701 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java @@ -66,8 +66,6 @@ public class NestedLoopJoinNode extends JoinNodeBase { private List runtimeFilterExpr = Lists.newArrayList(); private List joinConjuncts; - private Expr vJoinConjunct; - public NestedLoopJoinNode(PlanNodeId id, PlanNode outer, PlanNode inner, TableRef innerRef) { super(id, "NESTED LOOP JOIN", StatisticalType.NESTED_LOOP_JOIN_NODE, outer, inner, innerRef); tupleIds.addAll(outer.getOutputTupleIds()); @@ -160,20 +158,6 @@ public class NestedLoopJoinNode extends JoinNodeBase { @Override protected void computeOtherConjuncts(Analyzer analyzer, ExprSubstitutionMap originToIntermediateSmap) { joinConjuncts = Expr.substituteList(joinConjuncts, originToIntermediateSmap, analyzer, false); - if (vJoinConjunct != null) { - vJoinConjunct = - Expr.substituteList(Collections.singletonList(vJoinConjunct), originToIntermediateSmap, analyzer, - false).get(0); - } - } - - @Override - public void convertToVectorized() { - if (!joinConjuncts.isEmpty()) { - vJoinConjunct = convertConjunctsToAndCompoundPredicate(joinConjuncts); - initCompoundPredicate(vJoinConjunct); - } - super.convertToVectorized(); } @Override @@ -185,8 +169,8 @@ public class NestedLoopJoinNode extends JoinNodeBase { protected void toThrift(TPlanNode msg) { msg.nested_loop_join_node = new TNestedLoopJoinNode(); msg.nested_loop_join_node.join_op = joinOp.toThrift(); - if (vJoinConjunct != null) { - msg.nested_loop_join_node.setVjoinConjunct(vJoinConjunct.treeToThrift()); + for (Expr conjunct : joinConjuncts) { + msg.nested_loop_join_node.addToJoinConjuncts(conjunct.treeToThrift()); } msg.nested_loop_join_node.setIsMark(isMarkJoin()); if (vSrcToOutputSMap != null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index af673a9847..b384375bf0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -98,6 +98,7 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -351,11 +352,12 @@ public class OlapScanNode extends ScanNode { return selectedIndexId; } - public void ignoreConjuncts(Expr whereExpr) throws AnalysisException { + public void ignoreConjuncts(Expr whereExpr) { if (whereExpr == null) { return; } - vconjunct = vconjunct.replaceSubPredicate(whereExpr); + conjuncts = conjuncts.stream().map(expr -> expr.replaceSubPredicate(whereExpr)) + .filter(Objects::nonNull).collect(Collectors.toList()); } /** @@ -1163,8 +1165,10 @@ public class OlapScanNode extends ScanNode { if (useTopnOpt) { output.append(prefix).append("TOPN OPT\n"); } - if (vconjunct != null) { - output.append(prefix).append("PREDICATES: ").append(vconjunct.toSql()).append("\n"); + + if (!conjuncts.isEmpty()) { + Expr expr = convertConjunctsToAndCompoundPredicate(conjuncts); + output.append(prefix).append("PREDICATES: ").append(expr.toSql()).append("\n"); } if (!runtimeFilters.isEmpty()) { output.append(prefix).append("runtime filters: "); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java index e322b619ab..024ea0647b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java @@ -165,11 +165,6 @@ public class OriginalPlanner extends Planner { plannerContext = new PlannerContext(analyzer, queryStmt, queryOptions, statement); singleNodePlanner = new SingleNodePlanner(plannerContext); PlanNode singleNodePlan = singleNodePlanner.createSingleNodePlan(); - // TODO change to vec should happen after distributed planner - if (VectorizedUtil.isVectorized()) { - singleNodePlan.convertToVectorized(); - } - ProjectPlanner projectPlanner = new ProjectPlanner(analyzer); projectPlanner.projectSingleNodePlan(queryStmt.getResultExprs(), singleNodePlan); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java index 4a807a6452..6b49b237c6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java @@ -101,8 +101,6 @@ public abstract class PlanNode extends TreeNode implements PlanStats { protected List conjuncts = Lists.newArrayList(); - protected Expr vconjunct = null; - // Conjuncts used to filter the original load file. // In the load execution plan, the difference between "preFilterConjuncts" and "conjuncts" is that // conjuncts are used to filter the data after column conversion and mapping, @@ -462,9 +460,6 @@ public abstract class PlanNode extends TreeNode implements PlanStats { } public void transferConjuncts(PlanNode recipient) { - recipient.vconjunct = vconjunct; - vconjunct = null; - recipient.conjuncts.addAll(conjuncts); conjuncts.clear(); } @@ -584,9 +579,9 @@ public abstract class PlanNode extends TreeNode implements PlanStats { msg.addToRowTuples(tid.asInt()); msg.addToNullableTuples(nullableTupleIds.contains(tid)); } - // `conjuncts` is never needed on vectorized engine except scan nodes which use them as push-down predicates. - if (this instanceof ScanNode || !VectorizedUtil.isVectorized()) { - for (Expr e : conjuncts) { + + for (Expr e : conjuncts) { + if (!(e instanceof BitmapFilterPredicate)) { msg.addToConjuncts(e.treeToThrift()); } } @@ -596,10 +591,6 @@ public abstract class PlanNode extends TreeNode implements PlanStats { msg.addToRuntimeFilters(filter.toThrift()); } - if (vconjunct != null) { - msg.vconjunct = vconjunct.treeToThrift(); - } - msg.compact_data = compactData; if (outputSlotIds != null) { for (SlotId slotId : outputSlotIds) { @@ -1051,28 +1042,6 @@ public abstract class PlanNode extends TreeNode implements PlanStats { return getRuntimeFilterExplainString(isBuildNode, false); } - public void convertToVectorized() { - List conjunctsExcludeBitmapFilter = Lists.newArrayList(); - for (Expr expr : conjuncts) { - if (!(expr instanceof BitmapFilterPredicate)) { - conjunctsExcludeBitmapFilter.add(expr); - } - } - if (!conjunctsExcludeBitmapFilter.isEmpty()) { - vconjunct = convertConjunctsToAndCompoundPredicate(conjunctsExcludeBitmapFilter); - initCompoundPredicate(vconjunct); - } - - if (!preFilterConjuncts.isEmpty()) { - vpreFilterConjunct = convertConjunctsToAndCompoundPredicate(preFilterConjuncts); - initCompoundPredicate(vpreFilterConjunct); - } - - for (PlanNode child : children) { - child.convertToVectorized(); - } - } - /** * If an plan node implements this method, the plan node itself supports project optimization. * @param requiredSlotIdSet: The upper plan node's requirement slot set for the current plan node. @@ -1165,8 +1134,8 @@ public abstract class PlanNode extends TreeNode implements PlanStats { return outputSlotIds; } - public void setVConjunct(Set exprs) { - vconjunct = convertConjunctsToAndCompoundPredicate(new ArrayList<>(exprs)); + public void setConjuncts(Set exprs) { + conjuncts = new ArrayList<>(exprs); } public void setCardinalityAfterFilter(long cardinalityAfterFilter) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index 60d821166a..0ae4a35edb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -1372,16 +1372,16 @@ public class SingleNodePlanner { tupleSelectFailed = true; } else { try { - // mv index have where clause, so where expr on scan node is unused. - olapScanNode.ignoreConjuncts(olapScanNode.getOlapTable() - .getIndexMetaByIndexId(bestIndexInfo.getBestIndexId()) - .getWhereClause()); - // if the new selected index id is different from the old one, scan node will be // updated. olapScanNode.updateScanRangeInfoByNewMVSelector(bestIndexInfo.getBestIndexId(), bestIndexInfo.isPreAggregation(), bestIndexInfo.getReasonOfDisable()); + // mv index have where clause, so where expr on scan node is unused. + olapScanNode.ignoreConjuncts(olapScanNode.getOlapTable() + .getIndexMetaByIndexId(bestIndexInfo.getBestIndexId()) + .getWhereClause()); + if (selectStmt.getAggInfo() != null) { selectStmt.getAggInfo().updateTypeOfAggregateExprs(); } @@ -2207,7 +2207,9 @@ public class SingleNodePlanner { Analyzer viewAnalyzer = inlineViewRef.getAnalyzer(); Set exprs = viewAnalyzer.findMigrateFailedConjuncts(inlineViewRef); if (CollectionUtils.isNotEmpty(exprs)) { - scanNode.setVConjunct(exprs); + for (Expr expr : exprs) { + scanNode.addConjunct(expr); + } } } if (scanNode == null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java index f837665028..e5789e784f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java @@ -234,7 +234,6 @@ public class StreamLoadPlanner { scanNode.init(analyzer); scanNode.finalize(analyzer); - scanNode.convertToVectorized(); descTable.computeStatAndMemLayout(); int timeout = taskInfo.getTimeout(); diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index f538df77d9..6cbea5bae2 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -339,6 +339,7 @@ struct TFileScanRangeParams { 18: optional list column_idxs // Map of slot to its position in table schema. Only for Hive external table. 19: optional map slot_name_to_schema_pos + 20: optional list pre_filter_exprs_list } struct TFileRangeDesc { @@ -674,6 +675,8 @@ struct TNestedLoopJoinNode { 6: optional Exprs.TExpr vjoin_conjunct 7: optional bool is_mark + + 8: optional list join_conjuncts } struct TMergeJoinNode { diff --git a/regression-test/suites/query_p0/join/test_join.groovy b/regression-test/suites/query_p0/join/test_join.groovy index 1fddc4e208..e6d2906c0d 100644 --- a/regression-test/suites/query_p0/join/test_join.groovy +++ b/regression-test/suites/query_p0/join/test_join.groovy @@ -1260,11 +1260,14 @@ suite("test_join", "query,p0") { logger.info(ret.toString()) assertTrue(ret.toString().contains(" | join op: INNER JOIN(BROADCAST)")) + sql "drop table if exists `t0`" + sql "drop table if exists `t1`" + sql """ - CREATE TABLE t0(c0 BOOLEAN NOT NULL) DISTRIBUTED BY HASH (c0) BUCKETS 8 PROPERTIES ("replication_num" = "1"); + CREATE TABLE IF NOT EXISTS t0(c0 BOOLEAN NOT NULL) DISTRIBUTED BY HASH (c0) BUCKETS 8 PROPERTIES ("replication_num" = "1"); """ sql """ - CREATE TABLE t1(c0 DATETIME NOT NULL) DISTRIBUTED BY HASH (c0) BUCKETS 9 PROPERTIES ("replication_num" = "1"); + CREATE TABLE IF NOT EXISTS t1(c0 DATETIME NOT NULL) DISTRIBUTED BY HASH (c0) BUCKETS 9 PROPERTIES ("replication_num" = "1"); """ sql """INSERT INTO t1 (c0) VALUES (DATE '1970-02-15'), (DATE '1970-11-05'), (DATE '1970-07-10');""" sql """INSERT INTO t1 (c0) VALUES (DATE '1970-04-04');"""