// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include "exec/exec_node.h" #include "exec/olap_common.h" #include "exprs/function_filter.h" #include "exprs/runtime_filter.h" #include "vec/exec/scan/scanner_context.h" #include "vec/exprs/vectorized_fn_call.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vin_predicate.h" namespace doris::pipeline { class ScanOperator; } namespace doris::vectorized { class VScanner; class VSlotRef; struct FilterPredicates { // Save all runtime filter predicates which may be pushed down to data source. // column name -> bloom filter function std::vector>> bloom_filters; std::vector>> bitmap_filters; std::vector>> in_filters; }; class VScanNode : public ExecNode { public: VScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ExecNode(pool, tnode, descs), _runtime_filter_descs(tnode.runtime_filters) {} virtual ~VScanNode() = default; friend class VScanner; friend class NewOlapScanner; friend class VFileScanner; friend class ScannerContext; friend class doris::pipeline::ScanOperator; Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override; Status prepare(RuntimeState* state) override; Status open(RuntimeState* state) override; virtual void set_scan_ranges(const std::vector& scan_ranges) {} // Get next block. // If eos is true, no more data will be read and block should be empty. Status get_next(RuntimeState* state, vectorized::Block* block, bool* eos) override; Status close(RuntimeState* state) override; void set_no_agg_finalize() { _need_agg_finalize = false; } // Try append late arrived runtime filters. // Return num of filters which are applied already. Status try_append_late_arrival_runtime_filter(int* arrived_rf_num); // Clone current vconjunct_ctx to _vconjunct_ctx, if exists. Status clone_vconjunct_ctx(VExprContext** _vconjunct_ctx); int runtime_filter_num() const { return (int)_runtime_filter_ctxs.size(); } TupleId input_tuple_id() const { return _input_tuple_id; } TupleId output_tuple_id() const { return _output_tuple_id; } const TupleDescriptor* input_tuple_desc() const { return _input_tuple_desc; } const TupleDescriptor* output_tuple_desc() const { return _output_tuple_desc; } Status alloc_resource(RuntimeState* state) override; void release_resource(RuntimeState* state) override; bool runtime_filters_are_ready_or_timeout(); enum class PushDownType { // The predicate can not be pushed down to data source UNACCEPTABLE, // The predicate can be pushed down to data source // and the data source can fully evaludate it ACCEPTABLE, // The predicate can be pushed down to data source // but the data source can not fully evaluate it. PARTIAL_ACCEPTABLE }; protected: // Different data sources register different profiles by implementing this method virtual Status _init_profile(); // Process predicates, extract the predicates in the conjuncts that can be pushed down // to the data source, and convert them into common expressions structure ColumnPredicate. // There are currently 3 types of predicates that can be pushed down to data sources: // // 1. Simple predicate, with column on left and constant on right, such as "a=1", "b in (1,2,3)" etc. // 2. Bloom Filter, predicate condition generated by runtime filter // 3. Function Filter, some data sources can accept function conditions, such as "a like 'abc%'" // // Predicates that can be fully processed by the data source will be removed from conjuncts virtual Status _process_conjuncts() { RETURN_IF_ERROR(_normalize_conjuncts()); return Status::OK(); } // Create a list of scanners. // The number of scanners is related to the implementation of the data source, // predicate conditions, and scheduling strategy. // So this method needs to be implemented separately by the subclass of ScanNode. // Finally, a set of scanners that have been prepared are returned. virtual Status _init_scanners(std::list* scanners) { return Status::OK(); } // Different data sources can implement the following 3 methods to determine whether a predicate // can be pushed down to the data source. // 3 types: // 1. binary predicate // 2. in/not in predicate // 3. function predicate // TODO: these interfaces should be change to become more common. virtual Status _should_push_down_binary_predicate( VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val, int* slot_ref_child, const std::function& fn_checker, PushDownType& pdt); virtual PushDownType _should_push_down_in_predicate(VInPredicate* in_pred, VExprContext* expr_ctx, bool is_not_in); virtual Status _should_push_down_function_filter(VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringVal* constant_str, doris_udf::FunctionContext** fn_ctx, PushDownType& pdt) { pdt = PushDownType::UNACCEPTABLE; return Status::OK(); } virtual PushDownType _should_push_down_bloom_filter() { return PushDownType::UNACCEPTABLE; } virtual PushDownType _should_push_down_bitmap_filter() { return PushDownType::UNACCEPTABLE; } virtual PushDownType _should_push_down_is_null_predicate() { return PushDownType::UNACCEPTABLE; } // Return true if it is a key column. // Only predicate on key column can be pushed down. virtual bool _is_key_column(const std::string& col_name) { return false; } protected: RuntimeState* _state; // For load scan node, there should be both input and output tuple descriptor. // For query scan node, there is only output_tuple_desc. TupleId _input_tuple_id = -1; TupleId _output_tuple_id = -1; const TupleDescriptor* _input_tuple_desc = nullptr; const TupleDescriptor* _output_tuple_desc = nullptr; // These two values are from query_options int _max_scan_key_num; int _max_pushdown_conditions_per_column; // For runtime filters struct RuntimeFilterContext { RuntimeFilterContext() : apply_mark(false), runtime_filter(nullptr) {} RuntimeFilterContext(IRuntimeFilter* rf) : apply_mark(false), runtime_filter(rf) {} // set to true if this runtime filter is already applied to vconjunct_ctx_ptr bool apply_mark; IRuntimeFilter* runtime_filter; }; std::vector _runtime_filter_ctxs; std::vector _runtime_filter_descs; // Set to true if the runtime filter is ready. std::vector _runtime_filter_ready_flag; std::mutex _rf_locks; std::map _conjunct_id_to_runtime_filter_ctxs; phmap::flat_hash_set _rf_vexpr_set; // True means all runtime filters are applied to scanners bool _is_all_rf_applied = true; // Each scan node will generates a ScannerContext to manage all Scanners. // See comments of ScannerContext for more details std::shared_ptr _scanner_ctx; // Save all scanner objects. ObjectPool _scanner_pool; // indicate this scan node has no more data to return bool _eos = false; bool _opened = false; FilterPredicates _filter_predicates {}; // Save all function predicates which may be pushed down to data source. std::vector _push_down_functions; // slot id -> ColumnValueRange // Parsed from conjuncts phmap::flat_hash_map> _slot_id_to_value_range; // column -> ColumnValueRange std::unordered_map _colname_to_value_range; // We use _colname_to_value_range to store a column and its corresponding value ranges. // But if a col is with value range, eg: 1 < col < 10, which is "!is_fixed_range", // in this case we can not merge "1 < col < 10" with "col not in (2)". // So we have to save "col not in (2)" to another structure: "_not_in_value_ranges". // When the data source try to use the value ranges, it should use both ranges in // "_colname_to_value_range" and in "_not_in_value_ranges" std::vector _not_in_value_ranges; bool _need_agg_finalize = true; bool _blocked_by_rf = false; // Every time vconjunct_ctx_ptr is updated, the old ctx will be stored in this vector // so that it will be destroyed uniformly at the end of the query. std::vector> _stale_vexpr_ctxs; // If sort info is set, push limit to each scanner; int64_t _limit_per_scanner = -1; protected: std::unique_ptr _scanner_profile; // rows read from the scanner (including those discarded by (pre)filters) RuntimeProfile::Counter* _rows_read_counter; // Wall based aggregate read throughput [rows/sec] RuntimeProfile::Counter* _total_throughput_counter; RuntimeProfile::Counter* _num_scanners; RuntimeProfile::Counter* _get_next_timer = nullptr; RuntimeProfile::Counter* _acquire_runtime_filter_timer = nullptr; // time of get block from scanner RuntimeProfile::Counter* _scan_timer = nullptr; RuntimeProfile::Counter* _scan_cpu_timer = nullptr; // time of prefilter input block from scanner RuntimeProfile::Counter* _prefilter_timer = nullptr; // time of convert input block to output block from scanner RuntimeProfile::Counter* _convert_block_timer = nullptr; // time of filter output block from scanner RuntimeProfile::Counter* _filter_timer = nullptr; RuntimeProfile::Counter* _scanner_sched_counter = nullptr; RuntimeProfile::Counter* _scanner_ctx_sched_counter = nullptr; RuntimeProfile::Counter* _scanner_wait_batch_timer = nullptr; RuntimeProfile::Counter* _scanner_wait_worker_timer = nullptr; // Num of pre allocated free blocks RuntimeProfile::Counter* _pre_alloc_free_blocks_num = nullptr; // Num of newly created free blocks when running query RuntimeProfile::Counter* _newly_create_free_blocks_num = nullptr; // Max num of scanner thread RuntimeProfile::Counter* _max_scanner_thread_num = nullptr; RuntimeProfile::HighWaterMarkCounter* _queued_blocks_memory_usage; RuntimeProfile::HighWaterMarkCounter* _free_blocks_memory_usage; private: // Register and get all runtime filters at Init phase. Status _register_runtime_filter(); // Get all arrived runtime filters at Open phase. Status _acquire_runtime_filter(bool wait = true); // Append late-arrival runtime filters to the vconjunct_ctx. Status _append_rf_into_conjuncts(std::vector& vexprs); Status _normalize_conjuncts(); Status _normalize_predicate(VExpr* conjunct_expr_root, VExpr** output_expr); Status _eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, PushDownType* pdt); Status _normalize_bloom_filter(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, PushDownType* pdt); Status _normalize_bitmap_filter(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, PushDownType* pdt); Status _normalize_function_filters(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, PushDownType* pdt); bool _is_predicate_acting_on_slot(VExpr* expr, const std::function&, const VSlotRef**, VExpr**)>& checker, SlotDescriptor** slot_desc, ColumnValueRangeType** range); template Status _normalize_in_and_eq_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, ColumnValueRange& range, PushDownType* pdt); template Status _normalize_not_in_and_not_eq_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, ColumnValueRange& range, PushDownType* pdt); template Status _normalize_noneq_binary_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, ColumnValueRange& range, PushDownType* pdt); template Status _normalize_is_null_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, ColumnValueRange& range, PushDownType* pdt); template static Status _change_value_range(ColumnValueRange& range, void* value, const ChangeFixedValueRangeFunc& func, const std::string& fn_name, int slot_ref_child = -1); // Submit the scanner to the thread pool and start execution Status _start_scanners(const std::list& scanners); }; } // namespace doris::vectorized