diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp index d4b1b6a7d7..973e6c23ee 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.cpp +++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp @@ -125,7 +125,7 @@ static std::string olap_filters_to_string(const std::vector& filters_string += "["; for (auto it = filters.cbegin(); it != filters.cend(); it++) { if (it != filters.cbegin()) { - filters_string += ","; + filters_string += ", "; } filters_string += olap_filter_to_string(*it); } @@ -181,6 +181,12 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() { } } + // Append value ranges in "_not_in_value_ranges" + for (auto& range : _not_in_value_ranges) { + std::visit([&](auto&& the_range) { the_range.to_in_condition(_olap_filters, false); }, + range); + } + _runtime_profile->add_info_string("PushDownPredicates", olap_filters_to_string(_olap_filters)); _runtime_profile->add_info_string("KeyRanges", _scan_keys.debug_string()); VLOG_CRITICAL << _scan_keys.debug_string(); @@ -188,67 +194,12 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() { return Status::OK(); } -bool NewOlapScanNode::_should_push_down_binary_predicate( - VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val, - int* slot_ref_child, const std::function& fn_checker) { - if (!fn_checker(fn_call->fn().name.function_name)) { - return false; - } - - const auto& children = fn_call->children(); - DCHECK(children.size() == 2); - for (size_t i = 0; i < children.size(); i++) { - if (VExpr::expr_without_cast(children[i])->node_type() != TExprNodeType::SLOT_REF) { - // not a slot ref(column) - continue; - } - if (!children[1 - i]->is_constant()) { - // only handle constant value - return false; - } else { - if (const ColumnConst* const_column = check_and_get_column( - children[1 - i]->get_const_col(expr_ctx)->column_ptr)) { - *slot_ref_child = i; - *constant_val = const_column->get_data_at(0); - } else { - return false; - } - } - } - return true; -} - -bool NewOlapScanNode::_should_push_down_in_predicate(VInPredicate* pred, VExprContext* expr_ctx, - bool is_not_in) { - if (pred->is_not_in() != is_not_in) { - return false; - } - InState* state = reinterpret_cast( - expr_ctx->fn_context(pred->fn_context_index()) - ->get_function_state(FunctionContext::FRAGMENT_LOCAL)); - HybridSetBase* set = state->hybrid_set.get(); - - // if there are too many elements in InPredicate, exceed the limit, - // we will not push any condition of this column to storage engine. - // because too many conditions pushed down to storage engine may even - // slow down the query process. - // ATTN: This is just an experience value. You may need to try - // different thresholds to improve performance. - if (set->size() > _max_pushdown_conditions_per_column) { - VLOG_NOTICE << "Predicate value num " << set->size() << " exceed limit " - << _max_pushdown_conditions_per_column; - return false; - } - return true; -} - -bool NewOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_call, - VExprContext* expr_ctx, - StringVal* constant_str, - doris_udf::FunctionContext** fn_ctx) { +VScanNode::PushDownType NewOlapScanNode::_should_push_down_function_filter( + VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringVal* constant_str, + doris_udf::FunctionContext** fn_ctx) { // Now only `like` function filters is supported to push down if (fn_call->fn().name.function_name != "like") { - return false; + return PushDownType::UNACCEPTABLE; } const auto& children = fn_call->children(); @@ -262,19 +213,19 @@ bool NewOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_cal } if (!children[1 - i]->is_constant()) { // only handle constant value - return false; + return PushDownType::UNACCEPTABLE; } else { DCHECK(children[1 - i]->type().is_string_type()); if (const ColumnConst* const_column = check_and_get_column( children[1 - i]->get_const_col(expr_ctx)->column_ptr)) { *constant_str = const_column->get_data_at(0).to_string_val(); } else { - return false; + return PushDownType::UNACCEPTABLE; } } } *fn_ctx = func_cxt; - return true; + return PushDownType::ACCEPTABLE; } // PlanFragmentExecutor will call this method to set scan range diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h b/be/src/vec/exec/scan/new_olap_scan_node.h index a721737585..a922b009b9 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.h +++ b/be/src/vec/exec/scan/new_olap_scan_node.h @@ -38,17 +38,13 @@ protected: Status _process_conjuncts() override; bool _is_key_column(const std::string& col_name) override; - bool _should_push_down_binary_predicate( - VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val, - int* slot_ref_child, - const std::function& fn_checker) override; + PushDownType _should_push_down_function_filter(VectorizedFnCall* fn_call, + VExprContext* expr_ctx, StringVal* constant_str, + doris_udf::FunctionContext** fn_ctx) override; - bool _should_push_down_in_predicate(VInPredicate* in_pred, VExprContext* expr_ctx, - bool is_not_in) override; + PushDownType _should_push_down_bloom_filter() override { return PushDownType::ACCEPTABLE; } - bool _should_push_down_function_filter(VectorizedFnCall* fn_call, VExprContext* expr_ctx, - StringVal* constant_str, - doris_udf::FunctionContext** fn_ctx) override; + PushDownType _should_push_down_is_null_predicate() override { return PushDownType::ACCEPTABLE; } Status _init_scanners(std::list* scanners) override; diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index 0dd48d58b4..23661f7c5b 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -30,11 +30,11 @@ namespace doris::vectorized { -#define RETURN_IF_PUSH_DOWN(stmt) \ - if (!push_down) { \ - stmt; \ - } else { \ - return; \ +#define RETURN_IF_PUSH_DOWN(stmt) \ + if (pdt == PushDownType::UNACCEPTABLE) { \ + stmt; \ + } else { \ + return; \ } static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) { @@ -403,43 +403,44 @@ VExpr* VScanNode::_normalize_predicate(VExpr* conjunct_expr_root) { if (conjunct_expr_root != nullptr) { if (is_leaf(conjunct_expr_root)) { auto impl = conjunct_expr_root->get_impl(); + // If impl is not null, which means this a conjuncts from runtime filter. VExpr* cur_expr = impl ? const_cast(impl) : conjunct_expr_root; SlotDescriptor* slot; ColumnValueRangeType* range = nullptr; - bool push_down = false; - _eval_const_conjuncts(cur_expr, *(_vconjunct_ctx_ptr.get()), &push_down); - if (!push_down && + PushDownType pdt = PushDownType::UNACCEPTABLE; + _eval_const_conjuncts(cur_expr, *(_vconjunct_ctx_ptr.get()), &pdt); + if (pdt == PushDownType::UNACCEPTABLE && (_is_predicate_acting_on_slot(cur_expr, in_predicate_checker, &slot, &range) || _is_predicate_acting_on_slot(cur_expr, eq_predicate_checker, &slot, &range))) { std::visit( [&](auto& value_range) { RETURN_IF_PUSH_DOWN(_normalize_in_and_eq_predicate( cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range, - &push_down)); + &pdt)); RETURN_IF_PUSH_DOWN(_normalize_not_in_and_not_eq_predicate( cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range, - &push_down)); + &pdt)); RETURN_IF_PUSH_DOWN(_normalize_is_null_predicate( cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range, - &push_down)); + &pdt)); RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate( cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range, - &push_down)); + &pdt)); if (_is_key_column(slot->col_name())) { RETURN_IF_PUSH_DOWN(_normalize_bloom_filter( - cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &push_down)); + cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &pdt)); if (_state->enable_function_pushdown()) { RETURN_IF_PUSH_DOWN(_normalize_function_filters( - cur_expr, *(_vconjunct_ctx_ptr.get()), slot, - &push_down)); + cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &pdt)); } } }, *range); } - if (push_down && _is_key_column(slot->col_name())) { + if (pdt == PushDownType::ACCEPTABLE && _is_key_column(slot->col_name())) { return nullptr; } else { + // for PARTIAL_ACCEPTABLE and UNACCEPTABLE, do not remove expr from the tree return conjunct_expr_root; } } else { @@ -464,17 +465,20 @@ VExpr* VScanNode::_normalize_predicate(VExpr* conjunct_expr_root) { } Status VScanNode::_normalize_bloom_filter(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, - bool* push_down) { + PushDownType* pdt) { if (TExprNodeType::BLOOM_PRED == expr->node_type()) { DCHECK(expr->children().size() == 1); - _bloom_filters_push_down.emplace_back(slot->col_name(), expr->get_bloom_filter_func()); - *push_down = true; + PushDownType temp_pdt = _should_push_down_bloom_filter(); + if (temp_pdt != PushDownType::UNACCEPTABLE) { + _bloom_filters_push_down.emplace_back(slot->col_name(), expr->get_bloom_filter_func()); + *pdt = temp_pdt; + } } return Status::OK(); } Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext* expr_ctx, - SlotDescriptor* slot, bool* push_down) { + SlotDescriptor* slot, PushDownType* pdt) { bool opposite = false; VExpr* fn_expr = expr; if (TExprNodeType::COMPOUND_PRED == expr->node_type() && @@ -486,11 +490,12 @@ Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext* expr_ct if (TExprNodeType::FUNCTION_CALL == fn_expr->node_type()) { doris_udf::FunctionContext* fn_ctx = nullptr; StringVal val; - if (_should_push_down_function_filter(reinterpret_cast(fn_expr), - expr_ctx, &val, &fn_ctx)) { + PushDownType temp_pdt = _should_push_down_function_filter( + reinterpret_cast(fn_expr), expr_ctx, &val, &fn_ctx); + if (temp_pdt != PushDownType::UNACCEPTABLE) { std::string col = slot->col_name(); _push_down_functions.emplace_back(opposite, col, fn_ctx, val); - *push_down = true; + *pdt = temp_pdt; } } return Status::OK(); @@ -523,14 +528,14 @@ bool VScanNode::_is_predicate_acting_on_slot( return true; } -void VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, bool* push_down) { +void VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, PushDownType* pdt) { char* constant_val = nullptr; if (vexpr->is_constant()) { if (const ColumnConst* const_column = check_and_get_column(vexpr->get_const_col(expr_ctx)->column_ptr)) { constant_val = const_cast(const_column->get_data_at(0).data); if (constant_val == nullptr || *reinterpret_cast(constant_val) == false) { - *push_down = true; + *pdt = PushDownType::ACCEPTABLE; _eos = true; } } else { @@ -543,14 +548,14 @@ void VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, bool template Status VScanNode::_normalize_in_and_eq_predicate(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, ColumnValueRange& range, - bool* push_down) { + PushDownType* pdt) { auto temp_range = ColumnValueRange::create_empty_column_value_range(slot->type().precision, slot->type().scale); - bool effect = false; // 1. Normalize in conjuncts like 'where col in (v1, v2, v3)' if (TExprNodeType::IN_PRED == expr->node_type()) { VInPredicate* pred = static_cast(expr); - if (!_should_push_down_in_predicate(pred, expr_ctx, false)) { + PushDownType temp_pdt = _should_push_down_in_predicate(pred, expr_ctx, false); + if (temp_pdt == PushDownType::UNACCEPTABLE) { return Status::OK(); } @@ -573,43 +578,45 @@ Status VScanNode::_normalize_in_and_eq_predicate(VExpr* expr, VExprContext* expr fn_name, !state->hybrid_set->is_date_v2())); iter->next(); } - range.intersection(temp_range); - effect = true; + *pdt = temp_pdt; } else if (TExprNodeType::BINARY_PRED == expr->node_type()) { DCHECK(expr->children().size() == 2); auto eq_checker = [](const std::string& fn_name) { return fn_name == "eq"; }; StringRef value; int slot_ref_child = -1; - if (_should_push_down_binary_predicate(reinterpret_cast(expr), expr_ctx, - &value, &slot_ref_child, eq_checker)) { - DCHECK(slot_ref_child >= 0); - // where A = nullptr should return empty result set - auto fn_name = std::string(""); - if (value.data != nullptr) { - if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || - T == TYPE_HLL) { - auto val = StringValue(value.data, value.size); - RETURN_IF_ERROR(_change_value_range( - temp_range, reinterpret_cast(&val), - ColumnValueRange::add_fixed_value_range, fn_name)); - } else { - RETURN_IF_ERROR(_change_value_range( - temp_range, reinterpret_cast(const_cast(value.data)), - ColumnValueRange::add_fixed_value_range, fn_name)); - } - range.intersection(temp_range); - effect = true; - } + + PushDownType temp_pdt = + _should_push_down_binary_predicate(reinterpret_cast(expr), + expr_ctx, &value, &slot_ref_child, eq_checker); + if (temp_pdt == PushDownType::UNACCEPTABLE) { + return Status::OK(); } + DCHECK(slot_ref_child >= 0); + // where A = nullptr should return empty result set + auto fn_name = std::string(""); + if (value.data != nullptr) { + if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || + T == TYPE_HLL) { + auto val = StringValue(value.data, value.size); + RETURN_IF_ERROR(_change_value_range( + temp_range, reinterpret_cast(&val), + ColumnValueRange::add_fixed_value_range, fn_name)); + } else { + RETURN_IF_ERROR(_change_value_range( + temp_range, reinterpret_cast(const_cast(value.data)), + ColumnValueRange::add_fixed_value_range, fn_name)); + } + range.intersection(temp_range); + } + *pdt = temp_pdt; } // exceed limit, no conditions will be pushed down to storage engine. if (range.get_fixed_value_size() > _max_pushdown_conditions_per_column) { range.set_whole_value_range(); - } else { - *push_down = effect; + *pdt = PushDownType::UNACCEPTABLE; } return Status::OK(); } @@ -618,14 +625,15 @@ template Status VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, ColumnValueRange& range, - bool* push_down) { + PushDownType* pdt) { bool is_fixed_range = range.is_fixed_value_range(); auto not_in_range = ColumnValueRange::create_empty_column_value_range(range.column_name()); - bool effect = false; + PushDownType temp_pdt = PushDownType::UNACCEPTABLE; // 1. Normalize in conjuncts like 'where col in (v1, v2, v3)' if (TExprNodeType::IN_PRED == expr->node_type()) { VInPredicate* pred = static_cast(expr); - if (!_should_push_down_in_predicate(pred, expr_ctx, true)) { + if ((temp_pdt = _should_push_down_in_predicate(pred, expr_ctx, true)) == + PushDownType::UNACCEPTABLE) { return Status::OK(); } @@ -652,55 +660,56 @@ Status VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprConte } iter->next(); } - effect = true; } else if (TExprNodeType::BINARY_PRED == expr->node_type()) { DCHECK(expr->children().size() == 2); auto ne_checker = [](const std::string& fn_name) { return fn_name == "ne"; }; StringRef value; int slot_ref_child = -1; - if (_should_push_down_binary_predicate(reinterpret_cast(expr), expr_ctx, - &value, &slot_ref_child, ne_checker)) { - DCHECK(slot_ref_child >= 0); - // where A = nullptr should return empty result set - if (value.data != nullptr) { - auto fn_name = std::string(""); - if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || - T == TYPE_HLL) { - auto val = StringValue(value.data, value.size); - if (is_fixed_range) { - RETURN_IF_ERROR(_change_value_range( - range, reinterpret_cast(&val), - ColumnValueRange::remove_fixed_value_range, fn_name)); - } else { - RETURN_IF_ERROR(_change_value_range( - not_in_range, reinterpret_cast(&val), - ColumnValueRange::add_fixed_value_range, fn_name)); - } + if ((temp_pdt = _should_push_down_binary_predicate( + reinterpret_cast(expr), expr_ctx, &value, &slot_ref_child, + ne_checker)) == PushDownType::UNACCEPTABLE) { + return Status::OK(); + } + + DCHECK(slot_ref_child >= 0); + // where A = nullptr should return empty result set + if (value.data != nullptr) { + auto fn_name = std::string(""); + if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || + T == TYPE_HLL) { + auto val = StringValue(value.data, value.size); + if (is_fixed_range) { + RETURN_IF_ERROR(_change_value_range( + range, reinterpret_cast(&val), + ColumnValueRange::remove_fixed_value_range, fn_name)); } else { - if (is_fixed_range) { - RETURN_IF_ERROR(_change_value_range( - range, reinterpret_cast(const_cast(value.data)), - ColumnValueRange::remove_fixed_value_range, fn_name)); - } else { - RETURN_IF_ERROR(_change_value_range( - not_in_range, - reinterpret_cast(const_cast(value.data)), - ColumnValueRange::add_fixed_value_range, fn_name)); - } + RETURN_IF_ERROR(_change_value_range( + not_in_range, reinterpret_cast(&val), + ColumnValueRange::add_fixed_value_range, fn_name)); + } + } else { + if (is_fixed_range) { + RETURN_IF_ERROR(_change_value_range( + range, reinterpret_cast(const_cast(value.data)), + ColumnValueRange::remove_fixed_value_range, fn_name)); + } else { + RETURN_IF_ERROR(_change_value_range( + not_in_range, reinterpret_cast(const_cast(value.data)), + ColumnValueRange::add_fixed_value_range, fn_name)); } - effect = true; } } + } else { + return Status::OK(); } if (is_fixed_range || not_in_range.get_fixed_value_size() <= _max_pushdown_conditions_per_column) { if (!is_fixed_range) { - // push down not in condition to storage engine - not_in_range.to_in_condition(_olap_filters, false); + _not_in_value_ranges.push_back(not_in_range); } - *push_down = effect; + *pdt = temp_pdt; } return Status::OK(); } @@ -708,21 +717,26 @@ Status VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprConte template Status VScanNode::_normalize_is_null_predicate(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, ColumnValueRange& range, - bool* push_down) { + PushDownType* pdt) { + PushDownType temp_pdt = _should_push_down_is_null_predicate(); + if (temp_pdt == PushDownType::UNACCEPTABLE) { + return Status::OK(); + } + if (TExprNodeType::FUNCTION_CALL == expr->node_type()) { if (reinterpret_cast(expr)->fn().name.function_name == "is_null_pred") { auto temp_range = ColumnValueRange::create_empty_column_value_range( slot->type().precision, slot->type().scale); temp_range.set_contain_null(true); range.intersection(temp_range); - *push_down = true; + *pdt = temp_pdt; } else if (reinterpret_cast(expr)->fn().name.function_name == "is_not_null_pred") { auto temp_range = ColumnValueRange::create_empty_column_value_range( slot->type().precision, slot->type().scale); temp_range.set_contain_null(false); range.intersection(temp_range); - *push_down = true; + *pdt = temp_pdt; } } return Status::OK(); @@ -731,7 +745,7 @@ Status VScanNode::_normalize_is_null_predicate(VExpr* expr, VExprContext* expr_c template Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, - ColumnValueRange& range, bool* push_down) { + ColumnValueRange& range, PushDownType* pdt) { if (TExprNodeType::BINARY_PRED == expr->node_type()) { DCHECK(expr->children().size() == 2); @@ -740,15 +754,16 @@ Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* e }; StringRef value; int slot_ref_child = -1; - if (_should_push_down_binary_predicate(reinterpret_cast(expr), expr_ctx, - &value, &slot_ref_child, noneq_checker)) { + PushDownType temp_pdt = _should_push_down_binary_predicate( + reinterpret_cast(expr), expr_ctx, &value, &slot_ref_child, + noneq_checker); + if (temp_pdt != PushDownType::UNACCEPTABLE) { DCHECK(slot_ref_child >= 0); const std::string& fn_name = reinterpret_cast(expr)->fn().name.function_name; // where A = nullptr should return empty result set if (value.data != nullptr) { - *push_down = true; if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || T == TYPE_HLL) { auto val = StringValue(value.data, value.size); @@ -760,6 +775,7 @@ Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* e range, reinterpret_cast(const_cast(value.data)), ColumnValueRange::add_value_range, fn_name, true, slot_ref_child)); } + *pdt = temp_pdt; } } } @@ -906,4 +922,59 @@ Status VScanNode::clone_vconjunct_ctx(VExprContext** _vconjunct_ctx) { return Status::OK(); } +VScanNode::PushDownType VScanNode::_should_push_down_binary_predicate( + VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val, + int* slot_ref_child, const std::function& fn_checker) { + if (!fn_checker(fn_call->fn().name.function_name)) { + return PushDownType::UNACCEPTABLE; + } + + const auto& children = fn_call->children(); + DCHECK(children.size() == 2); + for (size_t i = 0; i < children.size(); i++) { + if (VExpr::expr_without_cast(children[i])->node_type() != TExprNodeType::SLOT_REF) { + // not a slot ref(column) + continue; + } + if (!children[1 - i]->is_constant()) { + // only handle constant value + return PushDownType::UNACCEPTABLE; + } else { + if (const ColumnConst* const_column = check_and_get_column( + children[1 - i]->get_const_col(expr_ctx)->column_ptr)) { + *slot_ref_child = i; + *constant_val = const_column->get_data_at(0); + } else { + return PushDownType::UNACCEPTABLE; + } + } + } + return PushDownType::ACCEPTABLE; +} + +VScanNode::PushDownType VScanNode::_should_push_down_in_predicate(VInPredicate* pred, + VExprContext* expr_ctx, + bool is_not_in) { + if (pred->is_not_in() != is_not_in) { + return PushDownType::UNACCEPTABLE; + } + InState* state = reinterpret_cast( + expr_ctx->fn_context(pred->fn_context_index()) + ->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + HybridSetBase* set = state->hybrid_set.get(); + + // if there are too many elements in InPredicate, exceed the limit, + // we will not push any condition of this column to storage engine. + // because too many conditions pushed down to storage engine may even + // slow down the query process. + // ATTN: This is just an experience value. You may need to try + // different thresholds to improve performance. + if (set->size() > _max_pushdown_conditions_per_column) { + VLOG_NOTICE << "Predicate value num " << set->size() << " exceed limit " + << _max_pushdown_conditions_per_column; + return PushDownType::UNACCEPTABLE; + } + return PushDownType::ACCEPTABLE; +} + } // namespace doris::vectorized diff --git a/be/src/vec/exec/scan/vscan_node.h b/be/src/vec/exec/scan/vscan_node.h index a37074948a..33386ffdab 100644 --- a/be/src/vec/exec/scan/vscan_node.h +++ b/be/src/vec/exec/scan/vscan_node.h @@ -73,6 +73,17 @@ public: const TupleDescriptor* input_tuple_desc() const { return _input_tuple_desc; } const TupleDescriptor* output_tuple_desc() const { return _output_tuple_desc; } + enum class PushDownType { + // The predicate can not be pushed down to data source + UNACCEPTABLE, + // The predicate can be pushed down to data source + // and the data source can fully evaludate it + ACCEPTABLE, + // The predicate can be pushed down to data source + // but the data source can not fully evaluate it. + PARTIAL_ACCEPTABLE + }; + protected: // Different data sources register different profiles by implementing this method virtual Status _init_profile(); @@ -105,21 +116,24 @@ protected: // 2. in/not in predicate // 3. function predicate // TODO: these interfaces should be change to become more common. - virtual bool _should_push_down_binary_predicate( + virtual PushDownType _should_push_down_binary_predicate( VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val, - int* slot_ref_child, const std::function& fn_checker) { - return false; + int* slot_ref_child, const std::function& fn_checker); + + virtual PushDownType _should_push_down_in_predicate(VInPredicate* in_pred, + VExprContext* expr_ctx, bool is_not_in); + + virtual PushDownType _should_push_down_function_filter(VectorizedFnCall* fn_call, + VExprContext* expr_ctx, + StringVal* constant_str, + doris_udf::FunctionContext** fn_ctx) { + return PushDownType::UNACCEPTABLE; } - virtual bool _should_push_down_in_predicate(VInPredicate* in_pred, VExprContext* expr_ctx, - bool is_not_in) { - return false; - } + virtual PushDownType _should_push_down_bloom_filter() { return PushDownType::UNACCEPTABLE; } - virtual bool _should_push_down_function_filter(VectorizedFnCall* fn_call, - VExprContext* expr_ctx, StringVal* constant_str, - doris_udf::FunctionContext** fn_ctx) { - return false; + virtual PushDownType _should_push_down_is_null_predicate() { + return PushDownType::UNACCEPTABLE; } // Return true if it is a key column. @@ -175,11 +189,18 @@ protected: std::vector _push_down_functions; // slot id -> ColumnValueRange - // Parsed from conjunts + // Parsed from conjuncts phmap::flat_hash_map> _slot_id_to_value_range; // column -> ColumnValueRange std::map _colname_to_value_range; + // We use _colname_to_value_range to store a column and its conresponding value ranges. + // But if a col is with value range, eg: 1 < col < 10, which is "!is_fixed_range", + // in this case we can not merge "1 < col < 10" with "col not in (2)". + // So we have to save "col not in (2)" to another structure: "_not_in_value_ranges". + // When the data source try to use the value ranges, it should use both ranges in + // "_colname_to_value_range" and in "_not_in_value_ranges" + std::vector _not_in_value_ranges; bool _need_agg_finalize = true; @@ -233,13 +254,13 @@ private: Status _normalize_conjuncts(); VExpr* _normalize_predicate(VExpr* conjunct_expr_root); - void _eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, bool* push_down); + void _eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, PushDownType* pdt); Status _normalize_bloom_filter(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, - bool* push_down); + PushDownType* pdt); Status _normalize_function_filters(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, - bool* push_down); + PushDownType* pdt); bool _is_predicate_acting_on_slot(VExpr* expr, const std::function&, @@ -249,21 +270,21 @@ private: template Status _normalize_in_and_eq_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, ColumnValueRange& range, - bool* push_down); + PushDownType* pdt); template Status _normalize_not_in_and_not_eq_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, ColumnValueRange& range, - bool* push_down); + PushDownType* pdt); template Status _normalize_noneq_binary_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, ColumnValueRange& range, - bool* push_down); + PushDownType* pdt); template Status _normalize_is_null_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot, ColumnValueRange& range, - bool* push_down); + PushDownType* pdt); template static Status _change_value_range(ColumnValueRange& range, void* value,