From 70daa1f85d0b2ec080b34c02f9de010d2f0f49f0 Mon Sep 17 00:00:00 2001 From: zzzxl <33418555+zzzxl1993@users.noreply.github.com> Date: Fri, 30 Aug 2024 10:32:43 +0800 Subject: [PATCH] [opt](inverted index) Controls whether the in_list can execute fast_execute. (#40141) https://github.com/apache/doris/pull/40022 --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 13 ++++++++++--- be/src/vec/exprs/vexpr.cpp | 3 +++ be/src/vec/exprs/vexpr.h | 1 + be/src/vec/exprs/vin_predicate.cpp | 2 ++ .../java/org/apache/doris/qe/SessionVariable.java | 10 ++++++++++ gensrc/thrift/PaloInternalService.thrift | 1 + .../test_index_inlist_fault_injection.out | 6 ++++++ .../test_index_inlist_fault_injection.groovy | 6 ++++++ 8 files changed, 39 insertions(+), 3 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 86476139a4..b9f9615f00 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -917,10 +917,17 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool return false; } - if ((pred->type() == PredicateType::IN_LIST || pred->type() == PredicateType::NOT_IN_LIST) && - pred->predicate_params()->marked_by_runtime_filter) { + if (pred->type() == PredicateType::IN_LIST || pred->type() == PredicateType::NOT_IN_LIST) { + auto predicate_param = pred->predicate_params(); // in_list or not_in_list predicate produced by runtime filter - return false; + if (predicate_param->marked_by_runtime_filter) { + return false; + } + // the in_list or not_in_list value count cannot be greater than threshold + int32_t threshold = _opts.runtime_state->query_options().in_list_value_count_threshold; + if (pred_in_compound && predicate_param->values.size() > threshold) { + return false; + } } // UNTOKENIZED strings exceed ignore_above, they are written as null, causing range query errors diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index b66c8aa80a..5cb0607411 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -652,6 +652,9 @@ std::string VExpr::gen_predicate_result_sign(Block& block, const ColumnNumbers& pred_result_sign += BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_" + function_name + "_"; if (function_name == "in" || function_name == "not_in") { + if (arguments.size() - 1 > _in_list_value_count_threshold) { + return pred_result_sign; + } // Generating 'result_sign' from 'inlist' requires sorting the values. std::set values; for (size_t i = 1; i < arguments.size(); i++) { diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 777d485156..88b18c6787 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -307,6 +307,7 @@ protected: uint32_t _index_unique_id = 0; bool _can_fast_execute = false; bool _enable_inverted_index_query = true; + uint32_t _in_list_value_count_threshold = 10; }; } // namespace vectorized diff --git a/be/src/vec/exprs/vin_predicate.cpp b/be/src/vec/exprs/vin_predicate.cpp index 4affec791a..4d518f9f92 100644 --- a/be/src/vec/exprs/vin_predicate.cpp +++ b/be/src/vec/exprs/vin_predicate.cpp @@ -28,6 +28,7 @@ #include #include "common/status.h" +#include "runtime/runtime_state.h" #include "vec/core/block.h" #include "vec/core/column_numbers.h" #include "vec/core/column_with_type_and_name.h" @@ -79,6 +80,7 @@ Status VInPredicate::prepare(RuntimeState* state, const RowDescriptor& desc, VExpr::register_function_context(state, context); _prepare_finished = true; _can_fast_execute = can_fast_execute(); + _in_list_value_count_threshold = state->query_options().in_list_value_count_threshold; return Status::OK(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 4822a41fc4..b7d977fd38 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -621,6 +621,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_MATCH_WITHOUT_INVERTED_INDEX = "enable_match_without_inverted_index"; public static final String ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX = "enable_fallback_on_missing_inverted_index"; + public static final String IN_LIST_VALUE_COUNT_THRESHOLD = "in_list_value_count_threshold"; + /** * If set false, user couldn't submit analyze SQL and FE won't allocate any related resources. */ @@ -2022,6 +2024,13 @@ public class SessionVariable implements Serializable, Writable { }) public boolean enableFallbackOnMissingInvertedIndex = true; + @VariableMgr.VarAttr(name = IN_LIST_VALUE_COUNT_THRESHOLD, description = { + "in条件value数量大于这个threshold后将不会走fast_execute", + "When the number of values in the IN condition exceeds this threshold," + + " fast_execute will not be used." + }) + public int inListValueCountThreshold = 10; + public void setEnableEsParallelScroll(boolean enableESParallelScroll) { this.enableESParallelScroll = enableESParallelScroll; } @@ -3525,6 +3534,7 @@ public class SessionVariable implements Serializable, Writable { tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex); tResult.setKeepCarriageReturn(keepCarriageReturn); + tResult.setInListValueCountThreshold(inListValueCountThreshold); return tResult; } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 3ffa27788a..b26e271b91 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -322,6 +322,7 @@ struct TQueryOptions { 126: optional i32 runtime_bloom_filter_max_size = 16777216; + 127: optional i32 in_list_value_count_threshold = 10; 128: optional bool enable_verbose_profile = false; 129: optional i32 rpc_verbose_profile_max_instance_count = 0; diff --git a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out index 528b400808..8409a168a0 100644 --- a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out +++ b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out @@ -65,3 +65,9 @@ -- !sql -- 2 +-- !sql -- +852 + +-- !sql -- +852 + diff --git a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy index 8d22c001ed..a9b3d51352 100644 --- a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy +++ b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy @@ -117,6 +117,12 @@ suite("test_index_inlist_fault_injection", "nonConcurrent") { qt_sql """ select count() from ${indexTbName} where (clientip = '2.1.0.0' or clientip = NULL and clientip = '40.135.0.0'); """ sql """ set enable_common_expr_pushdown = true; """ + + sql """ set in_list_value_count_threshold = 0; """ + qt_sql """ select count() from ${indexTbName} where (clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0', '247.37.0.0') or status = 200); """ + sql """ set in_list_value_count_threshold = 10; """ + qt_sql """ select count() from ${indexTbName} where (clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0', '247.37.0.0') or status = 200); """ + } finally { } } finally {