From 394b420180a1fb47010f4f4019e8577947c0fa02 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Thu, 7 Dec 2023 17:54:44 +0800 Subject: [PATCH] [Update](inverted index) use session variable for inverted index try query threshold (#28052) * [Update](inverted index) use session variable for inverted index try query threshold * remove unused config * update clucene --- be/src/clucene | 2 +- be/src/common/config.cpp | 1 - be/src/common/config.h | 1 - .../segment_v2/inverted_index_reader.cpp | 20 +++++++++++-------- .../rowset/segment_v2/inverted_index_reader.h | 2 +- .../org/apache/doris/qe/SessionVariable.java | 11 ++++++++++ gensrc/thrift/PaloInternalService.thrift | 2 ++ 7 files changed, 27 insertions(+), 12 deletions(-) diff --git a/be/src/clucene b/be/src/clucene index 3c5d1e4a4b..26206be160 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit 3c5d1e4a4ba1ccfdad7c70aac8c303847b1f7a6d +Subproject commit 26206be1608775a58071d77f2d47ad363afd8028 diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index c67291f309..1a75659c9e 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1005,7 +1005,6 @@ DEFINE_String(inverted_index_query_cache_limit, "10%"); // inverted index DEFINE_mDouble(inverted_index_ram_buffer_size, "512"); -DEFINE_Int32(query_bkd_inverted_index_limit_percent, "5"); // 5% // dict path for chinese analyzer DEFINE_String(inverted_index_dict_path, "${DORIS_HOME}/dict"); DEFINE_Int32(inverted_index_read_buffer_size, "4096"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 09e3f070c8..b40f7aa303 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1050,7 +1050,6 @@ DECLARE_String(inverted_index_query_cache_limit); // inverted index DECLARE_mDouble(inverted_index_ram_buffer_size); -DECLARE_Int32(query_bkd_inverted_index_limit_percent); // 5% // dict path for chinese analyzer DECLARE_String(inverted_index_dict_path); DECLARE_Int32(inverted_index_read_buffer_size); diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index 64427bf061..a567859a3b 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -1270,14 +1270,18 @@ Status InvertedIndexIterator::read_from_inverted_index(const std::string& column uint32_t segment_num_rows, roaring::Roaring* bit_map, bool skip_try) { if (!skip_try && _reader->type() == InvertedIndexReaderType::BKD) { - auto query_bkd_limit_percent = config::query_bkd_inverted_index_limit_percent; - uint32_t hit_count = 0; - RETURN_IF_ERROR( - try_read_from_inverted_index(column_name, query_value, query_type, &hit_count)); - if (hit_count > segment_num_rows * query_bkd_limit_percent / 100) { - return Status::Error( - "hit count: {}, bkd inverted reached limit {}%, segment num rows:{}", hit_count, - query_bkd_limit_percent, segment_num_rows); + if (_runtime_state->query_options().inverted_index_skip_threshold > 0 && + _runtime_state->query_options().inverted_index_skip_threshold < 100) { + auto query_bkd_limit_percent = + _runtime_state->query_options().inverted_index_skip_threshold; + uint32_t hit_count = 0; + RETURN_IF_ERROR( + try_read_from_inverted_index(column_name, query_value, query_type, &hit_count)); + if (hit_count > segment_num_rows * query_bkd_limit_percent / 100) { + return Status::Error( + "hit count: {}, bkd inverted reached limit {}%, segment num rows:{}", + hit_count, query_bkd_limit_percent, segment_num_rows); + } } } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index 1dc5095b54..e14e4bcc47 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -286,7 +286,7 @@ public: Status read_from_inverted_index(const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, uint32_t segment_num_rows, - roaring::Roaring* bit_map, bool skip_try = true); + roaring::Roaring* bit_map, bool skip_try = false); Status try_read_from_inverted_index(const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, uint32_t* count); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 0169256867..4daff23a0c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -426,6 +426,8 @@ public class SessionVariable implements Serializable, Writable { public static final String INVERTED_INDEX_CONJUNCTION_OPT_THRESHOLD = "inverted_index_conjunction_opt_threshold"; public static final String INVERTED_INDEX_MAX_EXPANSIONS = "inverted_index_max_expansions"; + public static final String INVERTED_INDEX_SKIP_THRESHOLD = "inverted_index_skip_threshold"; + public static final String AUTO_ANALYZE_START_TIME = "auto_analyze_start_time"; public static final String AUTO_ANALYZE_END_TIME = "auto_analyze_end_time"; @@ -1332,6 +1334,13 @@ public class SessionVariable implements Serializable, Writable { + " thereby controlling query performance"}) public int invertedIndexMaxExpansions = 50; + @VariableMgr.VarAttr(name = INVERTED_INDEX_SKIP_THRESHOLD, + description = {"在倒排索引中如果预估命中量占比总量超过百分比阈值,则跳过索引直接进行匹配。", + "In the inverted index," + + " if the estimated hit ratio exceeds the percentage threshold of the total amount, " + + " then skip the index and proceed directly to matching."}) + public int invertedIndexSkipThreshold = 50; + @VariableMgr.VarAttr(name = SQL_DIALECT, needForward = true, checker = "checkSqlDialect", description = {"解析sql使用的方言", "The dialect used to parse sql."}) public String sqlDialect = "doris"; @@ -2659,6 +2668,8 @@ public class SessionVariable implements Serializable, Writable { tResult.setSkipMissingVersion(skipMissingVersion); + tResult.setInvertedIndexSkipThreshold(invertedIndexSkipThreshold); + return tResult; } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index c13473e9db..fb702f6113 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -261,6 +261,8 @@ struct TQueryOptions { 92: optional i32 wait_full_block_schedule_times = 1; 93: optional i32 inverted_index_max_expansions = 50; + + 94: optional i32 inverted_index_skip_threshold = 50; }