[opt](inverted index) Add a threshold that controls whether in_list predicates can use fast_execute. (#40141)

https://github.com/apache/doris/pull/40022
This commit is contained in:
zzzxl
2024-08-30 10:32:43 +08:00
committed by GitHub
parent ca07a00c93
commit 70daa1f85d
8 changed files with 39 additions and 3 deletions

View File

@ -917,10 +917,17 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
return false;
}
if ((pred->type() == PredicateType::IN_LIST || pred->type() == PredicateType::NOT_IN_LIST) &&
pred->predicate_params()->marked_by_runtime_filter) {
if (pred->type() == PredicateType::IN_LIST || pred->type() == PredicateType::NOT_IN_LIST) {
auto predicate_param = pred->predicate_params();
// in_list or not_in_list predicate produced by runtime filter
return false;
if (predicate_param->marked_by_runtime_filter) {
return false;
}
// the in_list or not_in_list value count cannot be greater than threshold
int32_t threshold = _opts.runtime_state->query_options().in_list_value_count_threshold;
if (pred_in_compound && predicate_param->values.size() > threshold) {
return false;
}
}
// UNTOKENIZED strings exceed ignore_above, they are written as null, causing range query errors

View File

@ -652,6 +652,9 @@ std::string VExpr::gen_predicate_result_sign(Block& block, const ColumnNumbers&
pred_result_sign +=
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_" + function_name + "_";
if (function_name == "in" || function_name == "not_in") {
if (arguments.size() - 1 > _in_list_value_count_threshold) {
return pred_result_sign;
}
// Generating 'result_sign' from 'inlist' requires sorting the values.
std::set<std::string> values;
for (size_t i = 1; i < arguments.size(); i++) {

View File

@ -307,6 +307,7 @@ protected:
uint32_t _index_unique_id = 0;
bool _can_fast_execute = false;
bool _enable_inverted_index_query = true;
// Value-count threshold for "in" / "not_in" expressions; initialized to 10.
// NOTE(review): appears to be overwritten from the query option
// in_list_value_count_threshold during prepare() — confirm against the caller.
uint32_t _in_list_value_count_threshold = 10;
};
} // namespace vectorized

View File

@ -28,6 +28,7 @@
#include <vector>
#include "common/status.h"
#include "runtime/runtime_state.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
#include "vec/core/column_with_type_and_name.h"
@ -79,6 +80,7 @@ Status VInPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
VExpr::register_function_context(state, context);
_prepare_finished = true;
_can_fast_execute = can_fast_execute();
_in_list_value_count_threshold = state->query_options().in_list_value_count_threshold;
return Status::OK();
}

View File

@ -621,6 +621,8 @@ public class SessionVariable implements Serializable, Writable {
public static final String ENABLE_MATCH_WITHOUT_INVERTED_INDEX = "enable_match_without_inverted_index";
public static final String ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX = "enable_fallback_on_missing_inverted_index";
public static final String IN_LIST_VALUE_COUNT_THRESHOLD = "in_list_value_count_threshold";
/**
* If set false, user couldn't submit analyze SQL and FE won't allocate any related resources.
*/
@ -2022,6 +2024,13 @@ public class SessionVariable implements Serializable, Writable {
})
public boolean enableFallbackOnMissingInvertedIndex = true;
/**
 * Session variable registered under {@code IN_LIST_VALUE_COUNT_THRESHOLD}.
 * Per its description, an IN condition with more values than this threshold
 * will not use fast_execute. Defaults to 10.
 */
@VariableMgr.VarAttr(name = IN_LIST_VALUE_COUNT_THRESHOLD, description = {
"in条件value数量大于这个threshold后将不会走fast_execute",
"When the number of values in the IN condition exceeds this threshold,"
+ " fast_execute will not be used."
})
public int inListValueCountThreshold = 10;
/**
 * Stores the given flag in this object's {@code enableESParallelScroll} field.
 *
 * @param enableESParallelScroll the new value of the field
 */
public void setEnableEsParallelScroll(boolean enableESParallelScroll) {
this.enableESParallelScroll = enableESParallelScroll;
}
@ -3525,6 +3534,7 @@ public class SessionVariable implements Serializable, Writable {
tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex);
tResult.setKeepCarriageReturn(keepCarriageReturn);
tResult.setInListValueCountThreshold(inListValueCountThreshold);
return tResult;
}

View File

@ -322,6 +322,7 @@ struct TQueryOptions {
126: optional i32 runtime_bloom_filter_max_size = 16777216;
// Threshold on the number of values in an IN list; defaults to 10.
// NOTE(review): presumably populated from the FE session variable of the same name — confirm.
127: optional i32 in_list_value_count_threshold = 10;
128: optional bool enable_verbose_profile = false;
129: optional i32 rpc_verbose_profile_max_instance_count = 0;

View File

@ -65,3 +65,9 @@
-- !sql --
2
-- !sql --
852
-- !sql --
852

View File

@ -117,6 +117,12 @@ suite("test_index_inlist_fault_injection", "nonConcurrent") {
qt_sql """ select count() from ${indexTbName} where (clientip = '2.1.0.0' or clientip = NULL and clientip = '40.135.0.0'); """
sql """ set enable_common_expr_pushdown = true; """
sql """ set in_list_value_count_threshold = 0; """
qt_sql """ select count() from ${indexTbName} where (clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0', '247.37.0.0') or status = 200); """
sql """ set in_list_value_count_threshold = 10; """
qt_sql """ select count() from ${indexTbName} where (clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0', '247.37.0.0') or status = 200); """
} finally {
}
} finally {