[opt](inverted index) Add the in_list_value_count_threshold session variable to control whether an in_list predicate can use fast_execute. (#40141)
https://github.com/apache/doris/pull/40022
This commit is contained in:
@ -917,10 +917,17 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((pred->type() == PredicateType::IN_LIST || pred->type() == PredicateType::NOT_IN_LIST) &&
|
||||
pred->predicate_params()->marked_by_runtime_filter) {
|
||||
if (pred->type() == PredicateType::IN_LIST || pred->type() == PredicateType::NOT_IN_LIST) {
|
||||
auto predicate_param = pred->predicate_params();
|
||||
// in_list or not_in_list predicate produced by runtime filter
|
||||
return false;
|
||||
if (predicate_param->marked_by_runtime_filter) {
|
||||
return false;
|
||||
}
|
||||
// the in_list or not_in_list value count cannot be greater than threshold
|
||||
int32_t threshold = _opts.runtime_state->query_options().in_list_value_count_threshold;
|
||||
if (pred_in_compound && predicate_param->values.size() > threshold) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// UNTOKENIZED strings exceed ignore_above, they are written as null, causing range query errors
|
||||
|
||||
@ -652,6 +652,9 @@ std::string VExpr::gen_predicate_result_sign(Block& block, const ColumnNumbers&
|
||||
pred_result_sign +=
|
||||
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_" + function_name + "_";
|
||||
if (function_name == "in" || function_name == "not_in") {
|
||||
if (arguments.size() - 1 > _in_list_value_count_threshold) {
|
||||
return pred_result_sign;
|
||||
}
|
||||
// Generating 'result_sign' from 'inlist' requires sorting the values.
|
||||
std::set<std::string> values;
|
||||
for (size_t i = 1; i < arguments.size(); i++) {
|
||||
|
||||
@ -307,6 +307,7 @@ protected:
|
||||
uint32_t _index_unique_id = 0;
|
||||
bool _can_fast_execute = false;
|
||||
bool _enable_inverted_index_query = true;
|
||||
uint32_t _in_list_value_count_threshold = 10;
|
||||
};
|
||||
|
||||
} // namespace vectorized
|
||||
|
||||
@ -28,6 +28,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "common/status.h"
|
||||
#include "runtime/runtime_state.h"
|
||||
#include "vec/core/block.h"
|
||||
#include "vec/core/column_numbers.h"
|
||||
#include "vec/core/column_with_type_and_name.h"
|
||||
@ -79,6 +80,7 @@ Status VInPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
|
||||
VExpr::register_function_context(state, context);
|
||||
_prepare_finished = true;
|
||||
_can_fast_execute = can_fast_execute();
|
||||
_in_list_value_count_threshold = state->query_options().in_list_value_count_threshold;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@ -621,6 +621,8 @@ public class SessionVariable implements Serializable, Writable {
|
||||
public static final String ENABLE_MATCH_WITHOUT_INVERTED_INDEX = "enable_match_without_inverted_index";
|
||||
public static final String ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX = "enable_fallback_on_missing_inverted_index";
|
||||
|
||||
public static final String IN_LIST_VALUE_COUNT_THRESHOLD = "in_list_value_count_threshold";
|
||||
|
||||
/**
|
||||
* If set false, user couldn't submit analyze SQL and FE won't allocate any related resources.
|
||||
*/
|
||||
@ -2022,6 +2024,13 @@ public class SessionVariable implements Serializable, Writable {
|
||||
})
|
||||
public boolean enableFallbackOnMissingInvertedIndex = true;
|
||||
|
||||
@VariableMgr.VarAttr(name = IN_LIST_VALUE_COUNT_THRESHOLD, description = {
|
||||
"in条件value数量大于这个threshold后将不会走fast_execute",
|
||||
"When the number of values in the IN condition exceeds this threshold,"
|
||||
+ " fast_execute will not be used."
|
||||
})
|
||||
public int inListValueCountThreshold = 10;
|
||||
|
||||
public void setEnableEsParallelScroll(boolean enableESParallelScroll) {
|
||||
this.enableESParallelScroll = enableESParallelScroll;
|
||||
}
|
||||
@ -3525,6 +3534,7 @@ public class SessionVariable implements Serializable, Writable {
|
||||
tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex);
|
||||
|
||||
tResult.setKeepCarriageReturn(keepCarriageReturn);
|
||||
tResult.setInListValueCountThreshold(inListValueCountThreshold);
|
||||
return tResult;
|
||||
}
|
||||
|
||||
|
||||
@ -322,6 +322,7 @@ struct TQueryOptions {
|
||||
|
||||
126: optional i32 runtime_bloom_filter_max_size = 16777216;
|
||||
|
||||
127: optional i32 in_list_value_count_threshold = 10;
|
||||
128: optional bool enable_verbose_profile = false;
|
||||
129: optional i32 rpc_verbose_profile_max_instance_count = 0;
|
||||
|
||||
|
||||
@ -65,3 +65,9 @@
|
||||
-- !sql --
|
||||
2
|
||||
|
||||
-- !sql --
|
||||
852
|
||||
|
||||
-- !sql --
|
||||
852
|
||||
|
||||
|
||||
@ -117,6 +117,12 @@ suite("test_index_inlist_fault_injection", "nonConcurrent") {
|
||||
qt_sql """ select count() from ${indexTbName} where (clientip = '2.1.0.0' or clientip = NULL and clientip = '40.135.0.0'); """
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
|
||||
sql """ set in_list_value_count_threshold = 0; """
|
||||
qt_sql """ select count() from ${indexTbName} where (clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0', '247.37.0.0') or status = 200); """
|
||||
sql """ set in_list_value_count_threshold = 10; """
|
||||
qt_sql """ select count() from ${indexTbName} where (clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0', '247.37.0.0') or status = 200); """
|
||||
|
||||
} finally {
|
||||
}
|
||||
} finally {
|
||||
|
||||
Reference in New Issue
Block a user