[test](inverted index) add an Inverted Index Testing Switch (#38077) (#38947)

https://github.com/apache/doris/pull/38077
This commit is contained in:
zzzxl
2024-08-07 11:25:36 +08:00
committed by GitHub
parent fc0222a64c
commit 8cb5aa64f4
5 changed files with 84 additions and 49 deletions

View File

@ -998,7 +998,9 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
}
bool SegmentIterator::_downgrade_without_index(Status res, bool need_remaining) {
if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND ||
bool is_fallback =
_opts.runtime_state->query_options().enable_fallback_on_missing_inverted_index;
if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND && is_fallback) ||
res.code() == ErrorCode::INVERTED_INDEX_BYPASS ||
res.code() == ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED ||
(res.code() == ErrorCode::INVERTED_INDEX_NO_TERMS && need_remaining)) {

View File

@ -95,7 +95,7 @@ Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block,
// set default value to 0, and match functions only need to set 1/true
vec_res.resize_fill(input_rows_count);
RETURN_IF_ERROR(execute_match(
column_name, match_query_str, input_rows_count, values, inverted_index_ctx,
context, column_name, match_query_str, input_rows_count, values, inverted_index_ctx,
(array_col ? &(array_col->get_offsets()) : nullptr), vec_res));
block.replace_by_position(result, std::move(res));
} else {
@ -116,6 +116,10 @@ inline doris::segment_v2::InvertedIndexQueryType FunctionMatchBase::get_query_ty
return doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY;
} else if (fn_name == MATCH_PHRASE_FUNCTION) {
return doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY;
} else if (fn_name == MATCH_PHRASE_PREFIX_FUNCTION) {
return doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY;
} else if (fn_name == MATCH_PHRASE_REGEXP_FUNCTION) {
return doris::segment_v2::InvertedIndexQueryType::MATCH_REGEXP_QUERY;
}
return doris::segment_v2::InvertedIndexQueryType::UNKNOWN_QUERY;
}
@ -151,16 +155,27 @@ inline std::vector<std::string> FunctionMatchBase::analyse_data_token(
return data_tokens;
}
Status FunctionMatchAny::execute_match(const std::string& column_name,
// Precondition check invoked at the top of the execute_match overrides.
//
// Returns INVERTED_INDEX_NOT_SUPPORTED when either
//   * the query option enable_match_without_inverted_index is false, or
//   * the debug point "match.invert_index_not_support_execute_match" is active;
// otherwise returns Status::OK().
//
// `function_name` is only used to fill in the error message.
Status FunctionMatchBase::check(FunctionContext* context, const std::string& function_name) const {
    const auto& query_opts = context->state()->query_options();
    if (query_opts.enable_match_without_inverted_index) {
        // The switch allows execute_match; the debug point can still force the error.
        DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", {
            return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
                    "{} not support execute_match", function_name);
        });
        return Status::OK();
    }
    // Switch is off: execute_match must not run for this query.
    return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
            "{} not support execute_match", function_name);
}
Status FunctionMatchAny::execute_match(FunctionContext* context, const std::string& column_name,
const std::string& match_query_str, size_t input_rows_count,
const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const {
DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", {
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
"FunctionMatchAny not support execute_match");
})
RETURN_IF_ERROR(check(context, name));
doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
if (inverted_index_ctx) {
@ -201,16 +216,13 @@ Status FunctionMatchAny::execute_match(const std::string& column_name,
return Status::OK();
}
Status FunctionMatchAll::execute_match(const std::string& column_name,
Status FunctionMatchAll::execute_match(FunctionContext* context, const std::string& column_name,
const std::string& match_query_str, size_t input_rows_count,
const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const {
DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", {
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
"FunctionMatchAll not support execute_match");
})
RETURN_IF_ERROR(check(context, name));
doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
if (inverted_index_ctx) {
@ -257,16 +269,13 @@ Status FunctionMatchAll::execute_match(const std::string& column_name,
return Status::OK();
}
Status FunctionMatchPhrase::execute_match(const std::string& column_name,
Status FunctionMatchPhrase::execute_match(FunctionContext* context, const std::string& column_name,
const std::string& match_query_str,
size_t input_rows_count, const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const {
DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", {
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
"FunctionMatchPhrase not support execute_match");
})
RETURN_IF_ERROR(check(context, name));
doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
if (inverted_index_ctx) {
@ -330,13 +339,11 @@ Status FunctionMatchPhrase::execute_match(const std::string& column_name,
}
Status FunctionMatchPhrasePrefix::execute_match(
const std::string& column_name, const std::string& match_query_str, size_t input_rows_count,
const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const {
DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", {
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
"FunctionMatchPhrasePrefix not support execute_match");
})
FunctionContext* context, const std::string& column_name,
const std::string& match_query_str, size_t input_rows_count, const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const {
RETURN_IF_ERROR(check(context, name));
doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
if (inverted_index_ctx) {
@ -400,16 +407,13 @@ Status FunctionMatchPhrasePrefix::execute_match(
return Status::OK();
}
Status FunctionMatchRegexp::execute_match(const std::string& column_name,
Status FunctionMatchRegexp::execute_match(FunctionContext* context, const std::string& column_name,
const std::string& match_query_str,
size_t input_rows_count, const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const {
DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", {
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
"FunctionMatchRegexp not support execute_match");
})
RETURN_IF_ERROR(check(context, name));
doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
if (inverted_index_ctx) {

View File

@ -53,6 +53,8 @@ namespace doris::vectorized {
// SQL-level names of the match functions. FunctionMatchBase::get_query_type()
// compares its fn_name argument against these to pick an InvertedIndexQueryType.
//
// Declared `inline` (C++17) because this is a header: a plain namespace-scope
// `const std::string` has internal linkage, so every translation unit that
// includes the header would construct its own copy of each string.
inline const std::string MATCH_ANY_FUNCTION = "match_any";
inline const std::string MATCH_ALL_FUNCTION = "match_all";
inline const std::string MATCH_PHRASE_FUNCTION = "match_phrase";
inline const std::string MATCH_PHRASE_PREFIX_FUNCTION = "match_phrase_prefix";
// Note: despite "PHRASE" in the identifier, the value is "match_regexp" and
// get_query_type() maps it to MATCH_REGEXP_QUERY.
inline const std::string MATCH_PHRASE_REGEXP_FUNCTION = "match_regexp";
class FunctionMatchBase : public IFunction {
public:
@ -70,8 +72,9 @@ public:
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override;
virtual Status execute_match(const std::string& column_name, const std::string& match_query_str,
size_t input_rows_count, const ColumnString* string_col,
virtual Status execute_match(FunctionContext* context, const std::string& column_name,
const std::string& match_query_str, size_t input_rows_count,
const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const = 0;
@ -84,6 +87,8 @@ public:
int32_t current_block_row_idx,
const ColumnArray::Offsets64* array_offsets,
int32_t& current_src_array_offset) const;
Status check(FunctionContext* context, const std::string& function_name) const;
};
class FunctionMatchAny : public FunctionMatchBase {
@ -93,9 +98,9 @@ public:
String get_name() const override { return name; }
Status execute_match(const std::string& column_name, const std::string& match_query_str,
size_t input_rows_count, const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
Status execute_match(FunctionContext* context, const std::string& column_name,
const std::string& match_query_str, size_t input_rows_count,
const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override;
};
@ -107,9 +112,9 @@ public:
String get_name() const override { return name; }
Status execute_match(const std::string& column_name, const std::string& match_query_str,
size_t input_rows_count, const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
Status execute_match(FunctionContext* context, const std::string& column_name,
const std::string& match_query_str, size_t input_rows_count,
const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override;
};
@ -121,9 +126,9 @@ public:
String get_name() const override { return name; }
Status execute_match(const std::string& column_name, const std::string& match_query_str,
size_t input_rows_count, const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
Status execute_match(FunctionContext* context, const std::string& column_name,
const std::string& match_query_str, size_t input_rows_count,
const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override;
};
@ -135,9 +140,9 @@ public:
String get_name() const override { return name; }
Status execute_match(const std::string& column_name, const std::string& match_query_str,
size_t input_rows_count, const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
Status execute_match(FunctionContext* context, const std::string& column_name,
const std::string& match_query_str, size_t input_rows_count,
const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override;
};
@ -149,9 +154,9 @@ public:
String get_name() const override { return name; }
Status execute_match(const std::string& column_name, const std::string& match_query_str,
size_t input_rows_count, const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
Status execute_match(FunctionContext* context, const std::string& column_name,
const std::string& match_query_str, size_t input_rows_count,
const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override;
};
@ -163,9 +168,9 @@ public:
String get_name() const override { return name; }
Status execute_match(const std::string& column_name, const std::string& match_query_str,
size_t input_rows_count, const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
Status execute_match(FunctionContext* context, const std::string& column_name,
const std::string& match_query_str, size_t input_rows_count,
const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override {
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(

View File

@ -615,6 +615,9 @@ public class SessionVariable implements Serializable, Writable {
public static final String DISABLE_EMPTY_PARTITION_PRUNE = "disable_empty_partition_prune";
// CLOUD_VARIABLES_BEGIN
// Session variable names for the inverted-index switches. They are bound via
// @VariableMgr.VarAttr to the fields enableMatchWithoutInvertedIndex and
// enableFallbackOnMissingInvertedIndex respectively.
public static final String ENABLE_MATCH_WITHOUT_INVERTED_INDEX = "enable_match_without_inverted_index";
public static final String ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX = "enable_fallback_on_missing_inverted_index";
/**
* If set false, user couldn't submit analyze SQL and FE won't allocate any related resources.
*/
@ -1989,6 +1992,20 @@ public class SessionVariable implements Serializable, Writable {
})
public boolean enableESParallelScroll = true;
/**
 * Session switch exposed as {@code enable_match_without_inverted_index}; defaults to true.
 * Its value is copied into the Thrift query options
 * (tResult.setEnableMatchWithoutInvertedIndex) so the backend can read it.
 */
@VariableMgr.VarAttr(name = ENABLE_MATCH_WITHOUT_INVERTED_INDEX, description = {
"开启无索引match查询功能,建议正式环境保持开启",
"Enable no-index match query functionality."
+ " it is recommended to keep this enabled in the production environment."
})
public boolean enableMatchWithoutInvertedIndex = true;
/**
 * Session switch exposed as {@code enable_fallback_on_missing_inverted_index}; defaults to true.
 * Its value is copied into the Thrift query options
 * (tResult.setEnableFallbackOnMissingInvertedIndex) so the backend can read it.
 *
 * The user-facing description previously stated that enabling the switch makes the query
 * report an error, which is the opposite of what the name says: enabling it turns the
 * fallback on, and the error is what happens when it is disabled.
 */
@VariableMgr.VarAttr(name = ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX, description = {
        "开启后在没有找到索引文件的情况下回退为不使用索引的查询,关闭后查询直接报错,建议正式环境保持开启",
        "When enabled, a query falls back to running without the inverted index if the index"
                + " file is not found; when disabled, the query fails with an error instead."
                + " It is recommended to keep this enabled in the production environment."
})
public boolean enableFallbackOnMissingInvertedIndex = true;
public void setEnableEsParallelScroll(boolean enableESParallelScroll) {
this.enableESParallelScroll = enableESParallelScroll;
}
@ -3472,6 +3489,10 @@ public class SessionVariable implements Serializable, Writable {
tResult.setSerdeDialect(getSerdeDialect());
tResult.setHiveOrcUseColumnNames(hiveOrcUseColumnNames);
tResult.setHiveParquetUseColumnNames(hiveParquetUseColumnNames);
tResult.setEnableMatchWithoutInvertedIndex(enableMatchWithoutInvertedIndex);
tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex);
tResult.setKeepCarriageReturn(keepCarriageReturn);
return tResult;
}

View File

@ -317,6 +317,9 @@ struct TQueryOptions {
123: optional bool hive_parquet_use_column_names = true;
124: optional bool hive_orc_use_column_names = true;
// When false, FunctionMatchBase::check() makes execute_match return
// INVERTED_INDEX_NOT_SUPPORTED.
125: optional bool enable_match_without_inverted_index = true;
// Controls whether INVERTED_INDEX_FILE_NOT_FOUND counts as a downgrade case in
// SegmentIterator::_downgrade_without_index.
126: optional bool enable_fallback_on_missing_inverted_index = true;
// For cloud, to control if the content would be written into file cache
1000: optional bool disable_file_cache = false
}