## Proposed changes pick from #38989
This commit is contained in:
@ -1150,6 +1150,9 @@ lucene::util::bkd::relation InvertedIndexVisitor<QT>::compare(std::vector<uint8_
|
||||
Status InvertedIndexIterator::read_from_inverted_index(
|
||||
const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type,
|
||||
uint32_t segment_num_rows, std::shared_ptr<roaring::Roaring>& bit_map, bool skip_try) {
|
||||
DBUG_EXECUTE_IF("return_inverted_index_bypass", {
|
||||
return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>("inverted index bypass");
|
||||
});
|
||||
if (UNLIKELY(_reader == nullptr)) {
|
||||
throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false);
|
||||
}
|
||||
|
||||
@ -17,6 +17,12 @@
|
||||
|
||||
#include "vec/exprs/vmatch_predicate.h"
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wshadow-field"
|
||||
#endif
|
||||
|
||||
#include <CLucene/analysis/LanguageBasedAnalyzer.h>
|
||||
#include <fmt/format.h>
|
||||
#include <fmt/ranges.h> // IWYU pragma: keep
|
||||
#include <gen_cpp/Exprs_types.h>
|
||||
@ -29,6 +35,7 @@
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include "CLucene/analysis/standard95/StandardAnalyzer.h"
|
||||
#include "common/status.h"
|
||||
#include "olap/rowset/segment_v2/inverted_index_reader.h"
|
||||
#include "vec/core/block.h"
|
||||
@ -53,6 +60,12 @@ VMatchPredicate::VMatchPredicate(const TExprNode& node) : VExpr(node) {
|
||||
_inverted_index_ctx->parser_mode = node.match_predicate.parser_mode;
|
||||
_inverted_index_ctx->char_filter_map = node.match_predicate.char_filter_map;
|
||||
_analyzer = InvertedIndexReader::create_analyzer(_inverted_index_ctx.get());
|
||||
_analyzer->set_lowercase(node.match_predicate.parser_lowercase);
|
||||
if (node.match_predicate.parser_stopwords == "none") {
|
||||
_analyzer->set_stopwords(nullptr);
|
||||
} else {
|
||||
_analyzer->set_stopwords(&lucene::analysis::standard95::stop_words);
|
||||
}
|
||||
_inverted_index_ctx->analyzer = _analyzer.get();
|
||||
}
|
||||
|
||||
|
||||
@ -120,10 +120,29 @@ inline doris::segment_v2::InvertedIndexQueryType FunctionMatchBase::get_query_ty
|
||||
return doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY;
|
||||
} else if (fn_name == MATCH_PHRASE_REGEXP_FUNCTION) {
|
||||
return doris::segment_v2::InvertedIndexQueryType::MATCH_REGEXP_QUERY;
|
||||
} else if (fn_name == MATCH_PHRASE_EDGE_FUNCTION) {
|
||||
return doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_EDGE_QUERY;
|
||||
}
|
||||
return doris::segment_v2::InvertedIndexQueryType::UNKNOWN_QUERY;
|
||||
}
|
||||
|
||||
void FunctionMatchBase::analyse_query_str_token(std::vector<std::string>* query_tokens,
|
||||
InvertedIndexCtx* inverted_index_ctx,
|
||||
const std::string& match_query_str,
|
||||
const std::string& column_name) const {
|
||||
VLOG_DEBUG << "begin to run " << get_name() << ", parser_type: "
|
||||
<< inverted_index_parser_type_to_string(inverted_index_ctx->parser_type);
|
||||
if (inverted_index_ctx->parser_type == InvertedIndexParserType::PARSER_NONE) {
|
||||
query_tokens->emplace_back(match_query_str);
|
||||
return;
|
||||
}
|
||||
auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
|
||||
match_query_str);
|
||||
doris::segment_v2::InvertedIndexReader::get_analyse_result(
|
||||
*query_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
|
||||
get_query_type_from_fn_name());
|
||||
}
|
||||
|
||||
inline std::vector<std::string> FunctionMatchBase::analyse_data_token(
|
||||
const std::string& column_name, InvertedIndexCtx* inverted_index_ctx,
|
||||
const ColumnString* string_col, int32_t current_block_row_idx,
|
||||
@ -134,10 +153,15 @@ inline std::vector<std::string> FunctionMatchBase::analyse_data_token(
|
||||
for (auto next_src_array_offset = (*array_offsets)[current_block_row_idx];
|
||||
current_src_array_offset < next_src_array_offset; ++current_src_array_offset) {
|
||||
const auto& str_ref = string_col->get_data_at(current_src_array_offset);
|
||||
if (inverted_index_ctx->parser_type == InvertedIndexParserType::PARSER_NONE) {
|
||||
data_tokens.emplace_back(str_ref.to_string());
|
||||
continue;
|
||||
}
|
||||
auto reader = doris::segment_v2::InvertedIndexReader::create_reader(
|
||||
inverted_index_ctx, str_ref.to_string());
|
||||
|
||||
std::vector<std::string> element_tokens;
|
||||
|
||||
doris::segment_v2::InvertedIndexReader::get_analyse_result(
|
||||
element_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
|
||||
query_type, false);
|
||||
@ -145,12 +169,15 @@ inline std::vector<std::string> FunctionMatchBase::analyse_data_token(
|
||||
}
|
||||
} else {
|
||||
const auto& str_ref = string_col->get_data_at(current_block_row_idx);
|
||||
auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
|
||||
str_ref.to_string());
|
||||
|
||||
doris::segment_v2::InvertedIndexReader::get_analyse_result(data_tokens, reader.get(),
|
||||
inverted_index_ctx->analyzer,
|
||||
column_name, query_type, false);
|
||||
if (inverted_index_ctx->parser_type == InvertedIndexParserType::PARSER_NONE) {
|
||||
data_tokens.emplace_back(str_ref.to_string());
|
||||
} else {
|
||||
auto reader = doris::segment_v2::InvertedIndexReader::create_reader(
|
||||
inverted_index_ctx, str_ref.to_string());
|
||||
doris::segment_v2::InvertedIndexReader::get_analyse_result(
|
||||
data_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
|
||||
query_type, false);
|
||||
}
|
||||
}
|
||||
return data_tokens;
|
||||
}
|
||||
@ -177,23 +204,14 @@ Status FunctionMatchAny::execute_match(FunctionContext* context, const std::stri
|
||||
ColumnUInt8::Container& result) const {
|
||||
RETURN_IF_ERROR(check(context, name));
|
||||
|
||||
doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
|
||||
if (inverted_index_ctx) {
|
||||
parser_type = inverted_index_ctx->parser_type;
|
||||
}
|
||||
VLOG_DEBUG << "begin to run FunctionMatchAny::execute_match, parser_type: "
|
||||
<< inverted_index_parser_type_to_string(parser_type);
|
||||
auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
|
||||
match_query_str);
|
||||
std::vector<std::string> query_tokens;
|
||||
doris::segment_v2::InvertedIndexReader::get_analyse_result(
|
||||
query_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
|
||||
doris::segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY);
|
||||
analyse_query_str_token(&query_tokens, inverted_index_ctx, match_query_str, column_name);
|
||||
if (query_tokens.empty()) {
|
||||
VLOG_DEBUG << fmt::format(
|
||||
"token parser result is empty for query, "
|
||||
"please check your query: '{}' and index parser: '{}'",
|
||||
match_query_str, inverted_index_parser_type_to_string(parser_type));
|
||||
match_query_str,
|
||||
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -224,23 +242,14 @@ Status FunctionMatchAll::execute_match(FunctionContext* context, const std::stri
|
||||
ColumnUInt8::Container& result) const {
|
||||
RETURN_IF_ERROR(check(context, name));
|
||||
|
||||
doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
|
||||
if (inverted_index_ctx) {
|
||||
parser_type = inverted_index_ctx->parser_type;
|
||||
}
|
||||
VLOG_DEBUG << "begin to run FunctionMatchAll::execute_match, parser_type: "
|
||||
<< inverted_index_parser_type_to_string(parser_type);
|
||||
auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
|
||||
match_query_str);
|
||||
std::vector<std::string> query_tokens;
|
||||
doris::segment_v2::InvertedIndexReader::get_analyse_result(
|
||||
query_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
|
||||
doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY);
|
||||
analyse_query_str_token(&query_tokens, inverted_index_ctx, match_query_str, column_name);
|
||||
if (query_tokens.empty()) {
|
||||
VLOG_DEBUG << fmt::format(
|
||||
"token parser result is empty for query, "
|
||||
"please check your query: '{}' and index parser: '{}'",
|
||||
match_query_str, inverted_index_parser_type_to_string(parser_type));
|
||||
match_query_str,
|
||||
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -277,23 +286,14 @@ Status FunctionMatchPhrase::execute_match(FunctionContext* context, const std::s
|
||||
ColumnUInt8::Container& result) const {
|
||||
RETURN_IF_ERROR(check(context, name));
|
||||
|
||||
doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
|
||||
if (inverted_index_ctx) {
|
||||
parser_type = inverted_index_ctx->parser_type;
|
||||
}
|
||||
VLOG_DEBUG << "begin to run FunctionMatchPhrase::execute_match, parser_type: "
|
||||
<< inverted_index_parser_type_to_string(parser_type);
|
||||
auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
|
||||
match_query_str);
|
||||
std::vector<std::string> query_tokens;
|
||||
doris::segment_v2::InvertedIndexReader::get_analyse_result(
|
||||
query_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
|
||||
doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY);
|
||||
analyse_query_str_token(&query_tokens, inverted_index_ctx, match_query_str, column_name);
|
||||
if (query_tokens.empty()) {
|
||||
VLOG_DEBUG << fmt::format(
|
||||
"token parser result is empty for query, "
|
||||
"please check your query: '{}' and index parser: '{}'",
|
||||
match_query_str, inverted_index_parser_type_to_string(parser_type));
|
||||
match_query_str,
|
||||
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -345,25 +345,14 @@ Status FunctionMatchPhrasePrefix::execute_match(
|
||||
ColumnUInt8::Container& result) const {
|
||||
RETURN_IF_ERROR(check(context, name));
|
||||
|
||||
doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
|
||||
if (inverted_index_ctx) {
|
||||
parser_type = inverted_index_ctx->parser_type;
|
||||
}
|
||||
VLOG_DEBUG << "begin to run FunctionMatchPhrasePrefix::execute_match, parser_type: "
|
||||
<< inverted_index_parser_type_to_string(parser_type);
|
||||
|
||||
auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
|
||||
match_query_str);
|
||||
std::vector<std::string> query_tokens;
|
||||
doris::segment_v2::InvertedIndexReader::get_analyse_result(
|
||||
query_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
|
||||
doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY);
|
||||
|
||||
analyse_query_str_token(&query_tokens, inverted_index_ctx, match_query_str, column_name);
|
||||
if (query_tokens.empty()) {
|
||||
VLOG_DEBUG << fmt::format(
|
||||
"token parser result is empty for query, "
|
||||
"please check your query: '{}' and index parser: '{}'",
|
||||
match_query_str, inverted_index_parser_type_to_string(parser_type));
|
||||
match_query_str,
|
||||
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -415,18 +404,15 @@ Status FunctionMatchRegexp::execute_match(FunctionContext* context, const std::s
|
||||
ColumnUInt8::Container& result) const {
|
||||
RETURN_IF_ERROR(check(context, name));
|
||||
|
||||
doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
|
||||
if (inverted_index_ctx) {
|
||||
parser_type = inverted_index_ctx->parser_type;
|
||||
}
|
||||
VLOG_DEBUG << "begin to run FunctionMatchRegexp::execute_match, parser_type: "
|
||||
<< inverted_index_parser_type_to_string(parser_type);
|
||||
<< inverted_index_parser_type_to_string(inverted_index_ctx->parser_type);
|
||||
|
||||
if (match_query_str.empty()) {
|
||||
VLOG_DEBUG << fmt::format(
|
||||
"token parser result is empty for query, "
|
||||
"please check your query: '{}' and index parser: '{}'",
|
||||
match_query_str, inverted_index_parser_type_to_string(parser_type));
|
||||
match_query_str,
|
||||
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@ -55,6 +55,7 @@ const std::string MATCH_ALL_FUNCTION = "match_all";
|
||||
const std::string MATCH_PHRASE_FUNCTION = "match_phrase";
|
||||
const std::string MATCH_PHRASE_PREFIX_FUNCTION = "match_phrase_prefix";
|
||||
const std::string MATCH_PHRASE_REGEXP_FUNCTION = "match_regexp";
|
||||
const std::string MATCH_PHRASE_EDGE_FUNCTION = "match_phrase_edge";
|
||||
|
||||
class FunctionMatchBase : public IFunction {
|
||||
public:
|
||||
@ -81,6 +82,11 @@ public:
|
||||
|
||||
doris::segment_v2::InvertedIndexQueryType get_query_type_from_fn_name() const;
|
||||
|
||||
void analyse_query_str_token(std::vector<std::string>* query_tokens,
|
||||
InvertedIndexCtx* inverted_index_ctx,
|
||||
const std::string& match_query_str,
|
||||
const std::string& field_name) const;
|
||||
|
||||
std::vector<std::string> analyse_data_token(const std::string& column_name,
|
||||
InvertedIndexCtx* inverted_index_ctx,
|
||||
const ColumnString* string_col,
|
||||
|
||||
@ -101,6 +101,18 @@ public class InvertedIndexUtil {
|
||||
return charFilterMap;
|
||||
}
|
||||
|
||||
public static boolean getInvertedIndexParserLowercase(Map<String, String> properties) {
|
||||
String lowercase = properties == null ? null : properties.get(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
|
||||
// default is true if not set
|
||||
return lowercase != null ? Boolean.parseBoolean(lowercase) : true;
|
||||
}
|
||||
|
||||
public static String getInvertedIndexParserStopwords(Map<String, String> properties) {
|
||||
String stopwrods = properties == null ? null : properties.get(INVERTED_INDEX_PARSER_STOPWORDS_KEY);
|
||||
// default is "" if not set
|
||||
return stopwrods != null ? stopwrods : "";
|
||||
}
|
||||
|
||||
public static void checkInvertedIndexParser(String indexColName, PrimitiveType colType,
|
||||
Map<String, String> properties) throws AnalysisException {
|
||||
String parser = null;
|
||||
|
||||
@ -150,6 +150,8 @@ public class MatchPredicate extends Predicate {
|
||||
private String invertedIndexParser;
|
||||
private String invertedIndexParserMode;
|
||||
private Map<String, String> invertedIndexCharFilter;
|
||||
private boolean invertedIndexParserLowercase = true;
|
||||
private String invertedIndexParserStopwords = "";
|
||||
|
||||
public MatchPredicate(Operator op, Expr e1, Expr e2) {
|
||||
super();
|
||||
@ -170,23 +172,22 @@ public class MatchPredicate extends Predicate {
|
||||
invertedIndexParser = other.invertedIndexParser;
|
||||
invertedIndexParserMode = other.invertedIndexParserMode;
|
||||
invertedIndexCharFilter = other.invertedIndexCharFilter;
|
||||
invertedIndexParserLowercase = other.invertedIndexParserLowercase;
|
||||
invertedIndexParserStopwords = other.invertedIndexParserStopwords;
|
||||
}
|
||||
|
||||
/**
|
||||
* use for Nereids ONLY
|
||||
*/
|
||||
public MatchPredicate(Operator op, Expr e1, Expr e2, Type retType,
|
||||
NullableMode nullableMode, String invertedIndexParser, String invertedIndexParserMode,
|
||||
Map<String, String> invertedIndexCharFilter) {
|
||||
NullableMode nullableMode, Index invertedIndex) {
|
||||
this(op, e1, e2);
|
||||
if (invertedIndexParser != null) {
|
||||
this.invertedIndexParser = invertedIndexParser;
|
||||
}
|
||||
if (invertedIndexParserMode != null) {
|
||||
this.invertedIndexParserMode = invertedIndexParserMode;
|
||||
}
|
||||
if (invertedIndexParserMode != null) {
|
||||
this.invertedIndexCharFilter = invertedIndexCharFilter;
|
||||
if (invertedIndex != null) {
|
||||
this.invertedIndexParser = invertedIndex.getInvertedIndexParser();
|
||||
this.invertedIndexParserMode = invertedIndex.getInvertedIndexParserMode();
|
||||
this.invertedIndexCharFilter = invertedIndex.getInvertedIndexCharFilter();
|
||||
this.invertedIndexParserLowercase = invertedIndex.getInvertedIndexParserLowercase();
|
||||
this.invertedIndexParserStopwords = invertedIndex.getInvertedIndexParserStopwords();
|
||||
}
|
||||
fn = new Function(new FunctionName(op.name), Lists.newArrayList(e1.getType(), e2.getType()), retType,
|
||||
false, true, nullableMode);
|
||||
@ -220,6 +221,8 @@ public class MatchPredicate extends Predicate {
|
||||
msg.setOpcode(op.getOpcode());
|
||||
msg.match_predicate = new TMatchPredicate(invertedIndexParser, invertedIndexParserMode);
|
||||
msg.match_predicate.setCharFilterMap(invertedIndexCharFilter);
|
||||
msg.match_predicate.setParserLowercase(invertedIndexParserLowercase);
|
||||
msg.match_predicate.setParserStopwords(invertedIndexParserStopwords);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -264,6 +267,8 @@ public class MatchPredicate extends Predicate {
|
||||
invertedIndexParser = index.getInvertedIndexParser();
|
||||
invertedIndexParserMode = index.getInvertedIndexParserMode();
|
||||
invertedIndexCharFilter = index.getInvertedIndexCharFilter();
|
||||
invertedIndexParserLowercase = index.getInvertedIndexParserLowercase();
|
||||
invertedIndexParserStopwords = index.getInvertedIndexParserStopwords();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -158,6 +158,18 @@ public class Index implements Writable {
|
||||
return InvertedIndexUtil.getInvertedIndexCharFilter(properties);
|
||||
}
|
||||
|
||||
public boolean getInvertedIndexParserLowercase() {
|
||||
return InvertedIndexUtil.getInvertedIndexParserLowercase(properties);
|
||||
}
|
||||
|
||||
public String getInvertedIndexParserStopwords() {
|
||||
return InvertedIndexUtil.getInvertedIndexParserStopwords(properties);
|
||||
}
|
||||
|
||||
public boolean isLightIndexChangeSupported() {
|
||||
return indexType == IndexDef.IndexType.INVERTED;
|
||||
}
|
||||
|
||||
public String getComment() {
|
||||
return getComment(false);
|
||||
}
|
||||
|
||||
@ -32,7 +32,6 @@ import org.apache.doris.analysis.FunctionCallExpr;
|
||||
import org.apache.doris.analysis.FunctionName;
|
||||
import org.apache.doris.analysis.FunctionParams;
|
||||
import org.apache.doris.analysis.IndexDef;
|
||||
import org.apache.doris.analysis.InvertedIndexUtil;
|
||||
import org.apache.doris.analysis.IsNullPredicate;
|
||||
import org.apache.doris.analysis.LambdaFunctionCallExpr;
|
||||
import org.apache.doris.analysis.LambdaFunctionExpr;
|
||||
@ -106,9 +105,7 @@ import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@ -213,9 +210,7 @@ public class ExpressionTranslator extends DefaultExpressionVisitor<Expr, PlanTra
|
||||
|
||||
@Override
|
||||
public Expr visitMatch(Match match, PlanTranslatorContext context) {
|
||||
String invertedIndexParser = InvertedIndexUtil.INVERTED_INDEX_PARSER_UNKNOWN;
|
||||
String invertedIndexParserMode = InvertedIndexUtil.INVERTED_INDEX_PARSER_COARSE_GRANULARITY;
|
||||
Map<String, String> invertedIndexCharFilter = new HashMap<>();
|
||||
Index invertedIndex = null;
|
||||
// Get the first slot from match's left expr
|
||||
SlotRef left = (SlotRef) match.left().getInputSlots().stream().findFirst().get().accept(this, context);
|
||||
OlapTable olapTbl = Optional.ofNullable(getOlapTableFromSlotDesc(left.getDesc()))
|
||||
@ -231,9 +226,7 @@ public class ExpressionTranslator extends DefaultExpressionVisitor<Expr, PlanTra
|
||||
if (index.getIndexType() == IndexDef.IndexType.INVERTED) {
|
||||
List<String> columns = index.getColumns();
|
||||
if (columns != null && !columns.isEmpty() && left.getColumnName().equals(columns.get(0))) {
|
||||
invertedIndexParser = index.getInvertedIndexParser();
|
||||
invertedIndexParserMode = index.getInvertedIndexParserMode();
|
||||
invertedIndexCharFilter = index.getInvertedIndexCharFilter();
|
||||
invertedIndex = index;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -243,8 +236,7 @@ public class ExpressionTranslator extends DefaultExpressionVisitor<Expr, PlanTra
|
||||
MatchPredicate.Operator op = match.op();
|
||||
MatchPredicate matchPredicate = new MatchPredicate(op, match.left().accept(this, context),
|
||||
match.right().accept(this, context), match.getDataType().toCatalogDataType(),
|
||||
NullableMode.DEPEND_ON_ARGUMENT, invertedIndexParser, invertedIndexParserMode,
|
||||
invertedIndexCharFilter);
|
||||
NullableMode.DEPEND_ON_ARGUMENT, invertedIndex);
|
||||
matchPredicate.setNullableFromNereids(match.nullable());
|
||||
return matchPredicate;
|
||||
}
|
||||
|
||||
@ -159,6 +159,8 @@ struct TMatchPredicate {
|
||||
1: required string parser_type;
|
||||
2: required string parser_mode;
|
||||
3: optional map<string, string> char_filter_map;
|
||||
4: optional bool parser_lowercase = true;
|
||||
5: optional string parser_stopwords = "";
|
||||
}
|
||||
|
||||
struct TLiteralPredicate {
|
||||
|
||||
@ -0,0 +1,89 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
|
||||
// Checks that MATCH predicates give the same result when the inverted index is used and when it
// is bypassed through the "return_inverted_index_bypass" debug point.
suite("test_match_without_index", "p0") {

    def testTable = "test_match_without_index"
    sql "DROP TABLE IF EXISTS ${testTable}"
    sql """
        CREATE TABLE ${testTable} (
          `@timestamp` int(11) NULL COMMENT "",
          `clientip` string NULL COMMENT "",
          `request` string NULL COMMENT "",
          `status` string NULL COMMENT "",
          `size` int NULL COMMENT "",
          INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '',
          INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser"="unicode", "lower_case" = "false") COMMENT '',
          INDEX status_idx (`status`) USING INVERTED COMMENT '',
          INDEX size_idx (`size`) USING INVERTED COMMENT ''
        ) ENGINE=OLAP
        DUPLICATE KEY(`@timestamp`)
        COMMENT "OLAP"
        DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1
        PROPERTIES (
          "replication_allocation" = "tag.location.default: 1"
        );
    """

    sql """ INSERT INTO ${testTable} VALUES (123, '17.0.0.0', 'HTTP GET', '200', 20); """
    sql """ INSERT INTO ${testTable} VALUES (123, '17.0.0.0', 'Life is like a box of chocolates, you never know what you are going to get.', '200', 20); """

    // Queries that are executed twice: once with the index bypassed and once with the index.
    def create_sql = {
        List<String> list = new ArrayList<>()
        list.add(" select count() from ${testTable} where clientip match_phrase '17' ");
        list.add(" select count() from ${testTable} where clientip match_all '17' ");
        list.add(" select count() from ${testTable} where clientip match_any '17' ");
        list.add(" select count() from ${testTable} where request match_any 'get' ");
        list.add(" select count() from ${testTable} where request match_phrase_prefix 'like box' ");
        return list;
    }

    // Runs every statement in sqlList and appends each result to resultList.
    def execute_sql = { resultList, sqlList ->
        for (sqlStr in sqlList) {
            def sqlResult = sql """ ${sqlStr} """
            resultList.add(sqlResult)
        }
    }

    List<Object> match_res_without_index = new ArrayList<>()
    List<Object> match_res_with_index = new ArrayList<>()

    // Fails the test on the first query whose two results differ, logging both results first.
    def compare_result = { executedSql ->
        assertEquals(match_res_without_index.size(), match_res_with_index.size())
        for (int i = 0; i < match_res_without_index.size(); i++) {
            boolean same = (match_res_without_index[i] == match_res_with_index[i])
            if (!same) {
                logger.info("sql is {}", executedSql[i])
                logger.info("match_res_without_index is {}", match_res_without_index[i])
                logger.info("match_res_with_index is {}", match_res_with_index[i])
            }
            assertTrue(same)
        }
    }

    def index_sql = create_sql.call()
    try {
        GetDebugPoint().enableDebugPointForAllBEs("return_inverted_index_bypass")
        execute_sql.call(match_res_without_index, index_sql)
    } finally {
        // Clean-up only. Assertions must not run here: if the bypass run above threw, a failing
        // comparison inside finally would replace that exception and hide the real cause.
        GetDebugPoint().disableDebugPointForAllBEs("return_inverted_index_bypass")
    }

    // Reached only when the bypass run succeeded; the debug point is disabled again by now.
    execute_sql.call(match_res_with_index, index_sql)
    compare_result.call(index_sql)
}
|
||||
Reference in New Issue
Block a user