[feature](invert index) support the match_phrase_prefix feature without creating an inverted index (#30414)

This commit is contained in:
zzzxl
2024-01-29 10:42:12 +08:00
committed by yiguolei
parent 7667fe8570
commit ae38f28280
4 changed files with 111 additions and 15 deletions

View File

@ -311,6 +311,65 @@ Status FunctionMatchPhrase::execute_match(const std::string& column_name,
return Status::OK();
}
/// Evaluates MATCH_PHRASE_PREFIX row by row against tokenized column data.
///
/// The query string is analysed into `query_tokens`. A row matches when its
/// token sequence contains a window of `query_tokens.size()` consecutive tokens
/// in which every token except the last equals the corresponding query token,
/// and the last token starts with the last query token (prefix match).
///
/// @param column_name        forwarded to the analyser helpers.
/// @param match_query_str    the phrase-prefix query text.
/// @param input_rows_count   number of rows to evaluate.
/// @param string_col         forwarded to analyse_data_token.
/// @param inverted_index_ctx supplies the parser type and analyzer; may be null,
///                           in which case no query tokens are produced and no
///                           row is marked.
/// @param array_offsets      forwarded to analyse_data_token.
/// @param result             `result[i]` is set to true for each matching row;
///                           entries of non-matching rows are left untouched.
/// @return Status::OK() in all cases visible here.
Status FunctionMatchPhrasePrefix::execute_match(
        const std::string& column_name, const std::string& match_query_str, size_t input_rows_count,
        const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
        const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const {
    doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
    std::vector<std::string> query_tokens;
    // The context is optional: only touch it (parser type, analyzer) when it is
    // present. Without it the query cannot be analysed, so query_tokens stays
    // empty and the function takes the "empty token result" path below instead
    // of dereferencing a null pointer.
    if (inverted_index_ctx != nullptr) {
        parser_type = inverted_index_ctx->parser_type;
        auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
                                                                            match_query_str);
        doris::segment_v2::InvertedIndexReader::get_analyse_result(
                query_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
                doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY);
    }
    if (query_tokens.empty()) {
        LOG(WARNING) << fmt::format(
                "token parser result is empty for query, "
                "please check your query: '{}' and index parser: '{}'",
                match_query_str, inverted_index_parser_type_to_string(parser_type));
        return Status::OK();
    }

    // query_tokens is non-empty from here on, so `query_size - 1` cannot wrap.
    const size_t query_size = query_tokens.size();
    const size_t last_query_idx = query_size - 1;

    int32_t current_src_array_offset = 0;
    for (size_t i = 0; i < input_rows_count; i++) {
        // Called for every row before any early `continue`: the helper receives
        // current_src_array_offset and presumably advances it per row (its
        // definition is not visible here -- confirm).
        const std::vector<std::string> data_tokens =
                analyse_data_token(column_name, inverted_index_ctx, string_col, i, array_offsets,
                                   current_src_array_offset);

        // A row with fewer tokens than the query can never contain the phrase.
        // Checking this first also prevents the unsigned subtraction below from
        // wrapping around, which previously led to out-of-bounds reads.
        if (data_tokens.size() < query_size) {
            continue;
        }
        const size_t last_start = data_tokens.size() - query_size;

        for (size_t start = 0; start <= last_start; start++) {
            bool match = true;
            for (size_t k = 0; k < query_size; k++) {
                const std::string& data_token = data_tokens[start + k];
                const std::string& query_token = query_tokens[k];
                // Last query token: prefix comparison. compare() clamps the
                // length to data_token.size(), so a shorter data token simply
                // compares unequal. All other tokens: exact equality.
                const bool token_ok =
                        (k == last_query_idx)
                                ? data_token.compare(0, query_token.size(), query_token) == 0
                                : data_token == query_token;
                if (!token_ok) {
                    match = false;
                    break;
                }
            }
            if (match) {
                result[i] = true;
                break;
            }
        }
    }
    return Status::OK();
}
void register_function_match(SimpleFunctionFactory& factory) {
factory.register_function<FunctionMatchAny>();
factory.register_function<FunctionMatchAll>();

View File

@ -139,10 +139,7 @@ public:
size_t input_rows_count, const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override {
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
"FunctionMatchPhrasePrefix not support execute_match");
}
ColumnUInt8::Container& result) const override;
};
class FunctionMatchRegexp : public FunctionMatchBase {

View File

@ -5,12 +5,21 @@
-- !sql --
863
-- !sql --
863
-- !sql --
235
-- !sql --
235
-- !sql --
235
-- !sql --
166
-- !sql --
166
@ -23,6 +32,12 @@
-- !sql --
56
-- !sql --
56
-- !sql --
7
-- !sql --
7

View File

@ -18,8 +18,10 @@
suite("test_index_match_phrase_prefix", "p0"){
def indexTbName1 = "test_index_match_phrase_prefix"
def indexTbName2 = "test_index_match_phrase_prefix2"
sql "DROP TABLE IF EXISTS ${indexTbName1}"
sql "DROP TABLE IF EXISTS ${indexTbName2}"
sql """
CREATE TABLE ${indexTbName1} (
@ -38,6 +40,22 @@ suite("test_index_match_phrase_prefix", "p0"){
);
"""
sql """
CREATE TABLE ${indexTbName2} (
`@timestamp` int(11) NULL COMMENT "",
`clientip` varchar(20) NULL COMMENT "",
`request` text NULL COMMENT "",
`status` int(11) NULL COMMENT "",
`size` int(11) NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT "OLAP"
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""
def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
expected_succ_rows = -1, load_to_single_tablet = 'true' ->
@ -76,24 +94,31 @@ suite("test_index_match_phrase_prefix", "p0"){
}
try {
load_httplogs_data.call(indexTbName1, 'test_index_match_phrase_prefix', 'true', 'json', 'documents-1000.json')
load_httplogs_data.call(indexTbName1, indexTbName1, 'true', 'json', 'documents-1000.json')
load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json')
sql "sync"
qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'ima'; """
qt_sql """ select count() from test_index_match_phrase_prefix where request like '%ima%'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'ima'; """
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'ima'; """
qt_sql """ select count() from ${indexTbName1} where request like '%ima%'; """
qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'images/h'; """
qt_sql """ select count() from test_index_match_phrase_prefix where request like '%images/h%'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/h'; """
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'images/h'; """
qt_sql """ select count() from ${indexTbName1} where request like '%images/h%'; """
qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'images/hm'; """
qt_sql """ select count() from test_index_match_phrase_prefix where request like '%images/hm%'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/hm'; """
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'images/hm'; """
qt_sql """ select count() from ${indexTbName1} where request like '%images/hm%'; """
qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix '/french/images/n'; """
qt_sql """ select count() from test_index_match_phrase_prefix where request like '%/french/images/n%'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/images/n'; """
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix '/french/images/n'; """
qt_sql """ select count() from ${indexTbName1} where request like '%/french/images/n%'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/tickets/images/ti'; """
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix '/french/tickets/images/ti'; """
qt_sql """ select count() from ${indexTbName1} where request like '%/french/tickets/images/ti%'; """
qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix '/french/tickets/images/ti'; """
qt_sql """ select count() from test_index_match_phrase_prefix where request like '%/french/tickets/images/ti%'; """
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
}