diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp
index 38145342a0..35fdb7a42b 100644
--- a/be/src/vec/functions/match.cpp
+++ b/be/src/vec/functions/match.cpp
@@ -311,6 +311,80 @@ Status FunctionMatchPhrase::execute_match(const std::string& column_name,
     return Status::OK();
 }
 
+// Evaluates MATCH_PHRASE_PREFIX row by row over `string_col`.
+// A row matches when its analysed tokens contain a consecutive run where every query token
+// except the last equals the data token at the same position and the last query token is a
+// prefix of its data token. Matching rows get result[i] = true; other entries are not written.
+// Both return paths yield Status::OK(), including the case where the query has no tokens.
+Status FunctionMatchPhrasePrefix::execute_match(
+        const std::string& column_name, const std::string& match_query_str, size_t input_rows_count,
+        const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
+        const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const {
+    doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
+    if (inverted_index_ctx) {
+        parser_type = inverted_index_ctx->parser_type;
+    }
+
+    auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
+                                                                        match_query_str);
+    // NOTE(review): inverted_index_ctx is dereferenced below without the null check used
+    // above - confirm that callers never pass nullptr here.
+    std::vector<std::string> query_tokens;
+    doris::segment_v2::InvertedIndexReader::get_analyse_result(
+            query_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
+            doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY);
+
+    if (query_tokens.empty()) {
+        LOG(WARNING) << fmt::format(
+                "token parser result is empty for query, "
+                "please check your query: '{}' and index parser: '{}'",
+                match_query_str, inverted_index_parser_type_to_string(parser_type));
+        return Status::OK();
+    }
+
+    int32_t current_src_array_offset = 0;
+    for (size_t i = 0; i < input_rows_count; i++) {
+        std::vector<std::string> data_tokens =
+                analyse_data_token(column_name, inverted_index_ctx, string_col, i, array_offsets,
+                                   current_src_array_offset);
+
+        // size() is unsigned: a row with fewer tokens than the query would make the window
+        // count below wrap around and index past the end of data_tokens, so skip it.
+        if (data_tokens.size() < query_tokens.size()) {
+            continue;
+        }
+        const size_t window_count = data_tokens.size() - query_tokens.size() + 1;
+
+        for (size_t j = 0; j < window_count; j++) {
+            if (data_tokens[j] == query_tokens[0] || query_tokens.size() == 1) {
+                bool match = true;
+                for (size_t k = 0; k < query_tokens.size(); k++) {
+                    const std::string& data_token = data_tokens[j + k];
+                    const std::string& query_token = query_tokens[k];
+                    if (k == query_tokens.size() - 1) {
+                        // Last query token: prefix comparison only.
+                        if (data_token.compare(0, query_token.size(), query_token) != 0) {
+                            match = false;
+                            break;
+                        }
+                    } else {
+                        if (data_token != query_token) {
+                            match = false;
+                            break;
+                        }
+                    }
+                }
+                if (match) {
+                    result[i] = true;
+                    break;
+                }
+            }
+        }
+    }
+
+    return Status::OK();
+}
+
 void register_function_match(SimpleFunctionFactory& factory) {
     factory.register_function();
     factory.register_function();
diff --git a/be/src/vec/functions/match.h b/be/src/vec/functions/match.h
index 5ca981e702..ebd6a48ba2 100644
--- a/be/src/vec/functions/match.h
+++ b/be/src/vec/functions/match.h
@@ -139,10 +139,7 @@ public:
                          size_t input_rows_count, const ColumnString* string_col,
                          InvertedIndexCtx* inverted_index_ctx,
                          const ColumnArray::Offsets64* array_offsets,
-                         ColumnUInt8::Container& result) const override {
-        return Status::Error(
-                "FunctionMatchPhrasePrefix not support execute_match");
-    }
+                         ColumnUInt8::Container& result) const override;
 };
 
 class FunctionMatchRegexp : public FunctionMatchBase {
diff --git a/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix.out b/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix.out
index 140fd5ee93..11af32e55e 100644
--- a/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix.out
+++ b/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix.out
@@ -5,12 +5,21 @@
 -- !sql --
 863
 
+-- !sql --
+863
+
 -- !sql --
 235
 
 -- !sql --
 235
 
+-- !sql --
+235
+
+-- !sql --
+166
+
 -- !sql --
 166
 
@@ -23,6 +32,12 @@
 -- !sql --
 56
 
+-- !sql --
+56
+
+-- !sql --
+7
+
 -- !sql --
 7
 
diff --git a/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix.groovy b/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix.groovy
index 68f9624035..1f83d48a97 100644
--- a/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix.groovy
+++ b/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix.groovy
@@ -18,8 +18,10 @@
 
 suite("test_index_match_phrase_prefix", "p0"){
     def indexTbName1 = "test_index_match_phrase_prefix"
+    def indexTbName2 = "test_index_match_phrase_prefix2"
 
     sql "DROP TABLE IF EXISTS ${indexTbName1}"
+    sql "DROP TABLE IF EXISTS ${indexTbName2}"
 
     sql """
       CREATE TABLE ${indexTbName1} (
@@ -38,6 +40,22 @@ suite("test_index_match_phrase_prefix", "p0"){
     );
     """
 
+    sql """
+      CREATE TABLE ${indexTbName2} (
+        `@timestamp` int(11) NULL COMMENT "",
+        `clientip` varchar(20) NULL COMMENT "",
+        `request` text NULL COMMENT "",
+        `status` int(11) NULL COMMENT "",
+        `size` int(11) NULL COMMENT ""
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`@timestamp`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+    );
+    """
+
     def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
                         expected_succ_rows = -1, load_to_single_tablet = 'true' ->
 
@@ -76,24 +94,31 @@ suite("test_index_match_phrase_prefix", "p0"){
     }
 
     try {
-        load_httplogs_data.call(indexTbName1, 'test_index_match_phrase_prefix', 'true', 'json', 'documents-1000.json')
+        load_httplogs_data.call(indexTbName1, indexTbName1, 'true', 'json', 'documents-1000.json')
+        load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json')
 
         sql "sync"
 
-        qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'ima'; """
-        qt_sql """ select count() from test_index_match_phrase_prefix where request like '%ima%'; """
+        qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'ima'; """
+        qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'ima'; """
+        qt_sql """ select count() from ${indexTbName1} where request like '%ima%'; """
 
-        qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'images/h'; """
-        qt_sql """ select count() from test_index_match_phrase_prefix where request like '%images/h%'; """
+        qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/h'; """
+        qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'images/h'; """
+        qt_sql """ select count() from ${indexTbName1} where request like '%images/h%'; """
 
-        qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'images/hm'; """
-        qt_sql """ select count() from test_index_match_phrase_prefix where request like '%images/hm%'; """
+        qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/hm'; """
+        qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'images/hm'; """
+        qt_sql """ select count() from ${indexTbName1} where request like '%images/hm%'; """
 
-        qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix '/french/images/n'; """
-        qt_sql """ select count() from test_index_match_phrase_prefix where request like '%/french/images/n%'; """
+        qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/images/n'; """
+        qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix '/french/images/n'; """
+        qt_sql """ select count() from ${indexTbName1} where request like '%/french/images/n%'; """
+
+        qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/tickets/images/ti'; """
+        qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix '/french/tickets/images/ti'; """
+        qt_sql """ select count() from ${indexTbName1} where request like '%/french/tickets/images/ti%'; """
 
-        qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix '/french/tickets/images/ti'; """
-        qt_sql """ select count() from test_index_match_phrase_prefix where request like '%/french/tickets/images/ti%'; """
     } finally {
         //try_sql("DROP TABLE IF EXISTS ${testTable}")
     }