[feature](invert index) support the match_phrase_prefix feature without creating an inverted index (#30414)
This commit is contained in:
@ -311,6 +311,65 @@ Status FunctionMatchPhrase::execute_match(const std::string& column_name,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Evaluates `match_phrase_prefix` directly against the column data, row by row.
//
// The query string is tokenized once. Each row is then tokenized with
// analyse_data_token() and scanned for a run of consecutive tokens where every
// query token except the last equals the row token at the same position, and
// the last query token is a prefix of the corresponding row token.
//
// Parameters:
//   column_name        - column name forwarded to the tokenizer helpers.
//   match_query_str    - raw query text.
//   input_rows_count   - number of rows to evaluate.
//   string_col         - string column forwarded to analyse_data_token().
//   inverted_index_ctx - source of the parser type and analyzer.
//   array_offsets      - offsets forwarded to analyse_data_token().
//   result             - result[i] is set to true for each matching row; entries of
//                        non-matching rows are left untouched.
//
// Returns Status::OK() on every path visible here. When the query produces no tokens,
// a warning is logged and `result` is not modified.
Status FunctionMatchPhrasePrefix::execute_match(
        const std::string& column_name, const std::string& match_query_str, size_t input_rows_count,
        const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
        const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const {
    doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
    if (inverted_index_ctx) {
        parser_type = inverted_index_ctx->parser_type;
    }

    // NOTE(review): inverted_index_ctx is null-checked above but dereferenced
    // unconditionally below -- confirm that callers never pass nullptr here.
    auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
                                                                        match_query_str);
    std::vector<std::string> query_tokens;
    doris::segment_v2::InvertedIndexReader::get_analyse_result(
            query_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
            doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY);

    if (query_tokens.empty()) {
        LOG(WARNING) << fmt::format(
                "token parser result is empty for query, "
                "please check your query: '{}' and index parser: '{}'",
                match_query_str, inverted_index_parser_type_to_string(parser_type));
        return Status::OK();
    }

    // Loop-invariant sizes; query_tokens is known to be non-empty at this point.
    const size_t query_size = query_tokens.size();
    const size_t last_query_pos = query_size - 1;

    int32_t current_src_array_offset = 0;
    for (size_t i = 0; i < input_rows_count; i++) {
        // Always tokenize the row, even if it ends up being skipped, so that
        // current_src_array_offset is handed to the helper once per row.
        std::vector<std::string> data_tokens =
                analyse_data_token(column_name, inverted_index_ctx, string_col, i, array_offsets,
                                   current_src_array_offset);

        // A row with fewer tokens than the query cannot contain the phrase. This guard
        // also prevents the unsigned subtraction below from wrapping around, which would
        // otherwise drive the scan far past the end of data_tokens.
        if (data_tokens.size() < query_size) {
            continue;
        }

        // Number of start positions at which the whole query still fits into the row.
        const size_t window_count = data_tokens.size() - query_size + 1;
        for (size_t j = 0; j < window_count; j++) {
            // Cheap rejection: for multi-token queries the first token must match exactly.
            // A single-token query is purely a prefix test, so every position is a candidate.
            if (data_tokens[j] == query_tokens[0] || query_size == 1) {
                bool match = true;
                for (size_t k = 0; k < query_size; k++) {
                    const std::string& data_token = data_tokens[j + k];
                    const std::string& query_token = query_tokens[k];
                    if (k == last_query_pos) {
                        // The last query token only has to be a prefix of the row token.
                        if (data_token.compare(0, query_token.size(), query_token) != 0) {
                            match = false;
                            break;
                        }
                    } else {
                        // All preceding tokens must match exactly.
                        if (data_token != query_token) {
                            match = false;
                            break;
                        }
                    }
                }
                if (match) {
                    result[i] = true;
                    break;
                }
            }
        }
    }

    return Status::OK();
}
|
||||
|
||||
void register_function_match(SimpleFunctionFactory& factory) {
|
||||
factory.register_function<FunctionMatchAny>();
|
||||
factory.register_function<FunctionMatchAll>();
|
||||
|
||||
@ -139,10 +139,7 @@ public:
|
||||
size_t input_rows_count, const ColumnString* string_col,
|
||||
InvertedIndexCtx* inverted_index_ctx,
|
||||
const ColumnArray::Offsets64* array_offsets,
|
||||
ColumnUInt8::Container& result) const override {
|
||||
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
|
||||
"FunctionMatchPhrasePrefix not support execute_match");
|
||||
}
|
||||
ColumnUInt8::Container& result) const override;
|
||||
};
|
||||
|
||||
class FunctionMatchRegexp : public FunctionMatchBase {
|
||||
|
||||
@ -5,12 +5,21 @@
|
||||
-- !sql --
|
||||
863
|
||||
|
||||
-- !sql --
|
||||
863
|
||||
|
||||
-- !sql --
|
||||
235
|
||||
|
||||
-- !sql --
|
||||
235
|
||||
|
||||
-- !sql --
|
||||
235
|
||||
|
||||
-- !sql --
|
||||
166
|
||||
|
||||
-- !sql --
|
||||
166
|
||||
|
||||
@ -23,6 +32,12 @@
|
||||
-- !sql --
|
||||
56
|
||||
|
||||
-- !sql --
|
||||
56
|
||||
|
||||
-- !sql --
|
||||
7
|
||||
|
||||
-- !sql --
|
||||
7
|
||||
|
||||
|
||||
@ -18,8 +18,10 @@
|
||||
|
||||
suite("test_index_match_phrase_prefix", "p0"){
|
||||
def indexTbName1 = "test_index_match_phrase_prefix"
|
||||
def indexTbName2 = "test_index_match_phrase_prefix2"
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName1}"
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName2}"
|
||||
|
||||
sql """
|
||||
CREATE TABLE ${indexTbName1} (
|
||||
@ -38,6 +40,22 @@ suite("test_index_match_phrase_prefix", "p0"){
|
||||
);
|
||||
"""
|
||||
|
||||
sql """
|
||||
CREATE TABLE ${indexTbName2} (
|
||||
`@timestamp` int(11) NULL COMMENT "",
|
||||
`clientip` varchar(20) NULL COMMENT "",
|
||||
`request` text NULL COMMENT "",
|
||||
`status` int(11) NULL COMMENT "",
|
||||
`size` int(11) NULL COMMENT ""
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(`@timestamp`)
|
||||
COMMENT "OLAP"
|
||||
DISTRIBUTED BY RANDOM BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1"
|
||||
);
|
||||
"""
|
||||
|
||||
def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
|
||||
expected_succ_rows = -1, load_to_single_tablet = 'true' ->
|
||||
|
||||
@ -76,24 +94,31 @@ suite("test_index_match_phrase_prefix", "p0"){
|
||||
}
|
||||
|
||||
try {
|
||||
load_httplogs_data.call(indexTbName1, 'test_index_match_phrase_prefix', 'true', 'json', 'documents-1000.json')
|
||||
load_httplogs_data.call(indexTbName1, indexTbName1, 'true', 'json', 'documents-1000.json')
|
||||
load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
|
||||
qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'ima'; """
|
||||
qt_sql """ select count() from test_index_match_phrase_prefix where request like '%ima%'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'ima'; """
|
||||
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'ima'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request like '%ima%'; """
|
||||
|
||||
qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'images/h'; """
|
||||
qt_sql """ select count() from test_index_match_phrase_prefix where request like '%images/h%'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/h'; """
|
||||
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'images/h'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request like '%images/h%'; """
|
||||
|
||||
qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'images/hm'; """
|
||||
qt_sql """ select count() from test_index_match_phrase_prefix where request like '%images/hm%'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/hm'; """
|
||||
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'images/hm'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request like '%images/hm%'; """
|
||||
|
||||
qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix '/french/images/n'; """
|
||||
qt_sql """ select count() from test_index_match_phrase_prefix where request like '%/french/images/n%'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/images/n'; """
|
||||
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix '/french/images/n'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request like '%/french/images/n%'; """
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/tickets/images/ti'; """
|
||||
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix '/french/tickets/images/ti'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request like '%/french/tickets/images/ti%'; """
|
||||
|
||||
qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix '/french/tickets/images/ti'; """
|
||||
qt_sql """ select count() from test_index_match_phrase_prefix where request like '%/french/tickets/images/ti%'; """
|
||||
} finally {
|
||||
//try_sql("DROP TABLE IF EXISTS ${testTable}")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user