[fix](inverted index) Fix match_regexp to correctly handle empty string patterns (#40659)

https://github.com/apache/doris/pull/39503
This commit is contained in:
zzzxl
2024-09-11 18:10:33 +08:00
committed by GitHub
parent 52e13c9e6c
commit ebe031c019
5 changed files with 24 additions and 23 deletions

View File

@@ -407,15 +407,6 @@ Status FunctionMatchRegexp::execute_match(FunctionContext* context, const std::s
VLOG_DEBUG << "begin to run FunctionMatchRegexp::execute_match, parser_type: "
<< inverted_index_parser_type_to_string(inverted_index_ctx->parser_type);
if (match_query_str.empty()) {
VLOG_DEBUG << fmt::format(
"token parser result is empty for query, "
"please check your query: '{}' and index parser: '{}'",
match_query_str,
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
return Status::OK();
}
const std::string& pattern = match_query_str;
hs_database_t* database = nullptr;

View File

@@ -2,6 +2,9 @@
-- !sql --
1000
-- !sql --
1000
-- !sql --
54

View File

@@ -20,3 +20,6 @@
-- !sql --
0
-- !sql --
1000

View File

@@ -80,6 +80,7 @@ suite("test_index_match_regexp", "p0"){
sql "sync"
qt_sql """ select count() from test_index_match_regexp where request match_regexp ''; """
qt_sql """ select count() from test_index_match_regexp where request match_regexp '^h'; """
qt_sql """ select count() from test_index_match_regexp where request match_regexp '^team'; """
qt_sql """ select count() from test_index_match_regexp where request match_regexp 's\$'; """

View File

@@ -18,7 +18,7 @@
suite("test_no_index_match", "p0") {
// define a sql table
def testTable_unique = "httplogs_unique"
def testTable = "test_no_index_match"
def create_httplogs_unique_table = {testTablex ->
// multi-line sql
@@ -77,35 +77,38 @@ suite("test_no_index_match", "p0") {
}
try {
sql "DROP TABLE IF EXISTS ${testTable_unique}"
create_httplogs_unique_table.call(testTable_unique)
load_httplogs_data.call(testTable_unique, 'httplogs_unique', 'true', 'json', 'documents-1000.json')
sql "DROP TABLE IF EXISTS ${testTable}"
create_httplogs_unique_table.call(testTable)
load_httplogs_data.call(testTable, 'test_no_index_match', 'true', 'json', 'documents-1000.json')
sql """ INSERT INTO ${testTable_unique} VALUES (1, '1', '', 1, 1); """
sql """ INSERT INTO ${testTable} VALUES (1, '1', '', 1, 1); """
sql 'sync'
sql """ set enable_common_expr_pushdown = true """
try {
qt_sql """ select count() from ${testTable_unique} where (request match_any 'hm bg'); """
qt_sql """ select count() from ${testTable_unique} where (request match_all 'hm bg'); """
qt_sql """ select count() from ${testTable_unique} where (request match_phrase 'hm bg'); """
qt_sql """ select count() from ${testTable_unique} where (request match_phrase_prefix 'hm b'); """
qt_sql """ select count() from ${testTable_unique} where (request match_regexp 'la'); """
qt_sql """ select count() from ${testTable} where (request match_any 'hm bg'); """
qt_sql """ select count() from ${testTable} where (request match_all 'hm bg'); """
qt_sql """ select count() from ${testTable} where (request match_phrase 'hm bg'); """
qt_sql """ select count() from ${testTable} where (request match_phrase_prefix 'hm b'); """
qt_sql """ select count() from ${testTable} where (request match_regexp 'la'); """
qt_sql """ select count() from ${testTable_unique} where (request match_phrase '欧冶工业品'); """
qt_sql """ select count() from ${testTable_unique} where (request match_phrase_prefix '欧冶工业品'); """
qt_sql """ select count() from ${testTable} where (request match_phrase '欧冶工业品'); """
qt_sql """ select count() from ${testTable} where (request match_phrase_prefix '欧冶工业品'); """
qt_sql """ select count() from ${testTable} where (request match_regexp ''); """
} finally {
}
try {
sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) */ count() from ${testTable_unique} where (request match_phrase 'hm bg'); """
sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) */ count() from ${testTable} where (request match_phrase 'hm bg'); """
} catch (Exception e) {
log.info(e.getMessage());
assertTrue(e.getMessage().contains("match_phrase not support execute_match"))
}
try {
sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) */ count() from ${testTable_unique} where (request match_phrase_prefix 'hm b'); """
sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) */ count() from ${testTable} where (request match_phrase_prefix 'hm b'); """
} catch (Exception e) {
log.info(e.getMessage());
assertTrue(e.getMessage().contains("match_phrase_prefix not support execute_match"))