[fix](match) Optimize the logic for match_phrase function filter (#21622)

This commit is contained in:
YueW
2023-07-25 14:22:37 +08:00
committed by GitHub
parent c251a574e8
commit c01230f99a
3 changed files with 30 additions and 9 deletions

View File

@ -236,21 +236,24 @@ Status FunctionMatchPhrase::execute_match(const std::string& column_name,
// TODO: more efficient impl
bool matched = false;
auto it = data_tokens.begin();
while (it != data_tokens.end()) {
auto data_it = data_tokens.begin();
while (data_it != data_tokens.end()) {
// find position of first token
it = std::find(it, data_tokens.end(), query_tokens[0]);
if (it != data_tokens.end()) {
data_it = std::find(data_it, data_tokens.end(), query_tokens[0]);
if (data_it != data_tokens.end()) {
matched = true;
it++;
auto it_more = it;
auto data_it_next = ++data_it;
auto query_it = query_tokens.begin() + 1;
// compare query_tokens after the first to data_tokens one by one
for (size_t idx = 1; idx < query_tokens.size(); idx++) {
if (it_more == data_tokens.end() || *it_more != query_tokens[idx]) {
while (query_it != query_tokens.end()) {
if (data_it_next == data_tokens.end() || *data_it_next != *query_it) {
matched = false;
break;
}
it_more++;
query_it++;
data_it_next++;
}
if (matched) {
break;
}

View File

@ -17,6 +17,14 @@
-- !sql --
2 我爱你中国
-- !sql --
2 我爱你中国
-- !sql --
2 我爱你中国
-- !sql --
-- !sql --
-- !sql --
@ -33,6 +41,11 @@
-- !sql --
2 我爱你中国
-- !sql --
-- !sql --
2 我爱你中国
-- !sql --
1 我来到北京清华大学

View File

@ -73,6 +73,9 @@ suite("test_chinese_analyzer"){
sql "INSERT INTO $indexTblName2 VALUES (1, '我来到北京清华大学'), (2, '我爱你中国'), (3, '人民可以得到更多实惠');"
qt_sql "SELECT * FROM $indexTblName2 WHERE c MATCH '我爱你' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName2 WHERE c MATCH_PHRASE '我爱你' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName2 WHERE c MATCH_PHRASE '我爱你 中国' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName2 WHERE c MATCH_PHRASE '北京 大学' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName2 WHERE c MATCH '清华' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName2 WHERE c MATCH '大学' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName2 WHERE c MATCH '清华大学' ORDER BY id;"
@ -98,6 +101,8 @@ suite("test_chinese_analyzer"){
sql "INSERT INTO $indexTblName3 VALUES (1, '我来到北京清华大学'), (2, '我爱你中国'), (3, '人民可以得到更多实惠'), (4, '陕西省西安市高新区创业大厦A座,我的手机号码是12345678901,邮箱是12345678@qq.com,,ip是1.1.1.1,this information is created automatically.');"
qt_sql "SELECT * FROM $indexTblName3 WHERE c MATCH_PHRASE '我爱你' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName3 WHERE c MATCH_PHRASE '我爱你 中国' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName3 WHERE c MATCH_PHRASE '北京 大学' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName3 WHERE c MATCH_ALL'我爱你' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName3 WHERE c MATCH_ALL '清华' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName3 WHERE c MATCH_ALL '大学' ORDER BY id;"