[fix](phrase_prefix) fix match_phrase_prefix query incorrect result (#29946)

This commit is contained in:
zzzxl
2024-01-22 10:34:25 +08:00
committed by yiguolei
parent 9dd368f8dc
commit e5f1d8d7ec
3 changed files with 79 additions and 1 deletions

View File

@ -17,6 +17,7 @@
#include "phrase_prefix_query.h"
#include "CLucene/util/stringUtil.h"
#include "olap/rowset//segment_v2/inverted_index/query/prefix_query.h"
namespace doris::segment_v2 {
@ -43,7 +44,9 @@ void PhrasePrefixQuery::add(const std::wstring& field_name, const std::vector<st
PrefixQuery::get_prefix_terms(_searcher->getReader(), field_name, terms[i],
prefix_terms, _max_expansions);
if (prefix_terms.empty()) {
continue;
std::wstring ws_term = StringUtil::string_to_wstring(terms[i]);
Term* t = _CLNEW Term(field_name.c_str(), ws_term.c_str());
prefix_terms.push_back(t);
}
_query->add(prefix_terms);
for (auto& t : prefix_terms) {

View File

@ -0,0 +1,13 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
7
-- !sql --
7
-- !sql --
6
-- !sql --
6

View File

@ -0,0 +1,62 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Regression suite for MATCH_PHRASE_PREFIX on inverted-indexed string columns.
// It loads seven rows that share a common leading token and differ only in the
// separator character placed before a trailing "123456789", then counts the
// matches for a one-term and a two-term phrase-prefix query on columns `c` and `d`.
suite("test_index_match_phrase_prefix_1", "p0"){
// Table name; reused in every statement below via GString interpolation.
def indexTbName1 = "test_index_match_phrase_prefix_1"
// Drop any existing table of the same name before creating it.
sql "DROP TABLE IF EXISTS ${indexTbName1}"
// Columns b, c and d receive the same value in every row but are indexed differently:
//   b - inverted index with no properties (not queried by this suite)
//   c - inverted index, "unicode" parser, support_phrase = true
//   d - inverted index, "english" parser, support_phrase = true
sql """
CREATE TABLE ${indexTbName1} (
`a` int(11) NULL COMMENT "",
`b` string NULL COMMENT "",
`c` string NULL COMMENT "",
`d` string NULL COMMENT "",
INDEX b_idx (`b`) USING INVERTED COMMENT '',
INDEX c_idx (`c`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '',
INDEX d_idx (`d`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`a`)
COMMENT "OLAP"
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""
// Row 1 holds only the bare token "O1704361998540E2Cemx9S".
// Rows 2-7 append one separator character - ')', '=', '+', '!', ' ', '*' - followed by "123456789".
sql """ INSERT INTO ${indexTbName1} VALUES (1, "O1704361998540E2Cemx9S", "O1704361998540E2Cemx9S", "O1704361998540E2Cemx9S"); """
sql """ INSERT INTO ${indexTbName1} VALUES (2, "O1704361998540E2Cemx9S)123456789", "O1704361998540E2Cemx9S)123456789", "O1704361998540E2Cemx9S)123456789"); """
sql """ INSERT INTO ${indexTbName1} VALUES (3, "O1704361998540E2Cemx9S=123456789", "O1704361998540E2Cemx9S=123456789", "O1704361998540E2Cemx9S=123456789"); """
sql """ INSERT INTO ${indexTbName1} VALUES (4, "O1704361998540E2Cemx9S+123456789", "O1704361998540E2Cemx9S+123456789", "O1704361998540E2Cemx9S+123456789"); """
sql """ INSERT INTO ${indexTbName1} VALUES (5, "O1704361998540E2Cemx9S!123456789", "O1704361998540E2Cemx9S!123456789", "O1704361998540E2Cemx9S!123456789"); """
sql """ INSERT INTO ${indexTbName1} VALUES (6, "O1704361998540E2Cemx9S 123456789", "O1704361998540E2Cemx9S 123456789", "O1704361998540E2Cemx9S 123456789"); """
sql """ INSERT INTO ${indexTbName1} VALUES (7, "O1704361998540E2Cemx9S*123456789", "O1704361998540E2Cemx9S*123456789", "O1704361998540E2Cemx9S*123456789"); """
try {
// NOTE(review): presumably makes the preceding inserts visible to the queries below - confirm.
sql "sync"
// All four checks share the tag "sql"; keep their order stable.
// NOTE(review): results are presumably matched in order against the suite's generated .out file,
// which lists 7, 7, 6, 6 - confirm before reordering or inserting checks.
// Checks 1-2: the query is the bare leading token, against c (unicode parser) then d (english parser).
qt_sql """ select count() from ${indexTbName1} where c match_phrase_prefix 'O1704361998540E2Cemx9S'; """
qt_sql """ select count() from ${indexTbName1} where d match_phrase_prefix 'O1704361998540E2Cemx9S'; """
// Checks 3-4: the query is the leading token, '=', then "123456789", against c then d.
// NOTE(review): presumably the parsers split on the separator so that rows 2-7 match regardless of
// which separator they contain - verify against the analyzer behaviour.
qt_sql """ select count() from ${indexTbName1} where c match_phrase_prefix 'O1704361998540E2Cemx9S=123456789'; """
qt_sql """ select count() from ${indexTbName1} where d match_phrase_prefix 'O1704361998540E2Cemx9S=123456789'; """
} finally {
// Cleanup is disabled, so the table is left in place after the suite runs.
// NOTE(review): the disabled statement below references `testTable`, which is not defined in this
// suite; the table name variable here is `indexTbName1`. Fix the name before re-enabling it.
//try_sql("DROP TABLE IF EXISTS ${testTable}")
}
}