[fix](inverted index) fix match_phrase_edge query result error #38327 (#38740)

This commit is contained in:
zzzxl
2024-08-01 23:17:53 +08:00
committed by GitHub
parent 4d980b8235
commit 0da388ade5
4 changed files with 40 additions and 10 deletions

View File

@ -31,7 +31,9 @@ namespace doris::segment_v2 {
PhraseEdgeQuery::PhraseEdgeQuery(const std::shared_ptr<lucene::search::IndexSearcher>& searcher,
const TQueryOptions& query_options)
: _searcher(searcher), _query(std::make_unique<CL_NS(search)::MultiPhraseQuery>()) {}
: _searcher(searcher),
_query(std::make_unique<CL_NS(search)::MultiPhraseQuery>()),
_max_expansions(query_options.inverted_index_max_expansions) {}
void PhraseEdgeQuery::add(const std::wstring& field_name, const std::vector<std::string>& terms) {
if (terms.empty()) {
@ -50,9 +52,9 @@ void PhraseEdgeQuery::search(roaring::Roaring& roaring) {
}
void PhraseEdgeQuery::search_one_term(roaring::Roaring& roaring) {
size_t count = 0;
bool first = true;
std::wstring sub_term = StringUtil::string_to_wstring(_terms[0]);
find_words([this, &count, &sub_term, &roaring](Term* term) {
find_words([this, &first, &sub_term, &roaring](Term* term) {
std::wstring_view ws_term(term->text(), term->textLength());
if (ws_term.find(sub_term) == std::wstring::npos) {
return;
@ -70,12 +72,12 @@ void PhraseEdgeQuery::search_one_term(roaring::Roaring& roaring) {
}
_CLDELETE(term_doc);
if (count) {
if (!first) {
roaring.swap(result);
first = false;
} else {
roaring |= result;
}
count++;
});
}
@ -86,15 +88,19 @@ void PhraseEdgeQuery::search_multi_term(roaring::Roaring& roaring) {
std::vector<CL_NS(index)::Term*> suffix_terms;
std::vector<CL_NS(index)::Term*> prefix_terms;
find_words([&suffix_term, &suffix_terms, &prefix_term, &prefix_terms](Term* term) {
find_words([this, &suffix_term, &suffix_terms, &prefix_term, &prefix_terms](Term* term) {
std::wstring_view ws_term(term->text(), term->textLength());
if (ws_term.ends_with(suffix_term)) {
suffix_terms.push_back(_CL_POINTER(term));
if (_max_expansions == 0 || suffix_terms.size() < _max_expansions) {
if (ws_term.ends_with(suffix_term)) {
suffix_terms.push_back(_CL_POINTER(term));
}
}
if (ws_term.starts_with(prefix_term)) {
prefix_terms.push_back(_CL_POINTER(term));
if (_max_expansions == 0 || prefix_terms.size() < _max_expansions) {
if (ws_term.starts_with(prefix_term)) {
prefix_terms.push_back(_CL_POINTER(term));
}
}
});

View File

@ -52,6 +52,7 @@ private:
std::wstring _field_name;
std::vector<std::string> _terms;
std::unique_ptr<CL_NS(search)::MultiPhraseQuery> _query;
int32_t _max_expansions = 50;
};
} // namespace doris::segment_v2

View File

@ -29,3 +29,15 @@
-- !sql --
10 nav_tickets_off.gif 习惯于生活中很多 nav tickets off gif 虚假 nav tickets off gif 美化的人来说
-- !sql --
2
-- !sql --
4
-- !sql --
11
-- !sql --
6

View File

@ -48,6 +48,12 @@ suite("test_index_match_phrase_edge", "p0"){
sql """ INSERT INTO ${indexTbName1} VALUES (9, "hm_bg.jpg", "前几日 hm bg jpg 在别处 hm bg jpg 购得"); """
sql """ INSERT INTO ${indexTbName1} VALUES (10, "nav_tickets_off.gif", "习惯于生活中很多 nav tickets off gif 虚假 nav tickets off gif 美化的人来说"); """
sql """ INSERT INTO ${indexTbName1} VALUES (11, "40.135.0.0", "GET /images/hm_bg.jpg HTTP/1.0"); """
sql """ INSERT INTO ${indexTbName1} VALUES (12, "232.0.0.0", "GET /images/hm_bg.jpg HTTP/1.0"); """
sql """ INSERT INTO ${indexTbName1} VALUES (13, "26.1.0.0", "GET /images/hm_bg.jpg HTTP/1.0"); """
sql """ INSERT INTO ${indexTbName1} VALUES (14, "247.37.0.0", "GET /french/splash_inet.html HTTP/1.0"); """
sql """ INSERT INTO ${indexTbName1} VALUES (15, "247.37.0.0", "GET /images/hm_nbg.jpg HTTP/1.0"); """
try {
sql "sync"
@ -63,6 +69,11 @@ suite("test_index_match_phrase_edge", "p0"){
qt_sql """ select * from ${indexTbName1} where c match_phrase_edge 'ue off gif 家长 na'; """
qt_sql """ select * from ${indexTbName1} where c match_phrase_edge 'if 虚假 na'; """
qt_sql """ select count() from ${indexTbName1} where b match_phrase_edge '1'; """
qt_sql """ select count() from ${indexTbName1} where b match_phrase_edge '3'; """
qt_sql """ select count() from ${indexTbName1} where c match_phrase_edge 'n'; """
qt_sql """ select count() from ${indexTbName1} where c match_phrase_edge 'b'; """
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
}