This commit is contained in:
@ -31,7 +31,9 @@ namespace doris::segment_v2 {
|
||||
|
||||
PhraseEdgeQuery::PhraseEdgeQuery(const std::shared_ptr<lucene::search::IndexSearcher>& searcher,
|
||||
const TQueryOptions& query_options)
|
||||
: _searcher(searcher), _query(std::make_unique<CL_NS(search)::MultiPhraseQuery>()) {}
|
||||
: _searcher(searcher),
|
||||
_query(std::make_unique<CL_NS(search)::MultiPhraseQuery>()),
|
||||
_max_expansions(query_options.inverted_index_max_expansions) {}
|
||||
|
||||
void PhraseEdgeQuery::add(const std::wstring& field_name, const std::vector<std::string>& terms) {
|
||||
if (terms.empty()) {
|
||||
@ -50,9 +52,9 @@ void PhraseEdgeQuery::search(roaring::Roaring& roaring) {
|
||||
}
|
||||
|
||||
void PhraseEdgeQuery::search_one_term(roaring::Roaring& roaring) {
|
||||
size_t count = 0;
|
||||
bool first = true;
|
||||
std::wstring sub_term = StringUtil::string_to_wstring(_terms[0]);
|
||||
find_words([this, &count, &sub_term, &roaring](Term* term) {
|
||||
find_words([this, &first, &sub_term, &roaring](Term* term) {
|
||||
std::wstring_view ws_term(term->text(), term->textLength());
|
||||
if (ws_term.find(sub_term) == std::wstring::npos) {
|
||||
return;
|
||||
@ -70,12 +72,12 @@ void PhraseEdgeQuery::search_one_term(roaring::Roaring& roaring) {
|
||||
}
|
||||
_CLDELETE(term_doc);
|
||||
|
||||
if (count) {
|
||||
if (!first) {
|
||||
roaring.swap(result);
|
||||
first = false;
|
||||
} else {
|
||||
roaring |= result;
|
||||
}
|
||||
count++;
|
||||
});
|
||||
}
|
||||
|
||||
@ -86,15 +88,19 @@ void PhraseEdgeQuery::search_multi_term(roaring::Roaring& roaring) {
|
||||
std::vector<CL_NS(index)::Term*> suffix_terms;
|
||||
std::vector<CL_NS(index)::Term*> prefix_terms;
|
||||
|
||||
find_words([&suffix_term, &suffix_terms, &prefix_term, &prefix_terms](Term* term) {
|
||||
find_words([this, &suffix_term, &suffix_terms, &prefix_term, &prefix_terms](Term* term) {
|
||||
std::wstring_view ws_term(term->text(), term->textLength());
|
||||
|
||||
if (ws_term.ends_with(suffix_term)) {
|
||||
suffix_terms.push_back(_CL_POINTER(term));
|
||||
if (_max_expansions == 0 || suffix_terms.size() < _max_expansions) {
|
||||
if (ws_term.ends_with(suffix_term)) {
|
||||
suffix_terms.push_back(_CL_POINTER(term));
|
||||
}
|
||||
}
|
||||
|
||||
if (ws_term.starts_with(prefix_term)) {
|
||||
prefix_terms.push_back(_CL_POINTER(term));
|
||||
if (_max_expansions == 0 || prefix_terms.size() < _max_expansions) {
|
||||
if (ws_term.starts_with(prefix_term)) {
|
||||
prefix_terms.push_back(_CL_POINTER(term));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@ -52,6 +52,7 @@ private:
|
||||
std::wstring _field_name;
|
||||
std::vector<std::string> _terms;
|
||||
std::unique_ptr<CL_NS(search)::MultiPhraseQuery> _query;
|
||||
int32_t _max_expansions = 50;
|
||||
};
|
||||
|
||||
} // namespace doris::segment_v2
|
||||
@ -29,3 +29,15 @@
|
||||
-- !sql --
|
||||
10 nav_tickets_off.gif 习惯于生活中很多 nav tickets off gif 虚假 nav tickets off gif 美化的人来说
|
||||
|
||||
-- !sql --
|
||||
2
|
||||
|
||||
-- !sql --
|
||||
4
|
||||
|
||||
-- !sql --
|
||||
11
|
||||
|
||||
-- !sql --
|
||||
6
|
||||
|
||||
|
||||
@ -48,6 +48,12 @@ suite("test_index_match_phrase_edge", "p0"){
|
||||
sql """ INSERT INTO ${indexTbName1} VALUES (9, "hm_bg.jpg", "前几日 hm bg jpg 在别处 hm bg jpg 购得"); """
|
||||
sql """ INSERT INTO ${indexTbName1} VALUES (10, "nav_tickets_off.gif", "习惯于生活中很多 nav tickets off gif 虚假 nav tickets off gif 美化的人来说"); """
|
||||
|
||||
sql """ INSERT INTO ${indexTbName1} VALUES (11, "40.135.0.0", "GET /images/hm_bg.jpg HTTP/1.0"); """
|
||||
sql """ INSERT INTO ${indexTbName1} VALUES (12, "232.0.0.0", "GET /images/hm_bg.jpg HTTP/1.0"); """
|
||||
sql """ INSERT INTO ${indexTbName1} VALUES (13, "26.1.0.0", "GET /images/hm_bg.jpg HTTP/1.0"); """
|
||||
sql """ INSERT INTO ${indexTbName1} VALUES (14, "247.37.0.0", "GET /french/splash_inet.html HTTP/1.0"); """
|
||||
sql """ INSERT INTO ${indexTbName1} VALUES (15, "247.37.0.0", "GET /images/hm_nbg.jpg HTTP/1.0"); """
|
||||
|
||||
try {
|
||||
sql "sync"
|
||||
|
||||
@ -63,6 +69,11 @@ suite("test_index_match_phrase_edge", "p0"){
|
||||
qt_sql """ select * from ${indexTbName1} where c match_phrase_edge 'ue off gif 家长 na'; """
|
||||
qt_sql """ select * from ${indexTbName1} where c match_phrase_edge 'if 虚假 na'; """
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where b match_phrase_edge '1'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where b match_phrase_edge '3'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where c match_phrase_edge 'n'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where c match_phrase_edge 'b'; """
|
||||
|
||||
} finally {
|
||||
//try_sql("DROP TABLE IF EXISTS ${testTable}")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user