// Patch summary: let BKD inverted-index reads be skipped when a cheap "try query" estimates too many hits.
//
//  * be/src/common/status.h
//      - adds ErrorCode INVERTED_INDEX_FILE_HIT_LIMIT (-6004) and appends it to the
//        `code != ...` chain inside capture_stacktrace().
//  * be/src/exprs/match_predicate.cpp
//      - MatchPredicate::evaluate now passes `true` as the new trailing (skip_try) argument of
//        read_from_inverted_index.
//  * be/src/olap/rowset/segment_v2/inverted_index_reader.{h,cpp}
//      - BkdIndexReader::try_query moves from an inline stub in the header to an out-of-line definition:
//        it calls bkd_query() with a freshly created visitor, stores estimate_point_count() in *count,
//        logs the elapsed time, and converts CLuceneError into an error Status.
//      - InvertedIndexIterator::read_from_inverted_index gains `bool skip_try = false`. When skip_try is
//        false and the reader type is BKD, it first calls try_read_from_inverted_index and returns an
//        error Status if hit_count > segment_num_rows * query_bkd_inverted_index_limit_percent / 100.
//      - InvertedIndexIterator::try_read_from_inverted_index forwards to _reader->try_query only for
//        GREATER_EQUAL / GREATER_THAN / LESS_EQUAL / LESS_THAN / EQUAL queries; for any other query type
//        it returns OK without writing *count.
//  * be/src/olap/rowset/segment_v2/segment_iterator.cpp
//      - _apply_index_except_leafnode_of_andnode and _apply_inverted_index both treat
//        INVERTED_INDEX_FILE_HIT_LIMIT as "downgrade without index query" (`continue`, and in the latter
//        the predicate is pushed to remaining_predicates), regardless of predicate type.
//
// Review changes made in this copy of the patch:
//      - added the missing space in the "hit count: ... for bkd inverted reached limit" log message;
//      - fixed the grammar of the NOTE comment in try_read_from_inverted_index.
//
// NOTE(review): this copy has its line breaks collapsed, and several template argument lists appear to be
//   missing (`std::make_unique(`, `std::shared_ptr r;`, `Status::Error()`), presumably lost during
//   extraction. Regenerate the patch from the original commit before applying; which error code each
//   `Status::Error()` carries cannot be confirmed from this text.
// NOTE(review): `segment_num_rows * query_bkd_limit_percent` is evaluated before the division; if the config
//   value is a 32-bit integer this could wrap for very large segments. Confirm the config type and
//   consider widening to uint64_t.
// NOTE(review): try_query and the limit check log at INFO on every call; confirm that this volume is
//   acceptable on the query path.
diff --git a/be/src/common/status.h b/be/src/common/status.h index 46494db111..f16f65d087 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -251,6 +251,7 @@ E(INVERTED_INDEX_INVALID_PARAMETERS, -6000); E(INVERTED_INDEX_NOT_SUPPORTED, -6001); E(INVERTED_INDEX_CLUCENE_ERROR, -6002); E(INVERTED_INDEX_FILE_NOT_FOUND, -6003); +E(INVERTED_INDEX_FILE_HIT_LIMIT, -6004); #undef E }; // namespace ErrorCode @@ -276,7 +277,8 @@ static constexpr bool capture_stacktrace() { && code != ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS && code != ErrorCode::INVERTED_INDEX_NOT_SUPPORTED && code != ErrorCode::INVERTED_INDEX_CLUCENE_ERROR - && code != ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND; + && code != ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND + && code != ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT; } // clang-format on diff --git a/be/src/exprs/match_predicate.cpp b/be/src/exprs/match_predicate.cpp index 45b359f26d..972a1d6728 100644 --- a/be/src/exprs/match_predicate.cpp +++ b/be/src/exprs/match_predicate.cpp @@ -60,7 +60,7 @@ Status MatchPredicate::evaluate(const Schema& schema, InvertedIndexIterator* ite char buf[column_desc->get_sub_field(0)->type_info()->size()]; column_desc->get_sub_field(0)->from_string(buf, _value); s = iterator->read_from_inverted_index(column_desc->name(), buf, inverted_index_query_type, - num_rows, &roaring); + num_rows, &roaring, true); } *bitmap &= roaring; return s; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index 23e0d51b99..cff316f6ac 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -359,6 +359,26 @@ Status BkdIndexReader::bkd_query(const std::string& column_name, const void* que return Status::OK(); } +Status BkdIndexReader::try_query(const std::string& column_name, const void* query_value, + InvertedIndexQueryType query_type, + InvertedIndexParserType analyser_type, 
uint32_t* count) { + uint64_t start = UnixMillis(); + auto visitor = std::make_unique(nullptr, query_type, true); + std::shared_ptr r; + try { + RETURN_IF_ERROR( + bkd_query(column_name, query_value, query_type, std::move(r), visitor.get())); + *count = r->estimate_point_count(visitor.get()); + } catch (const CLuceneError& e) { + LOG(WARNING) << "BKD Query CLuceneError Occurred, error msg: " << e.what(); + return Status::Error(); + } + + LOG(INFO) << "BKD index try search time taken: " << UnixMillis() - start << "ms " + << " column: " << column_name << " result: " << *count; + return Status::OK(); +} + Status BkdIndexReader::query(const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type, roaring::Roaring* bit_map) { @@ -598,7 +618,19 @@ Status InvertedIndexIterator::read_from_inverted_index(const std::string& column const void* query_value, InvertedIndexQueryType query_type, uint32_t segment_num_rows, - roaring::Roaring* bit_map) { + roaring::Roaring* bit_map, bool skip_try) { + if (!skip_try && _reader->type() == InvertedIndexReaderType::BKD) { + auto query_bkd_limit_percent = config::query_bkd_inverted_index_limit_percent; + uint32_t hit_count = 0; + RETURN_IF_ERROR( + try_read_from_inverted_index(column_name, query_value, query_type, &hit_count)); + if (hit_count > segment_num_rows * query_bkd_limit_percent / 100) { + LOG(INFO) << "hit count: " << hit_count << " for bkd inverted reached limit " + << query_bkd_limit_percent << "%, segment num rows: " << segment_num_rows; + return Status::Error(); + } + } + RETURN_IF_ERROR(_reader->query(column_name, query_value, query_type, _analyser_type, bit_map)); return Status::OK(); } @@ -607,6 +639,15 @@ Status InvertedIndexIterator::try_read_from_inverted_index(const std::string& co const void* query_value, InvertedIndexQueryType query_type, uint32_t* count) { + // NOTE: only the BKD index supports try-read for now. 
+ if (query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY || + query_type == InvertedIndexQueryType::GREATER_THAN_QUERY || + query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY || + query_type == InvertedIndexQueryType::LESS_THAN_QUERY || + query_type == InvertedIndexQueryType::EQUAL_QUERY) { + RETURN_IF_ERROR( + _reader->try_query(column_name, query_value, query_type, _analyser_type, count)); + } return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index 775900b5e5..5ff1d22222 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -186,9 +186,7 @@ public: roaring::Roaring* bit_map) override; Status try_query(const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type, - uint32_t* count) override { - return Status::Error(); - } + uint32_t* count) override; Status bkd_query(const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr&& r, @@ -213,7 +211,7 @@ public: Status read_from_inverted_index(const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, uint32_t segment_num_rows, - roaring::Roaring* bit_map); + roaring::Roaring* bit_map, bool skip_try = false); Status try_read_from_inverted_index(const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, uint32_t* count); diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 7d3bde6c5d..110f6f87fc 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -584,8 +584,9 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() { } if (!res.ok()) { - if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND && - 
pred->type() != PredicateType::MATCH) { + if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND && + pred->type() != PredicateType::MATCH) || + res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT) { // downgrade without index query continue; } @@ -655,8 +656,9 @@ Status SegmentIterator::_apply_inverted_index() { Status res = pred->evaluate(_schema, _inverted_index_iterators[unique_id], num_rows(), &bitmap); if (!res.ok()) { - if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND && - pred->type() != PredicateType::MATCH) { + if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND && + pred->type() != PredicateType::MATCH) || + res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT) { //downgrade without index query remaining_predicates.push_back(pred); continue;