[Feature-WIP](inverted index)(bkd) Support try query before query bkd to improve query efficiency (#16075)

This commit is contained in:
YueW
2023-01-20 11:19:36 +08:00
committed by GitHub
parent 6c5470b163
commit 6485221ffb
5 changed files with 54 additions and 11 deletions

View File

@ -251,6 +251,7 @@ E(INVERTED_INDEX_INVALID_PARAMETERS, -6000);
E(INVERTED_INDEX_NOT_SUPPORTED, -6001);
E(INVERTED_INDEX_CLUCENE_ERROR, -6002);
E(INVERTED_INDEX_FILE_NOT_FOUND, -6003);
E(INVERTED_INDEX_FILE_HIT_LIMIT, -6004);
#undef E
}; // namespace ErrorCode
@ -276,7 +277,8 @@ static constexpr bool capture_stacktrace() {
&& code != ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS
&& code != ErrorCode::INVERTED_INDEX_NOT_SUPPORTED
&& code != ErrorCode::INVERTED_INDEX_CLUCENE_ERROR
&& code != ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND;
&& code != ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND
&& code != ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT;
}
// clang-format on

View File

@ -60,7 +60,7 @@ Status MatchPredicate::evaluate(const Schema& schema, InvertedIndexIterator* ite
char buf[column_desc->get_sub_field(0)->type_info()->size()];
column_desc->get_sub_field(0)->from_string(buf, _value);
s = iterator->read_from_inverted_index(column_desc->name(), buf, inverted_index_query_type,
num_rows, &roaring);
num_rows, &roaring, true);
}
*bitmap &= roaring;
return s;

View File

@ -359,6 +359,26 @@ Status BkdIndexReader::bkd_query(const std::string& column_name, const void* que
return Status::OK();
}
// Stores in *count the value returned by bkd_reader::estimate_point_count for the given
// query, so callers can size up a BKD query before running it for real.
//
// column_name   - forwarded to bkd_query and included in log output.
// query_value   - opaque pointer to the value to compare against; forwarded to bkd_query.
// query_type    - comparison kind; forwarded to both the visitor and bkd_query.
// analyser_type - not used by this implementation.
// count         - out-parameter receiving the estimate; must be non-null because it is
//                 dereferenced unconditionally.
//
// Returns OK on success, the status from bkd_query if that call fails, or
// INVERTED_INDEX_CLUCENE_ERROR if CLucene throws or if no reader is available after
// bkd_query reports success.
Status BkdIndexReader::try_query(const std::string& column_name, const void* query_value,
                                 InvertedIndexQueryType query_type,
                                 InvertedIndexParserType analyser_type, uint32_t* count) {
    uint64_t start = UnixMillis();
    // NOTE(review): the null first argument and trailing `true` presumably put the visitor
    // into a count-only mode with no result bitmap - confirm against InvertedIndexVisitor.
    auto visitor = std::make_unique<InvertedIndexVisitor>(nullptr, query_type, true);
    std::shared_ptr<lucene::util::bkd::bkd_reader> r;
    try {
        // bkd_query declares its reader argument as an rvalue reference, so std::move is
        // only the cast needed to bind to it: nothing is moved out of `r` here, and `r` is
        // expected to be filled in by the callee before it is used below.
        RETURN_IF_ERROR(
                bkd_query(column_name, query_value, query_type, std::move(r), visitor.get()));
        if (r == nullptr) {
            // Fail the query instead of dereferencing a null reader.
            LOG(WARNING) << "BKD reader is null after bkd_query, column: " << column_name;
            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
        }
        *count = r->estimate_point_count(visitor.get());
    } catch (const CLuceneError& e) {
        LOG(WARNING) << "BKD Query CLuceneError Occurred, error msg: " << e.what();
        return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
    }
    LOG(INFO) << "BKD index try search time taken: " << UnixMillis() - start << "ms "
              << " column: " << column_name << " result: " << *count;
    return Status::OK();
}
Status BkdIndexReader::query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type, roaring::Roaring* bit_map) {
@ -598,7 +618,19 @@ Status InvertedIndexIterator::read_from_inverted_index(const std::string& column
const void* query_value,
InvertedIndexQueryType query_type,
uint32_t segment_num_rows,
roaring::Roaring* bit_map) {
roaring::Roaring* bit_map, bool skip_try) {
if (!skip_try && _reader->type() == InvertedIndexReaderType::BKD) {
auto query_bkd_limit_percent = config::query_bkd_inverted_index_limit_percent;
uint32_t hit_count = 0;
RETURN_IF_ERROR(
try_read_from_inverted_index(column_name, query_value, query_type, &hit_count));
if (hit_count > segment_num_rows * query_bkd_limit_percent / 100) {
LOG(INFO) << "hit count: " << hit_count << "for bkd inverted reached limit "
<< query_bkd_limit_percent << "%, segment num rows: " << segment_num_rows;
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT>();
}
}
RETURN_IF_ERROR(_reader->query(column_name, query_value, query_type, _analyser_type, bit_map));
return Status::OK();
}
@ -607,6 +639,15 @@ Status InvertedIndexIterator::try_read_from_inverted_index(const std::string& co
const void* query_value,
InvertedIndexQueryType query_type,
uint32_t* count) {
// NOTE: only bkd index support try read now.
if (query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY ||
query_type == InvertedIndexQueryType::GREATER_THAN_QUERY ||
query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY ||
query_type == InvertedIndexQueryType::LESS_THAN_QUERY ||
query_type == InvertedIndexQueryType::EQUAL_QUERY) {
RETURN_IF_ERROR(
_reader->try_query(column_name, query_value, query_type, _analyser_type, count));
}
return Status::OK();
}

View File

@ -186,9 +186,7 @@ public:
roaring::Roaring* bit_map) override;
Status try_query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type,
uint32_t* count) override {
return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
}
uint32_t* count) override;
Status bkd_query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type,
std::shared_ptr<lucene::util::bkd::bkd_reader>&& r,
@ -213,7 +211,7 @@ public:
Status read_from_inverted_index(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, uint32_t segment_num_rows,
roaring::Roaring* bit_map);
roaring::Roaring* bit_map, bool skip_try = false);
Status try_read_from_inverted_index(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, uint32_t* count);

View File

@ -584,8 +584,9 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
}
if (!res.ok()) {
if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND &&
pred->type() != PredicateType::MATCH) {
if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND &&
pred->type() != PredicateType::MATCH) ||
res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT) {
// downgrade without index query
continue;
}
@ -655,8 +656,9 @@ Status SegmentIterator::_apply_inverted_index() {
Status res = pred->evaluate(_schema, _inverted_index_iterators[unique_id], num_rows(),
&bitmap);
if (!res.ok()) {
if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND &&
pred->type() != PredicateType::MATCH) {
if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND &&
pred->type() != PredicateType::MATCH) ||
res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT) {
//downgrade without index query
remaining_predicates.push_back(pred);
continue;