[Feature-WIP](inverted index)(bkd) Support try query before query bkd to improve query efficiency (#16075)
This commit is contained in:
@ -251,6 +251,7 @@ E(INVERTED_INDEX_INVALID_PARAMETERS, -6000);
|
||||
E(INVERTED_INDEX_NOT_SUPPORTED, -6001);
|
||||
E(INVERTED_INDEX_CLUCENE_ERROR, -6002);
|
||||
E(INVERTED_INDEX_FILE_NOT_FOUND, -6003);
|
||||
E(INVERTED_INDEX_FILE_HIT_LIMIT, -6004);
|
||||
#undef E
|
||||
}; // namespace ErrorCode
|
||||
|
||||
@ -276,7 +277,8 @@ static constexpr bool capture_stacktrace() {
|
||||
&& code != ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS
|
||||
&& code != ErrorCode::INVERTED_INDEX_NOT_SUPPORTED
|
||||
&& code != ErrorCode::INVERTED_INDEX_CLUCENE_ERROR
|
||||
&& code != ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND;
|
||||
&& code != ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND
|
||||
&& code != ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT;
|
||||
}
|
||||
// clang-format on
|
||||
|
||||
|
||||
@ -60,7 +60,7 @@ Status MatchPredicate::evaluate(const Schema& schema, InvertedIndexIterator* ite
|
||||
char buf[column_desc->get_sub_field(0)->type_info()->size()];
|
||||
column_desc->get_sub_field(0)->from_string(buf, _value);
|
||||
s = iterator->read_from_inverted_index(column_desc->name(), buf, inverted_index_query_type,
|
||||
num_rows, &roaring);
|
||||
num_rows, &roaring, true);
|
||||
}
|
||||
*bitmap &= roaring;
|
||||
return s;
|
||||
|
||||
@ -359,6 +359,26 @@ Status BkdIndexReader::bkd_query(const std::string& column_name, const void* que
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Runs a "try" pass over the BKD index: instead of filling a bitmap, it asks the
// BKD reader for an estimated point count for the given query and stores that
// estimate in *count.
//
// column_name   - forwarded to bkd_query() and included in the log line.
// query_value   - opaque query operand, forwarded to bkd_query().
// query_type    - forwarded to both the visitor and bkd_query().
// analyser_type - not used by this implementation.
// count         - output; written only when bkd_query() succeeds and no
//                 CLuceneError is thrown.
//
// Returns the failing Status from bkd_query() if it reports an error,
// INVERTED_INDEX_CLUCENE_ERROR if a CLuceneError is caught, and OK otherwise.
Status BkdIndexReader::try_query(const std::string& column_name, const void* query_value,
                                 InvertedIndexQueryType query_type,
                                 InvertedIndexParserType analyser_type, uint32_t* count) {
    const uint64_t begin_ms = UnixMillis();

    // NOTE(review): the visitor is built with nullptr as its first argument and
    // `true` as its last -- presumably "no result bitmap" and "estimate only";
    // confirm against the InvertedIndexVisitor constructor.
    auto estimate_visitor = std::make_unique<InvertedIndexVisitor>(nullptr, query_type, true);
    std::shared_ptr<lucene::util::bkd::bkd_reader> bkd;

    try {
        // bkd_query() takes the reader by rvalue reference; std::move is only a
        // cast here, and `bkd` is dereferenced right after the call returns OK.
        RETURN_IF_ERROR(bkd_query(column_name, query_value, query_type, std::move(bkd),
                                  estimate_visitor.get()));
        *count = bkd->estimate_point_count(estimate_visitor.get());
    } catch (const CLuceneError& err) {
        LOG(WARNING) << "BKD Query CLuceneError Occurred, error msg: " << err.what();
        return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
    }

    const uint64_t elapsed_ms = UnixMillis() - begin_ms;
    LOG(INFO) << "BKD index try search time taken: " << elapsed_ms << "ms "
              << " column: " << column_name << " result: " << *count;
    return Status::OK();
}
|
||||
|
||||
Status BkdIndexReader::query(const std::string& column_name, const void* query_value,
|
||||
InvertedIndexQueryType query_type,
|
||||
InvertedIndexParserType analyser_type, roaring::Roaring* bit_map) {
|
||||
@ -598,7 +618,19 @@ Status InvertedIndexIterator::read_from_inverted_index(const std::string& column
|
||||
const void* query_value,
|
||||
InvertedIndexQueryType query_type,
|
||||
uint32_t segment_num_rows,
|
||||
roaring::Roaring* bit_map) {
|
||||
roaring::Roaring* bit_map, bool skip_try) {
|
||||
if (!skip_try && _reader->type() == InvertedIndexReaderType::BKD) {
|
||||
auto query_bkd_limit_percent = config::query_bkd_inverted_index_limit_percent;
|
||||
uint32_t hit_count = 0;
|
||||
RETURN_IF_ERROR(
|
||||
try_read_from_inverted_index(column_name, query_value, query_type, &hit_count));
|
||||
if (hit_count > segment_num_rows * query_bkd_limit_percent / 100) {
|
||||
LOG(INFO) << "hit count: " << hit_count << "for bkd inverted reached limit "
|
||||
<< query_bkd_limit_percent << "%, segment num rows: " << segment_num_rows;
|
||||
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT>();
|
||||
}
|
||||
}
|
||||
|
||||
RETURN_IF_ERROR(_reader->query(column_name, query_value, query_type, _analyser_type, bit_map));
|
||||
return Status::OK();
|
||||
}
|
||||
@ -607,6 +639,15 @@ Status InvertedIndexIterator::try_read_from_inverted_index(const std::string& co
|
||||
const void* query_value,
|
||||
InvertedIndexQueryType query_type,
|
||||
uint32_t* count) {
|
||||
// NOTE: only bkd index support try read now.
|
||||
if (query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY ||
|
||||
query_type == InvertedIndexQueryType::GREATER_THAN_QUERY ||
|
||||
query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY ||
|
||||
query_type == InvertedIndexQueryType::LESS_THAN_QUERY ||
|
||||
query_type == InvertedIndexQueryType::EQUAL_QUERY) {
|
||||
RETURN_IF_ERROR(
|
||||
_reader->try_query(column_name, query_value, query_type, _analyser_type, count));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@ -186,9 +186,7 @@ public:
|
||||
roaring::Roaring* bit_map) override;
|
||||
Status try_query(const std::string& column_name, const void* query_value,
|
||||
InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type,
|
||||
uint32_t* count) override {
|
||||
return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
|
||||
}
|
||||
uint32_t* count) override;
|
||||
Status bkd_query(const std::string& column_name, const void* query_value,
|
||||
InvertedIndexQueryType query_type,
|
||||
std::shared_ptr<lucene::util::bkd::bkd_reader>&& r,
|
||||
@ -213,7 +211,7 @@ public:
|
||||
|
||||
Status read_from_inverted_index(const std::string& column_name, const void* query_value,
|
||||
InvertedIndexQueryType query_type, uint32_t segment_num_rows,
|
||||
roaring::Roaring* bit_map);
|
||||
roaring::Roaring* bit_map, bool skip_try = false);
|
||||
Status try_read_from_inverted_index(const std::string& column_name, const void* query_value,
|
||||
InvertedIndexQueryType query_type, uint32_t* count);
|
||||
|
||||
|
||||
@ -584,8 +584,9 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
|
||||
}
|
||||
|
||||
if (!res.ok()) {
|
||||
if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND &&
|
||||
pred->type() != PredicateType::MATCH) {
|
||||
if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND &&
|
||||
pred->type() != PredicateType::MATCH) ||
|
||||
res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT) {
|
||||
// downgrade without index query
|
||||
continue;
|
||||
}
|
||||
@ -655,8 +656,9 @@ Status SegmentIterator::_apply_inverted_index() {
|
||||
Status res = pred->evaluate(_schema, _inverted_index_iterators[unique_id], num_rows(),
|
||||
&bitmap);
|
||||
if (!res.ok()) {
|
||||
if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND &&
|
||||
pred->type() != PredicateType::MATCH) {
|
||||
if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND &&
|
||||
pred->type() != PredicateType::MATCH) ||
|
||||
res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT) {
|
||||
//downgrade without index query
|
||||
remaining_predicates.push_back(pred);
|
||||
continue;
|
||||
|
||||
Reference in New Issue
Block a user