[improvement](column reader) lazy load indices (#20456)
Currently, when reading column data, all types of indices are read even if they are not actually used. This PR implements lazy loading of indices.
This commit is contained in:
@ -179,7 +179,8 @@ ColumnReader::ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB&
|
||||
_opts(opts),
|
||||
_num_rows(num_rows),
|
||||
_file_reader(std::move(file_reader)),
|
||||
_dict_encoding_type(UNKNOWN_DICT_ENCODING) {}
|
||||
_dict_encoding_type(UNKNOWN_DICT_ENCODING),
|
||||
_use_index_page_cache(!config::disable_storage_page_cache) {}
|
||||
|
||||
ColumnReader::~ColumnReader() = default;
|
||||
|
||||
@ -195,15 +196,20 @@ Status ColumnReader::init() {
|
||||
switch (index_meta.type()) {
|
||||
case ORDINAL_INDEX:
|
||||
_ordinal_index_meta = &index_meta.ordinal_index();
|
||||
_ordinal_index.reset(
|
||||
new OrdinalIndexReader(_file_reader, _ordinal_index_meta, _num_rows));
|
||||
break;
|
||||
case ZONE_MAP_INDEX:
|
||||
_zone_map_index_meta = &index_meta.zone_map_index();
|
||||
_zone_map_index.reset(new ZoneMapIndexReader(_file_reader, _zone_map_index_meta));
|
||||
break;
|
||||
case BITMAP_INDEX:
|
||||
_bitmap_index_meta = &index_meta.bitmap_index();
|
||||
_bitmap_index.reset(new BitmapIndexReader(_file_reader, _bitmap_index_meta));
|
||||
break;
|
||||
case BLOOM_FILTER_INDEX:
|
||||
_bf_index_meta = &index_meta.bloom_filter_index();
|
||||
_bloom_filter_index.reset(new BloomFilterIndexReader(_file_reader, _bf_index_meta));
|
||||
break;
|
||||
default:
|
||||
return Status::Corruption("Bad file {}: invalid column index type {}",
|
||||
@ -220,7 +226,7 @@ Status ColumnReader::init() {
|
||||
}
|
||||
|
||||
Status ColumnReader::new_bitmap_index_iterator(BitmapIndexIterator** iterator) {
|
||||
RETURN_IF_ERROR(_ensure_index_loaded());
|
||||
RETURN_IF_ERROR(_load_bitmap_index(_use_index_page_cache, _opts.kept_in_memory));
|
||||
RETURN_IF_ERROR(_bitmap_index->new_iterator(iterator));
|
||||
return Status::OK();
|
||||
}
|
||||
@ -261,8 +267,6 @@ Status ColumnReader::read_page(const ColumnIteratorOptions& iter_opts, const Pag
|
||||
Status ColumnReader::get_row_ranges_by_zone_map(
|
||||
const AndBlockColumnPredicate* col_predicates,
|
||||
const std::vector<const ColumnPredicate*>* delete_predicates, RowRanges* row_ranges) {
|
||||
RETURN_IF_ERROR(_ensure_index_loaded());
|
||||
|
||||
std::vector<uint32_t> page_indexes;
|
||||
RETURN_IF_ERROR(_get_filtered_pages(col_predicates, delete_predicates, &page_indexes));
|
||||
RETURN_IF_ERROR(_calculate_row_ranges(page_indexes, row_ranges));
|
||||
@ -374,6 +378,8 @@ Status ColumnReader::_get_filtered_pages(
|
||||
const AndBlockColumnPredicate* col_predicates,
|
||||
const std::vector<const ColumnPredicate*>* delete_predicates,
|
||||
std::vector<uint32_t>* page_indexes) {
|
||||
RETURN_IF_ERROR(_load_zone_map_index(_use_index_page_cache, _opts.kept_in_memory));
|
||||
|
||||
FieldType type = _type_info->type();
|
||||
const std::vector<ZoneMapPB>& zone_maps = _zone_map_index->page_zone_maps();
|
||||
int32_t page_size = _zone_map_index->num_pages();
|
||||
@ -412,6 +418,7 @@ Status ColumnReader::_get_filtered_pages(
|
||||
Status ColumnReader::_calculate_row_ranges(const std::vector<uint32_t>& page_indexes,
|
||||
RowRanges* row_ranges) {
|
||||
row_ranges->clear();
|
||||
RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory));
|
||||
for (auto i : page_indexes) {
|
||||
ordinal_t page_first_id = _ordinal_index->get_first_ordinal(i);
|
||||
ordinal_t page_last_id = _ordinal_index->get_last_ordinal(i);
|
||||
@ -423,7 +430,8 @@ Status ColumnReader::_calculate_row_ranges(const std::vector<uint32_t>& page_ind
|
||||
|
||||
Status ColumnReader::get_row_ranges_by_bloom_filter(const AndBlockColumnPredicate* col_predicates,
|
||||
RowRanges* row_ranges) {
|
||||
RETURN_IF_ERROR(_ensure_index_loaded());
|
||||
RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory));
|
||||
RETURN_IF_ERROR(_load_bloom_filter_index(_use_index_page_cache, _opts.kept_in_memory));
|
||||
RowRanges bf_row_ranges;
|
||||
std::unique_ptr<BloomFilterIndexIterator> bf_iter;
|
||||
RETURN_IF_ERROR(_bloom_filter_index->new_iterator(&bf_iter));
|
||||
@ -455,22 +463,25 @@ Status ColumnReader::get_row_ranges_by_bloom_filter(const AndBlockColumnPredicat
|
||||
|
||||
Status ColumnReader::_load_ordinal_index(bool use_page_cache, bool kept_in_memory) {
|
||||
DCHECK(_ordinal_index_meta != nullptr);
|
||||
_ordinal_index.reset(new OrdinalIndexReader(_file_reader, _ordinal_index_meta, _num_rows));
|
||||
return _ordinal_index->load(use_page_cache, kept_in_memory);
|
||||
return _load_ordinal_index_once.call([this, use_page_cache, kept_in_memory] {
|
||||
return _ordinal_index->load(use_page_cache, kept_in_memory);
|
||||
});
|
||||
}
|
||||
|
||||
Status ColumnReader::_load_zone_map_index(bool use_page_cache, bool kept_in_memory) {
|
||||
if (_zone_map_index_meta != nullptr) {
|
||||
_zone_map_index.reset(new ZoneMapIndexReader(_file_reader, _zone_map_index_meta));
|
||||
return _zone_map_index->load(use_page_cache, kept_in_memory);
|
||||
return _load_zone_map_index_once.call([this, use_page_cache, kept_in_memory] {
|
||||
return _zone_map_index->load(use_page_cache, kept_in_memory);
|
||||
});
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ColumnReader::_load_bitmap_index(bool use_page_cache, bool kept_in_memory) {
|
||||
if (_bitmap_index_meta != nullptr) {
|
||||
_bitmap_index.reset(new BitmapIndexReader(_file_reader, _bitmap_index_meta));
|
||||
return _bitmap_index->load(use_page_cache, kept_in_memory);
|
||||
return _load_bitmap_index_once.call([this, use_page_cache, kept_in_memory] {
|
||||
return _bitmap_index->load(use_page_cache, kept_in_memory);
|
||||
});
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -513,14 +524,15 @@ Status ColumnReader::_load_inverted_index_index(const TabletIndex* index_meta) {
|
||||
|
||||
Status ColumnReader::_load_bloom_filter_index(bool use_page_cache, bool kept_in_memory) {
|
||||
if (_bf_index_meta != nullptr) {
|
||||
_bloom_filter_index.reset(new BloomFilterIndexReader(_file_reader, _bf_index_meta));
|
||||
return _bloom_filter_index->load(use_page_cache, kept_in_memory);
|
||||
return _load_bloom_filter_index_once.call([this, use_page_cache, kept_in_memory] {
|
||||
return _bloom_filter_index->load(use_page_cache, kept_in_memory);
|
||||
});
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ColumnReader::seek_to_first(OrdinalPageIndexIterator* iter) {
|
||||
RETURN_IF_ERROR(_ensure_index_loaded());
|
||||
RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory));
|
||||
*iter = _ordinal_index->begin();
|
||||
if (!iter->valid()) {
|
||||
return Status::NotFound("Failed to seek to first rowid");
|
||||
@ -529,7 +541,7 @@ Status ColumnReader::seek_to_first(OrdinalPageIndexIterator* iter) {
|
||||
}
|
||||
|
||||
Status ColumnReader::seek_at_or_before(ordinal_t ordinal, OrdinalPageIndexIterator* iter) {
|
||||
RETURN_IF_ERROR(_ensure_index_loaded());
|
||||
RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory));
|
||||
*iter = _ordinal_index->seek_at_or_before(ordinal);
|
||||
if (!iter->valid()) {
|
||||
return Status::NotFound("Failed to seek to ordinal {}, ", ordinal);
|
||||
|
||||
@ -173,26 +173,13 @@ public:
|
||||
|
||||
DictEncodingType get_dict_encoding_type() { return _dict_encoding_type; }
|
||||
|
||||
void disable_index_meta_cache() { _index_meta_use_page_cache = false; }
|
||||
void disable_index_meta_cache() { _use_index_page_cache = false; }
|
||||
|
||||
private:
|
||||
ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB& meta, uint64_t num_rows,
|
||||
io::FileReaderSPtr file_reader);
|
||||
Status init();
|
||||
|
||||
// Read and load necessary column indexes into memory if they haven't been loaded.
|
||||
// May be called multiple times; subsequent calls are no-ops.
|
||||
Status _ensure_index_loaded() {
|
||||
return _load_index_once.call([this] {
|
||||
bool use_page_cache = !config::disable_storage_page_cache && _index_meta_use_page_cache;
|
||||
RETURN_IF_ERROR(_load_zone_map_index(use_page_cache, _opts.kept_in_memory));
|
||||
RETURN_IF_ERROR(_load_ordinal_index(use_page_cache, _opts.kept_in_memory));
|
||||
RETURN_IF_ERROR(_load_bitmap_index(use_page_cache, _opts.kept_in_memory));
|
||||
RETURN_IF_ERROR(_load_bloom_filter_index(use_page_cache, _opts.kept_in_memory));
|
||||
return Status::OK();
|
||||
});
|
||||
}
|
||||
|
||||
// Read column inverted indexes into memory
|
||||
// May be called multiple times; subsequent calls are no-ops.
|
||||
Status _ensure_inverted_index_loaded(const TabletIndex* index_meta) {
|
||||
@ -201,11 +188,11 @@ private:
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status _load_zone_map_index(bool use_page_cache, bool kept_in_memory);
|
||||
Status _load_ordinal_index(bool use_page_cache, bool kept_in_memory);
|
||||
Status _load_bitmap_index(bool use_page_cache, bool kept_in_memory);
|
||||
Status _load_inverted_index_index(const TabletIndex* index_meta);
|
||||
Status _load_bloom_filter_index(bool use_page_cache, bool kept_in_memory);
|
||||
[[nodiscard]] Status _load_zone_map_index(bool use_page_cache, bool kept_in_memory);
|
||||
[[nodiscard]] Status _load_ordinal_index(bool use_page_cache, bool kept_in_memory);
|
||||
[[nodiscard]] Status _load_bitmap_index(bool use_page_cache, bool kept_in_memory);
|
||||
[[nodiscard]] Status _load_inverted_index_index(const TabletIndex* index_meta);
|
||||
[[nodiscard]] Status _load_bloom_filter_index(bool use_page_cache, bool kept_in_memory);
|
||||
|
||||
bool _zone_map_match_condition(const ZoneMapPB& zone_map, WrapperField* min_value_container,
|
||||
WrapperField* max_value_container,
|
||||
@ -237,20 +224,25 @@ private:
|
||||
const EncodingInfo* _encoding_info =
|
||||
nullptr; // initialized in init(), used for create PageDecoder
|
||||
|
||||
bool _use_index_page_cache;
|
||||
|
||||
// meta for various column indexes (null if the index is absent)
|
||||
bool _index_meta_use_page_cache = true;
|
||||
const ZoneMapIndexPB* _zone_map_index_meta = nullptr;
|
||||
const OrdinalIndexPB* _ordinal_index_meta = nullptr;
|
||||
const BitmapIndexPB* _bitmap_index_meta = nullptr;
|
||||
const BloomFilterIndexPB* _bf_index_meta = nullptr;
|
||||
|
||||
DorisCallOnce<Status> _load_index_once;
|
||||
mutable std::mutex _load_index_lock;
|
||||
std::unique_ptr<ZoneMapIndexReader> _zone_map_index;
|
||||
std::unique_ptr<OrdinalIndexReader> _ordinal_index;
|
||||
std::unique_ptr<BitmapIndexReader> _bitmap_index;
|
||||
std::unique_ptr<InvertedIndexReader> _inverted_index;
|
||||
std::unique_ptr<BloomFilterIndexReader> _bloom_filter_index;
|
||||
DorisCallOnce<Status> _load_zone_map_index_once;
|
||||
DorisCallOnce<Status> _load_ordinal_index_once;
|
||||
DorisCallOnce<Status> _load_bitmap_index_once;
|
||||
DorisCallOnce<Status> _load_bloom_filter_index_once;
|
||||
DorisCallOnce<Status> _load_inverted_index_once;
|
||||
|
||||
std::vector<std::unique_ptr<ColumnReader>> _sub_readers;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user