[improvement](segment) reduce memory footprint of column_reader and segment (#24140)

This commit is contained in:
Yongqiang YANG
2023-09-11 21:54:00 +08:00
committed by GitHub
parent 0c30fff811
commit 1228995dec
16 changed files with 145 additions and 129 deletions

View File

@ -105,7 +105,7 @@ Status PrimaryKeyIndexReader::parse_bf(io::FileReaderSPtr file_reader,
// parse bloom filter
segment_v2::ColumnIndexMetaPB column_index_meta = meta.bloom_filter_index();
segment_v2::BloomFilterIndexReader bf_index_reader(std::move(file_reader),
&column_index_meta.bloom_filter_index());
column_index_meta.bloom_filter_index());
RETURN_IF_ERROR(bf_index_reader.load(!config::disable_pk_storage_page_cache, false));
std::unique_ptr<segment_v2::BloomFilterIndexIterator> bf_iter;
RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter));

View File

@ -32,16 +32,15 @@
namespace doris {
namespace segment_v2 {
Status BitmapIndexReader::load(bool use_page_cache, bool kept_in_memory,
const BitmapIndexPB* index_meta) {
Status BitmapIndexReader::load(bool use_page_cache, bool kept_in_memory) {
// TODO yyq: implement a new once flag to avoid constructing a Status.
return _load_once.call([this, use_page_cache, kept_in_memory, index_meta] {
return _load(use_page_cache, kept_in_memory, index_meta);
return _load_once.call([this, use_page_cache, kept_in_memory] {
return _load(use_page_cache, kept_in_memory, std::move(_index_meta));
});
}
Status BitmapIndexReader::_load(bool use_page_cache, bool kept_in_memory,
const BitmapIndexPB* index_meta) {
std::unique_ptr<BitmapIndexPB> index_meta) {
const IndexedColumnMetaPB& dict_meta = index_meta->dict_column();
const IndexedColumnMetaPB& bitmap_meta = index_meta->bitmap_column();
_has_null = index_meta->has_null();

View File

@ -43,11 +43,13 @@ class BitmapIndexPB;
class BitmapIndexReader {
public:
explicit BitmapIndexReader(io::FileReaderSPtr file_reader)
explicit BitmapIndexReader(io::FileReaderSPtr file_reader, const BitmapIndexPB& index_meta)
: _file_reader(std::move(file_reader)),
_type_info(get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>()) {}
_type_info(get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>()) {
_index_meta.reset(new BitmapIndexPB(index_meta));
}
Status load(bool use_page_cache, bool kept_in_memory, const BitmapIndexPB*);
Status load(bool use_page_cache, bool kept_in_memory);
// create a new column iterator. Client should delete returned iterator
Status new_iterator(BitmapIndexIterator** iterator);
@ -57,7 +59,7 @@ public:
const TypeInfo* type_info() { return _type_info; }
private:
Status _load(bool use_page_cache, bool kept_in_memory, const BitmapIndexPB*);
Status _load(bool use_page_cache, bool kept_in_memory, std::unique_ptr<BitmapIndexPB>);
private:
friend class BitmapIndexIterator;
@ -68,6 +70,7 @@ private:
DorisCallOnce<Status> _load_once;
std::unique_ptr<IndexedColumnReader> _dict_column_reader;
std::unique_ptr<IndexedColumnReader> _bitmap_column_reader;
std::unique_ptr<BitmapIndexPB> _index_meta;
};
class BitmapIndexIterator {

View File

@ -41,13 +41,16 @@ class BloomFilterIndexPB;
class BloomFilterIndexReader {
public:
explicit BloomFilterIndexReader(io::FileReaderSPtr file_reader,
const BloomFilterIndexPB* bloom_filter_index_meta)
const BloomFilterIndexPB& bloom_filter_index_meta)
: _file_reader(std::move(file_reader)),
_type_info(get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>()),
_bloom_filter_index_meta(bloom_filter_index_meta) {}
_type_info(get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>()) {
_bloom_filter_index_meta.reset(new BloomFilterIndexPB(bloom_filter_index_meta));
}
Status load(bool use_page_cache, bool kept_in_memory);
BloomFilterAlgorithmPB algorithm() { return _bloom_filter_index_meta->algorithm(); }
// create a new column iterator.
Status new_iterator(std::unique_ptr<BloomFilterIndexIterator>* iterator);
@ -62,7 +65,7 @@ private:
io::FileReaderSPtr _file_reader;
DorisCallOnce<Status> _load_once;
const TypeInfo* _type_info;
const BloomFilterIndexPB* _bloom_filter_index_meta;
std::unique_ptr<BloomFilterIndexPB> _bloom_filter_index_meta;
std::unique_ptr<IndexedColumnReader> _bloom_filter_reader;
};

View File

@ -179,11 +179,11 @@ Status ColumnReader::create(const ColumnReaderOptions& opts, const ColumnMetaPB&
ColumnReader::ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB& meta,
uint64_t num_rows, io::FileReaderSPtr file_reader)
: _opts(opts),
: _use_index_page_cache(!config::disable_storage_page_cache),
_opts(opts),
_num_rows(num_rows),
_file_reader(std::move(file_reader)),
_dict_encoding_type(UNKNOWN_DICT_ENCODING),
_use_index_page_cache(!config::disable_storage_page_cache) {
_dict_encoding_type(UNKNOWN_DICT_ENCODING) {
_meta_length = meta.length();
_meta_type = (FieldType)meta.type();
if (_meta_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
@ -207,32 +207,35 @@ Status ColumnReader::init(const ColumnMetaPB* meta) {
auto& index_meta = meta->indexes(i);
switch (index_meta.type()) {
case ORDINAL_INDEX:
_ordinal_index_meta = &index_meta.ordinal_index();
_ordinal_index.reset(new OrdinalIndexReader(_file_reader, _num_rows));
_ordinal_index.reset(
new OrdinalIndexReader(_file_reader, _num_rows, index_meta.ordinal_index()));
break;
case ZONE_MAP_INDEX:
_zone_map_index_meta = &index_meta.zone_map_index();
_zone_map_index.reset(new ZoneMapIndexReader(_file_reader));
_segment_zone_map =
std::make_unique<ZoneMapPB>(index_meta.zone_map_index().segment_zone_map());
_zone_map_index.reset(new ZoneMapIndexReader(
_file_reader, index_meta.zone_map_index().page_zone_maps()));
break;
case BITMAP_INDEX:
_bitmap_index_meta = &index_meta.bitmap_index();
_bitmap_index.reset(new BitmapIndexReader(_file_reader));
_bitmap_index.reset(new BitmapIndexReader(_file_reader, index_meta.bitmap_index()));
break;
case BLOOM_FILTER_INDEX:
_bf_index_meta = &index_meta.bloom_filter_index();
_bloom_filter_index.reset(new BloomFilterIndexReader(_file_reader, _bf_index_meta));
_bloom_filter_index.reset(
new BloomFilterIndexReader(_file_reader, index_meta.bloom_filter_index()));
break;
default:
return Status::Corruption("Bad file {}: invalid column index type {}",
_file_reader->path().native(), index_meta.type());
}
}
// ArrayColumnWriter writes a single empty array and flushes. In this scenario,
// the item writer doesn't write any data and the corresponding ordinal index is empty.
if (_ordinal_index_meta == nullptr && !is_empty()) {
if (_ordinal_index == nullptr && !is_empty()) {
return Status::Corruption("Bad file {}: missing ordinal index for column {}",
_file_reader->path().native(), meta->column_id());
}
return Status::OK();
}
@ -290,8 +293,7 @@ Status ColumnReader::next_batch_of_zone_map(size_t* n, vectorized::MutableColumn
FieldType type = _type_info->type();
std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, _meta_length));
std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, _meta_length));
_parse_zone_map_skip_null(_zone_map_index_meta->segment_zone_map(), min_value.get(),
max_value.get());
_parse_zone_map_skip_null(*_segment_zone_map, min_value.get(), max_value.get());
dst->reserve(*n);
bool is_string = is_olap_string_type(type);
@ -325,16 +327,16 @@ Status ColumnReader::next_batch_of_zone_map(size_t* n, vectorized::MutableColumn
}
bool ColumnReader::match_condition(const AndBlockColumnPredicate* col_predicates) const {
if (_zone_map_index_meta == nullptr) {
if (_zone_map_index == nullptr) {
return true;
}
FieldType type = _type_info->type();
std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, _meta_length));
std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, _meta_length));
_parse_zone_map(_zone_map_index_meta->segment_zone_map(), min_value.get(), max_value.get());
_parse_zone_map(*_segment_zone_map, min_value.get(), max_value.get());
return _zone_map_match_condition(_zone_map_index_meta->segment_zone_map(), min_value.get(),
max_value.get(), col_predicates);
return _zone_map_match_condition(*_segment_zone_map, min_value.get(), max_value.get(),
col_predicates);
}
void ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, WrapperField* min_value_container,
@ -473,20 +475,19 @@ Status ColumnReader::get_row_ranges_by_bloom_filter(const AndBlockColumnPredicat
}
Status ColumnReader::_load_ordinal_index(bool use_page_cache, bool kept_in_memory) {
DCHECK(_ordinal_index_meta != nullptr);
return _ordinal_index->load(use_page_cache, kept_in_memory, _ordinal_index_meta);
return _ordinal_index->load(use_page_cache, kept_in_memory);
}
Status ColumnReader::_load_zone_map_index(bool use_page_cache, bool kept_in_memory) {
if (_zone_map_index_meta != nullptr) {
return _zone_map_index->load(use_page_cache, kept_in_memory, _zone_map_index_meta);
if (_zone_map_index != nullptr) {
return _zone_map_index->load(use_page_cache, kept_in_memory);
}
return Status::OK();
}
Status ColumnReader::_load_bitmap_index(bool use_page_cache, bool kept_in_memory) {
if (_bitmap_index_meta != nullptr) {
return _bitmap_index->load(use_page_cache, kept_in_memory, _bitmap_index_meta);
if (_bitmap_index != nullptr) {
return _bitmap_index->load(use_page_cache, kept_in_memory);
}
return Status::OK();
}
@ -527,8 +528,18 @@ Status ColumnReader::_load_inverted_index_index(const TabletIndex* index_meta) {
return Status::OK();
}
// Reports whether this column carries a bloom filter index of the requested kind.
//   ngram == true  : true only if the index algorithm is NGRAM_BLOOM_FILTER.
//   ngram == false : true only if the index algorithm is anything else.
// Returns false when no bloom filter index reader has been created for the column.
bool ColumnReader::has_bloom_filter_index(bool ngram) const {
    if (!_bloom_filter_index) {
        return false;
    }
    const bool is_ngram_filter =
            _bloom_filter_index->algorithm() == BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER;
    // The caller's request matches exactly when both flags agree.
    return is_ngram_filter == ngram;
}
Status ColumnReader::_load_bloom_filter_index(bool use_page_cache, bool kept_in_memory) {
if (_bf_index_meta != nullptr) {
if (_bloom_filter_index != nullptr) {
return _bloom_filter_index->load(use_page_cache, kept_in_memory);
}
return Status::OK();

View File

@ -137,18 +137,9 @@ public:
const EncodingInfo* encoding_info() const { return _encoding_info; }
bool has_zone_map() const { return _zone_map_index_meta != nullptr; }
bool has_bitmap_index() const { return _bitmap_index_meta != nullptr; }
bool has_bloom_filter_index(bool ngram) const {
if (_bf_index_meta == nullptr) return false;
if (ngram) {
return _bf_index_meta->algorithm() == BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER;
} else {
return _bf_index_meta->algorithm() != BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER;
}
}
bool has_zone_map() const { return _zone_map_index != nullptr; }
bool has_bitmap_index() const { return _bitmap_index != nullptr; }
bool has_bloom_filter_index(bool ngram) const;
// Check if this column could match `cond' using segment zone map.
// Since segment zone map is stored in metadata, this function is fast without I/O.
// Return true if segment zone map is absent or `cond' could be satisfied, false otherwise.
@ -226,6 +217,8 @@ private:
FieldType _meta_type;
FieldType _meta_children_column_type;
bool _meta_is_nullable;
bool _use_index_page_cache;
PagePointer _meta_dict_page;
CompressionTypePB _meta_compression;
@ -241,20 +234,15 @@ private:
const EncodingInfo* _encoding_info =
nullptr; // initialized in init(), used for create PageDecoder
bool _use_index_page_cache;
// meta for various column indexes (null if the index is absent)
const ZoneMapIndexPB* _zone_map_index_meta = nullptr;
const OrdinalIndexPB* _ordinal_index_meta = nullptr;
const BitmapIndexPB* _bitmap_index_meta = nullptr;
const BloomFilterIndexPB* _bf_index_meta = nullptr;
std::unique_ptr<ZoneMapPB> _segment_zone_map;
mutable std::mutex _load_index_lock;
std::unique_ptr<ZoneMapIndexReader> _zone_map_index;
std::unique_ptr<OrdinalIndexReader> _ordinal_index;
std::unique_ptr<BitmapIndexReader> _bitmap_index;
std::shared_ptr<InvertedIndexReader> _inverted_index;
std::unique_ptr<BloomFilterIndexReader> _bloom_filter_index;
std::shared_ptr<BloomFilterIndexReader> _bloom_filter_index;
std::vector<std::unique_ptr<ColumnReader>> _sub_readers;

View File

@ -67,16 +67,15 @@ Status OrdinalIndexWriter::finish(io::FileWriter* file_writer, ColumnIndexMetaPB
return Status::OK();
}
Status OrdinalIndexReader::load(bool use_page_cache, bool kept_in_memory,
const OrdinalIndexPB* index_meta) {
Status OrdinalIndexReader::load(bool use_page_cache, bool kept_in_memory) {
// TODO yyq: implement a new once flag to avoid constructing a Status.
return _load_once.call([this, use_page_cache, kept_in_memory, index_meta] {
return _load(use_page_cache, kept_in_memory, index_meta);
return _load_once.call([this, use_page_cache, kept_in_memory] {
return _load(use_page_cache, kept_in_memory, std::move(_meta_pb));
});
}
Status OrdinalIndexReader::_load(bool use_page_cache, bool kept_in_memory,
const OrdinalIndexPB* index_meta) {
std::unique_ptr<OrdinalIndexPB> index_meta) {
if (index_meta->root_page().is_root_data_page()) {
// only one data page, no index page
_num_pages = 1;

View File

@ -66,11 +66,14 @@ class OrdinalPageIndexIterator;
class OrdinalIndexReader {
public:
explicit OrdinalIndexReader(io::FileReaderSPtr file_reader, ordinal_t num_values)
: _file_reader(std::move(file_reader)), _num_values(num_values) {}
explicit OrdinalIndexReader(io::FileReaderSPtr file_reader, ordinal_t num_values,
const OrdinalIndexPB& meta_pb)
: _file_reader(std::move(file_reader)), _num_values(num_values) {
_meta_pb.reset(new OrdinalIndexPB(meta_pb));
}
// load and parse the index page into memory
Status load(bool use_page_cache, bool kept_in_memory, const OrdinalIndexPB* index_meta);
Status load(bool use_page_cache, bool kept_in_memory);
// the returned iter points to the largest element which is less than `ordinal`,
// or points to the first element if all elements are greater than `ordinal`,
@ -88,7 +91,8 @@ public:
int32_t num_data_pages() const { return _num_pages; }
private:
Status _load(bool use_page_cache, bool kept_in_memory, const OrdinalIndexPB* index_meta);
Status _load(bool use_page_cache, bool kept_in_memory,
std::unique_ptr<OrdinalIndexPB> index_meta);
private:
friend OrdinalPageIndexIterator;
@ -96,6 +100,8 @@ private:
io::FileReaderSPtr _file_reader;
DorisCallOnce<Status> _load_once;
std::unique_ptr<OrdinalIndexPB> _meta_pb;
// total number of values (including NULLs) in the indexed column,
// equal to 1 + 'last ordinal of the last data page'
ordinal_t _num_values;

View File

@ -84,9 +84,9 @@ Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t se
Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema)
: _segment_id(segment_id),
_meta_mem_usage(0),
_rowset_id(rowset_id),
_tablet_schema(tablet_schema),
_meta_mem_usage(0),
_segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) {}
Segment::~Segment() {
@ -96,8 +96,16 @@ Segment::~Segment() {
}
Status Segment::_open() {
RETURN_IF_ERROR(_parse_footer());
RETURN_IF_ERROR(_create_column_readers());
SegmentFooterPB footer;
RETURN_IF_ERROR(_parse_footer(&footer));
RETURN_IF_ERROR(_create_column_readers(footer));
_pk_index_meta.reset(footer.has_primary_key_index_meta()
? new PrimaryKeyIndexMetaPB(footer.primary_key_index_meta())
: nullptr);
// delete_bitmap_calculator_test.cpp
// DCHECK(footer.has_short_key_index_page());
_sk_index_page = footer.short_key_index_page();
_num_rows = footer.num_rows();
return Status::OK();
}
@ -154,7 +162,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o
return iter->get()->init(read_options);
}
Status Segment::_parse_footer() {
Status Segment::_parse_footer(SegmentFooterPB* footer) {
// Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4), MagicNumber(4)
auto file_size = _file_reader->size();
if (file_size < 12) {
@ -170,7 +178,6 @@ Status Segment::_parse_footer() {
_file_reader->read_at(file_size - 12, Slice(fixed_buf, 12), &bytes_read, &io_ctx));
DCHECK_EQ(bytes_read, 12);
// validate magic number
if (memcmp(fixed_buf + 8, k_segment_magic, k_segment_magic_length) != 0) {
return Status::Corruption("Bad segment file {}: magic number not match",
_file_reader->path().native());
@ -182,8 +189,6 @@ Status Segment::_parse_footer() {
return Status::Corruption("Bad segment file {}: file size {} < {}",
_file_reader->path().native(), file_size, 12 + footer_length);
}
_meta_mem_usage += footer_length;
_segment_meta_mem_tracker->consume(footer_length);
std::string footer_buf;
footer_buf.resize(footer_length);
@ -201,7 +206,7 @@ Status Segment::_parse_footer() {
}
// deserialize footer PB
if (!_footer.ParseFromString(footer_buf)) {
if (!footer->ParseFromString(footer_buf)) {
return Status::Corruption("Bad segment file {}: failed to parse SegmentFooterPB",
_file_reader->path().native());
}
@ -210,10 +215,10 @@ Status Segment::_parse_footer() {
Status Segment::_load_pk_bloom_filter() {
DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS);
DCHECK(_footer.has_primary_key_index_meta());
DCHECK(_pk_index_meta != nullptr);
DCHECK(_pk_index_reader != nullptr);
return _load_pk_bf_once.call([this] {
RETURN_IF_ERROR(_pk_index_reader->parse_bf(_file_reader, _footer.primary_key_index_meta()));
RETURN_IF_ERROR(_pk_index_reader->parse_bf(_file_reader, *_pk_index_meta));
_meta_mem_usage += _pk_index_reader->get_bf_memory_size();
_segment_meta_mem_tracker->consume(_pk_index_reader->get_bf_memory_size());
return Status::OK();
@ -227,10 +232,9 @@ Status Segment::load_pk_index_and_bf() {
}
Status Segment::load_index() {
return _load_index_once.call([this] {
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _footer.has_primary_key_index_meta()) {
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr) {
_pk_index_reader.reset(new PrimaryKeyIndexReader());
RETURN_IF_ERROR(
_pk_index_reader->parse_index(_file_reader, _footer.primary_key_index_meta()));
RETURN_IF_ERROR(_pk_index_reader->parse_index(_file_reader, *_pk_index_meta));
_meta_mem_usage += _pk_index_reader->get_memory_size();
_segment_meta_mem_tracker->consume(_pk_index_reader->get_memory_size());
return Status::OK();
@ -238,7 +242,7 @@ Status Segment::load_index() {
// read and parse short key index page
PageReadOptions opts;
opts.file_reader = _file_reader.get();
opts.page_pointer = PagePointer(_footer.short_key_index_page());
opts.page_pointer = PagePointer(_sk_index_page);
opts.codec = nullptr; // short key index page uses NO_COMPRESSION for now
OlapReaderStatistics tmp_stats;
opts.use_page_cache = true;
@ -259,24 +263,26 @@ Status Segment::load_index() {
});
}
Status Segment::_create_column_readers() {
for (uint32_t ordinal = 0; ordinal < _footer.columns().size(); ++ordinal) {
auto& column_pb = _footer.columns(ordinal);
_column_id_to_footer_ordinal.emplace(column_pb.unique_id(), ordinal);
Status Segment::_create_column_readers(const SegmentFooterPB& footer) {
std::unordered_map<uint32_t, uint32_t> column_id_to_footer_ordinal;
for (uint32_t ordinal = 0; ordinal < footer.columns().size(); ++ordinal) {
auto& column_pb = footer.columns(ordinal);
column_id_to_footer_ordinal.emplace(column_pb.unique_id(), ordinal);
}
for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); ++ordinal) {
auto& column = _tablet_schema->column(ordinal);
auto iter = _column_id_to_footer_ordinal.find(column.unique_id());
if (iter == _column_id_to_footer_ordinal.end()) {
auto iter = column_id_to_footer_ordinal.find(column.unique_id());
if (iter == column_id_to_footer_ordinal.end()) {
continue;
}
ColumnReaderOptions opts;
opts.kept_in_memory = _tablet_schema->is_in_memory();
std::unique_ptr<ColumnReader> reader;
RETURN_IF_ERROR(ColumnReader::create(opts, _footer.columns(iter->second),
_footer.num_rows(), _file_reader, &reader));
RETURN_IF_ERROR(ColumnReader::create(opts, footer.columns(iter->second), footer.num_rows(),
_file_reader, &reader));
_column_readers.emplace(column.unique_id(), std::move(reader));
}
return Status::OK();

View File

@ -81,7 +81,7 @@ public:
RowsetId rowset_id() const { return _rowset_id; }
uint32_t num_rows() const { return _footer.num_rows(); }
uint32_t num_rows() const { return _num_rows; }
Status new_column_iterator(const TabletColumn& tablet_column,
std::unique_ptr<ColumnIterator>* iter);
@ -108,20 +108,17 @@ public:
Status read_key_by_rowid(uint32_t row_id, std::string* key);
// only used by UT
const SegmentFooterPB& footer() const { return _footer; }
Status load_index();
Status load_pk_index_and_bf();
std::string min_key() {
DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _footer.has_primary_key_index_meta());
return _footer.primary_key_index_meta().min_key();
DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr);
return _pk_index_meta->min_key();
}
std::string max_key() {
DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _footer.has_primary_key_index_meta());
return _footer.primary_key_index_meta().max_key();
DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr);
return _pk_index_meta->max_key();
}
io::FileReaderSPtr file_reader() { return _file_reader; }
@ -133,8 +130,8 @@ private:
Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema);
// open segment file and read the minimum amount of necessary information (footer)
Status _open();
Status _parse_footer();
Status _create_column_readers();
Status _parse_footer(SegmentFooterPB* footer);
Status _create_column_readers(const SegmentFooterPB& footer);
Status _load_pk_bloom_filter();
private:
@ -142,16 +139,14 @@ private:
io::FileReaderSPtr _file_reader;
uint32_t _segment_id;
uint32_t _num_rows;
int64_t _meta_mem_usage;
RowsetId _rowset_id;
TabletSchemaSPtr _tablet_schema;
int64_t _meta_mem_usage;
SegmentFooterPB _footer;
// Map from column unique id to column ordinal in footer's ColumnMetaPB
// If we can't find unique id from it, it means this segment is created
// with an old schema.
std::unordered_map<uint32_t, uint32_t> _column_id_to_footer_ordinal;
std::unique_ptr<PrimaryKeyIndexMetaPB> _pk_index_meta;
PagePointerPB _sk_index_page;
// map column unique id ---> column reader
// ColumnReader for each column in TabletSchema. If ColumnReader is nullptr,
@ -171,6 +166,7 @@ private:
std::unique_ptr<PrimaryKeyIndexReader> _pk_index_reader;
// Segment may be destructed after StorageEngine, in order to exit gracefully.
std::shared_ptr<MemTracker> _segment_meta_mem_tracker;
std::mutex _open_lock;
};
} // namespace segment_v2

View File

@ -145,17 +145,16 @@ Status TypedZoneMapIndexWriter<Type>::finish(io::FileWriter* file_writer,
return writer.finish(meta->mutable_page_zone_maps());
}
Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory,
const ZoneMapIndexPB* index_meta) {
Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory) {
// TODO yyq: implement a new once flag to avoid constructing a Status.
return _load_once.call([this, use_page_cache, kept_in_memory, index_meta] {
return _load(use_page_cache, kept_in_memory, index_meta);
return _load_once.call([this, use_page_cache, kept_in_memory] {
return _load(use_page_cache, kept_in_memory, std::move(_page_zone_maps_meta));
});
}
Status ZoneMapIndexReader::_load(bool use_page_cache, bool kept_in_memory,
const ZoneMapIndexPB* index_meta) {
IndexedColumnReader reader(_file_reader, index_meta->page_zone_maps());
std::unique_ptr<IndexedColumnMetaPB> page_zone_maps_meta) {
IndexedColumnReader reader(_file_reader, *page_zone_maps_meta);
RETURN_IF_ERROR(reader.load(use_page_cache, kept_in_memory));
IndexedColumnIterator iter(&reader);

View File

@ -147,23 +147,27 @@ private:
class ZoneMapIndexReader {
public:
explicit ZoneMapIndexReader(io::FileReaderSPtr file_reader)
: _file_reader(std::move(file_reader)) {}
explicit ZoneMapIndexReader(io::FileReaderSPtr file_reader,
const IndexedColumnMetaPB& page_zone_maps)
: _file_reader(std::move(file_reader)) {
_page_zone_maps_meta.reset(new IndexedColumnMetaPB(page_zone_maps));
}
// load all page zone maps into memory
Status load(bool use_page_cache, bool kept_in_memory, const ZoneMapIndexPB*);
Status load(bool use_page_cache, bool kept_in_memory);
const std::vector<ZoneMapPB>& page_zone_maps() const { return _page_zone_maps; }
int32_t num_pages() const { return _page_zone_maps.size(); }
private:
Status _load(bool use_page_cache, bool kept_in_memory, const ZoneMapIndexPB*);
Status _load(bool use_page_cache, bool kept_in_memory, std::unique_ptr<IndexedColumnMetaPB>);
private:
DorisCallOnce<Status> _load_once;
// TODO: yyq, we should remove file_reader from here.
io::FileReaderSPtr _file_reader;
std::unique_ptr<IndexedColumnMetaPB> _page_zone_maps_meta;
std::vector<ZoneMapPB> _page_zone_maps;
};

View File

@ -80,8 +80,8 @@ void get_bitmap_reader_iter(const std::string& file_name, const ColumnIndexMetaP
BitmapIndexReader** reader, BitmapIndexIterator** iter) {
io::FileReaderSPtr file_reader;
ASSERT_EQ(io::global_local_filesystem()->open_file(file_name, &file_reader), Status::OK());
*reader = new BitmapIndexReader(std::move(file_reader));
auto st = (*reader)->load(true, false, &meta.bitmap_index());
*reader = new BitmapIndexReader(std::move(file_reader), meta.bitmap_index());
auto st = (*reader)->load(true, false);
EXPECT_TRUE(st.ok());
st = (*reader)->new_iterator(iter);

View File

@ -97,7 +97,7 @@ void get_bloom_filter_reader_iter(const std::string& file_name, const ColumnInde
std::string fname = dname + "/" + file_name;
io::FileReaderSPtr file_reader;
ASSERT_EQ(io::global_local_filesystem()->open_file(fname, &file_reader), Status::OK());
*reader = new BloomFilterIndexReader(std::move(file_reader), &meta.bloom_filter_index());
*reader = new BloomFilterIndexReader(std::move(file_reader), meta.bloom_filter_index());
auto st = (*reader)->load(true, false);
EXPECT_TRUE(st.ok());

View File

@ -70,8 +70,8 @@ TEST_F(OrdinalPageIndexTest, normal) {
io::FileReaderSPtr file_reader;
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
OrdinalIndexReader index(file_reader, 16 * 1024 * 4096 + 1);
EXPECT_TRUE(index.load(true, false, &index_meta.ordinal_index()).ok());
OrdinalIndexReader index(file_reader, 16 * 1024 * 4096 + 1, index_meta.ordinal_index());
EXPECT_TRUE(index.load(true, false).ok());
EXPECT_EQ(16 * 1024, index.num_data_pages());
EXPECT_EQ(1, index.get_first_ordinal(0));
EXPECT_EQ(4096, index.get_last_ordinal(0));
@ -124,8 +124,8 @@ TEST_F(OrdinalPageIndexTest, one_data_page) {
EXPECT_EQ(data_page_pointer, root_page_pointer);
}
OrdinalIndexReader index(nullptr, num_values);
EXPECT_TRUE(index.load(true, false, &index_meta.ordinal_index()).ok());
OrdinalIndexReader index(nullptr, num_values, index_meta.ordinal_index());
EXPECT_TRUE(index.load(true, false).ok());
EXPECT_EQ(1, index.num_data_pages());
EXPECT_EQ(0, index.get_first_ordinal(0));
EXPECT_EQ(num_values - 1, index.get_last_ordinal(0));

View File

@ -81,8 +81,9 @@ public:
io::FileReaderSPtr file_reader;
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
ZoneMapIndexReader column_zone_map(file_reader);
Status status = column_zone_map.load(true, false, &index_meta.zone_map_index());
ZoneMapIndexReader column_zone_map(file_reader,
index_meta.zone_map_index().page_zone_maps());
Status status = column_zone_map.load(true, false);
EXPECT_TRUE(status.ok());
EXPECT_EQ(3, column_zone_map.num_pages());
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps();
@ -128,8 +129,9 @@ public:
io::FileReaderSPtr file_reader;
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
ZoneMapIndexReader column_zone_map(file_reader);
Status status = column_zone_map.load(true, false, &index_meta.zone_map_index());
ZoneMapIndexReader column_zone_map(file_reader,
index_meta.zone_map_index().page_zone_maps());
Status status = column_zone_map.load(true, false);
EXPECT_TRUE(status.ok());
EXPECT_EQ(1, column_zone_map.num_pages());
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps();
@ -181,8 +183,8 @@ TEST_F(ColumnZoneMapTest, NormalTestIntPage) {
io::FileReaderSPtr file_reader;
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
ZoneMapIndexReader column_zone_map(file_reader);
Status status = column_zone_map.load(true, false, &index_meta.zone_map_index());
ZoneMapIndexReader column_zone_map(file_reader, index_meta.zone_map_index().page_zone_maps());
Status status = column_zone_map.load(true, false);
EXPECT_TRUE(status.ok());
EXPECT_EQ(3, column_zone_map.num_pages());
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps();