[improvement](segment) reduce memory footprint of column_reader and segment (#24140)
This commit is contained in:
@ -105,7 +105,7 @@ Status PrimaryKeyIndexReader::parse_bf(io::FileReaderSPtr file_reader,
|
||||
// parse bloom filter
|
||||
segment_v2::ColumnIndexMetaPB column_index_meta = meta.bloom_filter_index();
|
||||
segment_v2::BloomFilterIndexReader bf_index_reader(std::move(file_reader),
|
||||
&column_index_meta.bloom_filter_index());
|
||||
column_index_meta.bloom_filter_index());
|
||||
RETURN_IF_ERROR(bf_index_reader.load(!config::disable_pk_storage_page_cache, false));
|
||||
std::unique_ptr<segment_v2::BloomFilterIndexIterator> bf_iter;
|
||||
RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter));
|
||||
|
||||
@ -32,16 +32,15 @@
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
Status BitmapIndexReader::load(bool use_page_cache, bool kept_in_memory,
|
||||
const BitmapIndexPB* index_meta) {
|
||||
Status BitmapIndexReader::load(bool use_page_cache, bool kept_in_memory) {
|
||||
// TODO yyq: implement a new once flag that avoids constructing a Status.
|
||||
return _load_once.call([this, use_page_cache, kept_in_memory, index_meta] {
|
||||
return _load(use_page_cache, kept_in_memory, index_meta);
|
||||
return _load_once.call([this, use_page_cache, kept_in_memory] {
|
||||
return _load(use_page_cache, kept_in_memory, std::move(_index_meta));
|
||||
});
|
||||
}
|
||||
|
||||
Status BitmapIndexReader::_load(bool use_page_cache, bool kept_in_memory,
|
||||
const BitmapIndexPB* index_meta) {
|
||||
std::unique_ptr<BitmapIndexPB> index_meta) {
|
||||
const IndexedColumnMetaPB& dict_meta = index_meta->dict_column();
|
||||
const IndexedColumnMetaPB& bitmap_meta = index_meta->bitmap_column();
|
||||
_has_null = index_meta->has_null();
|
||||
|
||||
@ -43,11 +43,13 @@ class BitmapIndexPB;
|
||||
|
||||
class BitmapIndexReader {
|
||||
public:
|
||||
explicit BitmapIndexReader(io::FileReaderSPtr file_reader)
|
||||
explicit BitmapIndexReader(io::FileReaderSPtr file_reader, const BitmapIndexPB& index_meta)
|
||||
: _file_reader(std::move(file_reader)),
|
||||
_type_info(get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>()) {}
|
||||
_type_info(get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>()) {
|
||||
_index_meta.reset(new BitmapIndexPB(index_meta));
|
||||
}
|
||||
|
||||
Status load(bool use_page_cache, bool kept_in_memory, const BitmapIndexPB*);
|
||||
Status load(bool use_page_cache, bool kept_in_memory);
|
||||
|
||||
// create a new column iterator. Client should delete returned iterator
|
||||
Status new_iterator(BitmapIndexIterator** iterator);
|
||||
@ -57,7 +59,7 @@ public:
|
||||
const TypeInfo* type_info() { return _type_info; }
|
||||
|
||||
private:
|
||||
Status _load(bool use_page_cache, bool kept_in_memory, const BitmapIndexPB*);
|
||||
Status _load(bool use_page_cache, bool kept_in_memory, std::unique_ptr<BitmapIndexPB>);
|
||||
|
||||
private:
|
||||
friend class BitmapIndexIterator;
|
||||
@ -68,6 +70,7 @@ private:
|
||||
DorisCallOnce<Status> _load_once;
|
||||
std::unique_ptr<IndexedColumnReader> _dict_column_reader;
|
||||
std::unique_ptr<IndexedColumnReader> _bitmap_column_reader;
|
||||
std::unique_ptr<BitmapIndexPB> _index_meta;
|
||||
};
|
||||
|
||||
class BitmapIndexIterator {
|
||||
|
||||
@ -41,13 +41,16 @@ class BloomFilterIndexPB;
|
||||
class BloomFilterIndexReader {
|
||||
public:
|
||||
explicit BloomFilterIndexReader(io::FileReaderSPtr file_reader,
|
||||
const BloomFilterIndexPB* bloom_filter_index_meta)
|
||||
const BloomFilterIndexPB& bloom_filter_index_meta)
|
||||
: _file_reader(std::move(file_reader)),
|
||||
_type_info(get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>()),
|
||||
_bloom_filter_index_meta(bloom_filter_index_meta) {}
|
||||
_type_info(get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>()) {
|
||||
_bloom_filter_index_meta.reset(new BloomFilterIndexPB(bloom_filter_index_meta));
|
||||
}
|
||||
|
||||
Status load(bool use_page_cache, bool kept_in_memory);
|
||||
|
||||
BloomFilterAlgorithmPB algorithm() { return _bloom_filter_index_meta->algorithm(); }
|
||||
|
||||
// create a new column iterator.
|
||||
Status new_iterator(std::unique_ptr<BloomFilterIndexIterator>* iterator);
|
||||
|
||||
@ -62,7 +65,7 @@ private:
|
||||
io::FileReaderSPtr _file_reader;
|
||||
DorisCallOnce<Status> _load_once;
|
||||
const TypeInfo* _type_info;
|
||||
const BloomFilterIndexPB* _bloom_filter_index_meta;
|
||||
std::unique_ptr<BloomFilterIndexPB> _bloom_filter_index_meta;
|
||||
std::unique_ptr<IndexedColumnReader> _bloom_filter_reader;
|
||||
};
|
||||
|
||||
|
||||
@ -179,11 +179,11 @@ Status ColumnReader::create(const ColumnReaderOptions& opts, const ColumnMetaPB&
|
||||
|
||||
ColumnReader::ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB& meta,
|
||||
uint64_t num_rows, io::FileReaderSPtr file_reader)
|
||||
: _opts(opts),
|
||||
: _use_index_page_cache(!config::disable_storage_page_cache),
|
||||
_opts(opts),
|
||||
_num_rows(num_rows),
|
||||
_file_reader(std::move(file_reader)),
|
||||
_dict_encoding_type(UNKNOWN_DICT_ENCODING),
|
||||
_use_index_page_cache(!config::disable_storage_page_cache) {
|
||||
_dict_encoding_type(UNKNOWN_DICT_ENCODING) {
|
||||
_meta_length = meta.length();
|
||||
_meta_type = (FieldType)meta.type();
|
||||
if (_meta_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
|
||||
@ -207,32 +207,35 @@ Status ColumnReader::init(const ColumnMetaPB* meta) {
|
||||
auto& index_meta = meta->indexes(i);
|
||||
switch (index_meta.type()) {
|
||||
case ORDINAL_INDEX:
|
||||
_ordinal_index_meta = &index_meta.ordinal_index();
|
||||
_ordinal_index.reset(new OrdinalIndexReader(_file_reader, _num_rows));
|
||||
_ordinal_index.reset(
|
||||
new OrdinalIndexReader(_file_reader, _num_rows, index_meta.ordinal_index()));
|
||||
break;
|
||||
case ZONE_MAP_INDEX:
|
||||
_zone_map_index_meta = &index_meta.zone_map_index();
|
||||
_zone_map_index.reset(new ZoneMapIndexReader(_file_reader));
|
||||
_segment_zone_map =
|
||||
std::make_unique<ZoneMapPB>(index_meta.zone_map_index().segment_zone_map());
|
||||
_zone_map_index.reset(new ZoneMapIndexReader(
|
||||
_file_reader, index_meta.zone_map_index().page_zone_maps()));
|
||||
break;
|
||||
case BITMAP_INDEX:
|
||||
_bitmap_index_meta = &index_meta.bitmap_index();
|
||||
_bitmap_index.reset(new BitmapIndexReader(_file_reader));
|
||||
_bitmap_index.reset(new BitmapIndexReader(_file_reader, index_meta.bitmap_index()));
|
||||
break;
|
||||
case BLOOM_FILTER_INDEX:
|
||||
_bf_index_meta = &index_meta.bloom_filter_index();
|
||||
_bloom_filter_index.reset(new BloomFilterIndexReader(_file_reader, _bf_index_meta));
|
||||
_bloom_filter_index.reset(
|
||||
new BloomFilterIndexReader(_file_reader, index_meta.bloom_filter_index()));
|
||||
break;
|
||||
default:
|
||||
return Status::Corruption("Bad file {}: invalid column index type {}",
|
||||
_file_reader->path().native(), index_meta.type());
|
||||
}
|
||||
}
|
||||
|
||||
// ArrayColumnWriter writes a single empty array and flushes. In this scenario,
|
||||
// the item writer doesn't write any data and the corresponding ordinal index is empty.
|
||||
if (_ordinal_index_meta == nullptr && !is_empty()) {
|
||||
if (_ordinal_index == nullptr && !is_empty()) {
|
||||
return Status::Corruption("Bad file {}: missing ordinal index for column {}",
|
||||
_file_reader->path().native(), meta->column_id());
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -290,8 +293,7 @@ Status ColumnReader::next_batch_of_zone_map(size_t* n, vectorized::MutableColumn
|
||||
FieldType type = _type_info->type();
|
||||
std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, _meta_length));
|
||||
std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, _meta_length));
|
||||
_parse_zone_map_skip_null(_zone_map_index_meta->segment_zone_map(), min_value.get(),
|
||||
max_value.get());
|
||||
_parse_zone_map_skip_null(*_segment_zone_map, min_value.get(), max_value.get());
|
||||
|
||||
dst->reserve(*n);
|
||||
bool is_string = is_olap_string_type(type);
|
||||
@ -325,16 +327,16 @@ Status ColumnReader::next_batch_of_zone_map(size_t* n, vectorized::MutableColumn
|
||||
}
|
||||
|
||||
bool ColumnReader::match_condition(const AndBlockColumnPredicate* col_predicates) const {
|
||||
if (_zone_map_index_meta == nullptr) {
|
||||
if (_zone_map_index == nullptr) {
|
||||
return true;
|
||||
}
|
||||
FieldType type = _type_info->type();
|
||||
std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, _meta_length));
|
||||
std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, _meta_length));
|
||||
_parse_zone_map(_zone_map_index_meta->segment_zone_map(), min_value.get(), max_value.get());
|
||||
_parse_zone_map(*_segment_zone_map, min_value.get(), max_value.get());
|
||||
|
||||
return _zone_map_match_condition(_zone_map_index_meta->segment_zone_map(), min_value.get(),
|
||||
max_value.get(), col_predicates);
|
||||
return _zone_map_match_condition(*_segment_zone_map, min_value.get(), max_value.get(),
|
||||
col_predicates);
|
||||
}
|
||||
|
||||
void ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, WrapperField* min_value_container,
|
||||
@ -473,20 +475,19 @@ Status ColumnReader::get_row_ranges_by_bloom_filter(const AndBlockColumnPredicat
|
||||
}
|
||||
|
||||
Status ColumnReader::_load_ordinal_index(bool use_page_cache, bool kept_in_memory) {
|
||||
DCHECK(_ordinal_index_meta != nullptr);
|
||||
return _ordinal_index->load(use_page_cache, kept_in_memory, _ordinal_index_meta);
|
||||
return _ordinal_index->load(use_page_cache, kept_in_memory);
|
||||
}
|
||||
|
||||
Status ColumnReader::_load_zone_map_index(bool use_page_cache, bool kept_in_memory) {
|
||||
if (_zone_map_index_meta != nullptr) {
|
||||
return _zone_map_index->load(use_page_cache, kept_in_memory, _zone_map_index_meta);
|
||||
if (_zone_map_index != nullptr) {
|
||||
return _zone_map_index->load(use_page_cache, kept_in_memory);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ColumnReader::_load_bitmap_index(bool use_page_cache, bool kept_in_memory) {
|
||||
if (_bitmap_index_meta != nullptr) {
|
||||
return _bitmap_index->load(use_page_cache, kept_in_memory, _bitmap_index_meta);
|
||||
if (_bitmap_index != nullptr) {
|
||||
return _bitmap_index->load(use_page_cache, kept_in_memory);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -527,8 +528,18 @@ Status ColumnReader::_load_inverted_index_index(const TabletIndex* index_meta) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Reports whether this column has a bloom filter index of the requested kind.
//   ngram == true : true only if the index algorithm is NGRAM_BLOOM_FILTER.
//   ngram == false: true only if the index algorithm is anything other than
//                   NGRAM_BLOOM_FILTER.
// Returns false whenever no bloom filter index reader is set on this column.
bool ColumnReader::has_bloom_filter_index(bool ngram) const {
|
||||
// No bloom filter index reader: neither kind of bloom filter is available.
if (_bloom_filter_index == nullptr) return false;
|
||||
|
||||
if (ngram) {
|
||||
// Caller asked for an ngram bloom filter: the algorithm must match exactly.
return _bloom_filter_index->algorithm() == BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER;
|
||||
} else {
|
||||
// Caller asked for a non-ngram bloom filter: any other algorithm qualifies.
return _bloom_filter_index->algorithm() != BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER;
|
||||
}
|
||||
}
|
||||
|
||||
Status ColumnReader::_load_bloom_filter_index(bool use_page_cache, bool kept_in_memory) {
|
||||
if (_bf_index_meta != nullptr) {
|
||||
if (_bloom_filter_index != nullptr) {
|
||||
return _bloom_filter_index->load(use_page_cache, kept_in_memory);
|
||||
}
|
||||
return Status::OK();
|
||||
|
||||
@ -137,18 +137,9 @@ public:
|
||||
|
||||
const EncodingInfo* encoding_info() const { return _encoding_info; }
|
||||
|
||||
bool has_zone_map() const { return _zone_map_index_meta != nullptr; }
|
||||
bool has_bitmap_index() const { return _bitmap_index_meta != nullptr; }
|
||||
bool has_bloom_filter_index(bool ngram) const {
|
||||
if (_bf_index_meta == nullptr) return false;
|
||||
|
||||
if (ngram) {
|
||||
return _bf_index_meta->algorithm() == BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER;
|
||||
} else {
|
||||
return _bf_index_meta->algorithm() != BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER;
|
||||
}
|
||||
}
|
||||
|
||||
bool has_zone_map() const { return _zone_map_index != nullptr; }
|
||||
bool has_bitmap_index() const { return _bitmap_index != nullptr; }
|
||||
bool has_bloom_filter_index(bool ngram) const;
|
||||
// Check if this column could match `cond' using segment zone map.
|
||||
// Since segment zone map is stored in metadata, this function is fast without I/O.
|
||||
// Return true if segment zone map is absent or `cond' could be satisfied, false otherwise.
|
||||
@ -226,6 +217,8 @@ private:
|
||||
FieldType _meta_type;
|
||||
FieldType _meta_children_column_type;
|
||||
bool _meta_is_nullable;
|
||||
bool _use_index_page_cache;
|
||||
|
||||
PagePointer _meta_dict_page;
|
||||
CompressionTypePB _meta_compression;
|
||||
|
||||
@ -241,20 +234,15 @@ private:
|
||||
const EncodingInfo* _encoding_info =
|
||||
nullptr; // initialized in init(), used for create PageDecoder
|
||||
|
||||
bool _use_index_page_cache;
|
||||
|
||||
// meta for various column indexes (null if the index is absent)
|
||||
const ZoneMapIndexPB* _zone_map_index_meta = nullptr;
|
||||
const OrdinalIndexPB* _ordinal_index_meta = nullptr;
|
||||
const BitmapIndexPB* _bitmap_index_meta = nullptr;
|
||||
const BloomFilterIndexPB* _bf_index_meta = nullptr;
|
||||
std::unique_ptr<ZoneMapPB> _segment_zone_map;
|
||||
|
||||
mutable std::mutex _load_index_lock;
|
||||
std::unique_ptr<ZoneMapIndexReader> _zone_map_index;
|
||||
std::unique_ptr<OrdinalIndexReader> _ordinal_index;
|
||||
std::unique_ptr<BitmapIndexReader> _bitmap_index;
|
||||
std::shared_ptr<InvertedIndexReader> _inverted_index;
|
||||
std::unique_ptr<BloomFilterIndexReader> _bloom_filter_index;
|
||||
std::shared_ptr<BloomFilterIndexReader> _bloom_filter_index;
|
||||
|
||||
std::vector<std::unique_ptr<ColumnReader>> _sub_readers;
|
||||
|
||||
|
||||
@ -67,16 +67,15 @@ Status OrdinalIndexWriter::finish(io::FileWriter* file_writer, ColumnIndexMetaPB
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status OrdinalIndexReader::load(bool use_page_cache, bool kept_in_memory,
|
||||
const OrdinalIndexPB* index_meta) {
|
||||
Status OrdinalIndexReader::load(bool use_page_cache, bool kept_in_memory) {
|
||||
// TODO yyq: implement a new once flag that avoids constructing a Status.
|
||||
return _load_once.call([this, use_page_cache, kept_in_memory, index_meta] {
|
||||
return _load(use_page_cache, kept_in_memory, index_meta);
|
||||
return _load_once.call([this, use_page_cache, kept_in_memory] {
|
||||
return _load(use_page_cache, kept_in_memory, std::move(_meta_pb));
|
||||
});
|
||||
}
|
||||
|
||||
Status OrdinalIndexReader::_load(bool use_page_cache, bool kept_in_memory,
|
||||
const OrdinalIndexPB* index_meta) {
|
||||
std::unique_ptr<OrdinalIndexPB> index_meta) {
|
||||
if (index_meta->root_page().is_root_data_page()) {
|
||||
// only one data page, no index page
|
||||
_num_pages = 1;
|
||||
|
||||
@ -66,11 +66,14 @@ class OrdinalPageIndexIterator;
|
||||
|
||||
class OrdinalIndexReader {
|
||||
public:
|
||||
explicit OrdinalIndexReader(io::FileReaderSPtr file_reader, ordinal_t num_values)
|
||||
: _file_reader(std::move(file_reader)), _num_values(num_values) {}
|
||||
explicit OrdinalIndexReader(io::FileReaderSPtr file_reader, ordinal_t num_values,
|
||||
const OrdinalIndexPB& meta_pb)
|
||||
: _file_reader(std::move(file_reader)), _num_values(num_values) {
|
||||
_meta_pb.reset(new OrdinalIndexPB(meta_pb));
|
||||
}
|
||||
|
||||
// load and parse the index page into memory
|
||||
Status load(bool use_page_cache, bool kept_in_memory, const OrdinalIndexPB* index_meta);
|
||||
Status load(bool use_page_cache, bool kept_in_memory);
|
||||
|
||||
// the returned iter points to the largest element which is less than `ordinal`,
|
||||
// or points to the first element if all elements are greater than `ordinal`,
|
||||
@ -88,7 +91,8 @@ public:
|
||||
int32_t num_data_pages() const { return _num_pages; }
|
||||
|
||||
private:
|
||||
Status _load(bool use_page_cache, bool kept_in_memory, const OrdinalIndexPB* index_meta);
|
||||
Status _load(bool use_page_cache, bool kept_in_memory,
|
||||
std::unique_ptr<OrdinalIndexPB> index_meta);
|
||||
|
||||
private:
|
||||
friend OrdinalPageIndexIterator;
|
||||
@ -96,6 +100,8 @@ private:
|
||||
io::FileReaderSPtr _file_reader;
|
||||
DorisCallOnce<Status> _load_once;
|
||||
|
||||
std::unique_ptr<OrdinalIndexPB> _meta_pb;
|
||||
|
||||
// total number of values (including NULLs) in the indexed column,
|
||||
// equal to 1 + 'last ordinal of the last data page'
|
||||
ordinal_t _num_values;
|
||||
|
||||
@ -84,9 +84,9 @@ Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t se
|
||||
|
||||
Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema)
|
||||
: _segment_id(segment_id),
|
||||
_meta_mem_usage(0),
|
||||
_rowset_id(rowset_id),
|
||||
_tablet_schema(tablet_schema),
|
||||
_meta_mem_usage(0),
|
||||
_segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) {}
|
||||
|
||||
Segment::~Segment() {
|
||||
@ -96,8 +96,16 @@ Segment::~Segment() {
|
||||
}
|
||||
|
||||
Status Segment::_open() {
|
||||
RETURN_IF_ERROR(_parse_footer());
|
||||
RETURN_IF_ERROR(_create_column_readers());
|
||||
SegmentFooterPB footer;
|
||||
RETURN_IF_ERROR(_parse_footer(&footer));
|
||||
RETURN_IF_ERROR(_create_column_readers(footer));
|
||||
_pk_index_meta.reset(footer.has_primary_key_index_meta()
|
||||
? new PrimaryKeyIndexMetaPB(footer.primary_key_index_meta())
|
||||
: nullptr);
|
||||
// delete_bitmap_calculator_test.cpp
|
||||
// DCHECK(footer.has_short_key_index_page());
|
||||
_sk_index_page = footer.short_key_index_page();
|
||||
_num_rows = footer.num_rows();
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -154,7 +162,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o
|
||||
return iter->get()->init(read_options);
|
||||
}
|
||||
|
||||
Status Segment::_parse_footer() {
|
||||
Status Segment::_parse_footer(SegmentFooterPB* footer) {
|
||||
// Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4), MagicNumber(4)
|
||||
auto file_size = _file_reader->size();
|
||||
if (file_size < 12) {
|
||||
@ -170,7 +178,6 @@ Status Segment::_parse_footer() {
|
||||
_file_reader->read_at(file_size - 12, Slice(fixed_buf, 12), &bytes_read, &io_ctx));
|
||||
DCHECK_EQ(bytes_read, 12);
|
||||
|
||||
// validate magic number
|
||||
if (memcmp(fixed_buf + 8, k_segment_magic, k_segment_magic_length) != 0) {
|
||||
return Status::Corruption("Bad segment file {}: magic number not match",
|
||||
_file_reader->path().native());
|
||||
@ -182,8 +189,6 @@ Status Segment::_parse_footer() {
|
||||
return Status::Corruption("Bad segment file {}: file size {} < {}",
|
||||
_file_reader->path().native(), file_size, 12 + footer_length);
|
||||
}
|
||||
_meta_mem_usage += footer_length;
|
||||
_segment_meta_mem_tracker->consume(footer_length);
|
||||
|
||||
std::string footer_buf;
|
||||
footer_buf.resize(footer_length);
|
||||
@ -201,7 +206,7 @@ Status Segment::_parse_footer() {
|
||||
}
|
||||
|
||||
// deserialize footer PB
|
||||
if (!_footer.ParseFromString(footer_buf)) {
|
||||
if (!footer->ParseFromString(footer_buf)) {
|
||||
return Status::Corruption("Bad segment file {}: failed to parse SegmentFooterPB",
|
||||
_file_reader->path().native());
|
||||
}
|
||||
@ -210,10 +215,10 @@ Status Segment::_parse_footer() {
|
||||
|
||||
Status Segment::_load_pk_bloom_filter() {
|
||||
DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS);
|
||||
DCHECK(_footer.has_primary_key_index_meta());
|
||||
DCHECK(_pk_index_meta != nullptr);
|
||||
DCHECK(_pk_index_reader != nullptr);
|
||||
return _load_pk_bf_once.call([this] {
|
||||
RETURN_IF_ERROR(_pk_index_reader->parse_bf(_file_reader, _footer.primary_key_index_meta()));
|
||||
RETURN_IF_ERROR(_pk_index_reader->parse_bf(_file_reader, *_pk_index_meta));
|
||||
_meta_mem_usage += _pk_index_reader->get_bf_memory_size();
|
||||
_segment_meta_mem_tracker->consume(_pk_index_reader->get_bf_memory_size());
|
||||
return Status::OK();
|
||||
@ -227,10 +232,9 @@ Status Segment::load_pk_index_and_bf() {
|
||||
}
|
||||
Status Segment::load_index() {
|
||||
return _load_index_once.call([this] {
|
||||
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _footer.has_primary_key_index_meta()) {
|
||||
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr) {
|
||||
_pk_index_reader.reset(new PrimaryKeyIndexReader());
|
||||
RETURN_IF_ERROR(
|
||||
_pk_index_reader->parse_index(_file_reader, _footer.primary_key_index_meta()));
|
||||
RETURN_IF_ERROR(_pk_index_reader->parse_index(_file_reader, *_pk_index_meta));
|
||||
_meta_mem_usage += _pk_index_reader->get_memory_size();
|
||||
_segment_meta_mem_tracker->consume(_pk_index_reader->get_memory_size());
|
||||
return Status::OK();
|
||||
@ -238,7 +242,7 @@ Status Segment::load_index() {
|
||||
// read and parse short key index page
|
||||
PageReadOptions opts;
|
||||
opts.file_reader = _file_reader.get();
|
||||
opts.page_pointer = PagePointer(_footer.short_key_index_page());
|
||||
opts.page_pointer = PagePointer(_sk_index_page);
|
||||
opts.codec = nullptr; // short key index page uses NO_COMPRESSION for now
|
||||
OlapReaderStatistics tmp_stats;
|
||||
opts.use_page_cache = true;
|
||||
@ -259,24 +263,26 @@ Status Segment::load_index() {
|
||||
});
|
||||
}
|
||||
|
||||
Status Segment::_create_column_readers() {
|
||||
for (uint32_t ordinal = 0; ordinal < _footer.columns().size(); ++ordinal) {
|
||||
auto& column_pb = _footer.columns(ordinal);
|
||||
_column_id_to_footer_ordinal.emplace(column_pb.unique_id(), ordinal);
|
||||
Status Segment::_create_column_readers(const SegmentFooterPB& footer) {
|
||||
std::unordered_map<uint32_t, uint32_t> column_id_to_footer_ordinal;
|
||||
|
||||
for (uint32_t ordinal = 0; ordinal < footer.columns().size(); ++ordinal) {
|
||||
auto& column_pb = footer.columns(ordinal);
|
||||
column_id_to_footer_ordinal.emplace(column_pb.unique_id(), ordinal);
|
||||
}
|
||||
|
||||
for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); ++ordinal) {
|
||||
auto& column = _tablet_schema->column(ordinal);
|
||||
auto iter = _column_id_to_footer_ordinal.find(column.unique_id());
|
||||
if (iter == _column_id_to_footer_ordinal.end()) {
|
||||
auto iter = column_id_to_footer_ordinal.find(column.unique_id());
|
||||
if (iter == column_id_to_footer_ordinal.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ColumnReaderOptions opts;
|
||||
opts.kept_in_memory = _tablet_schema->is_in_memory();
|
||||
std::unique_ptr<ColumnReader> reader;
|
||||
RETURN_IF_ERROR(ColumnReader::create(opts, _footer.columns(iter->second),
|
||||
_footer.num_rows(), _file_reader, &reader));
|
||||
RETURN_IF_ERROR(ColumnReader::create(opts, footer.columns(iter->second), footer.num_rows(),
|
||||
_file_reader, &reader));
|
||||
_column_readers.emplace(column.unique_id(), std::move(reader));
|
||||
}
|
||||
return Status::OK();
|
||||
|
||||
@ -81,7 +81,7 @@ public:
|
||||
|
||||
RowsetId rowset_id() const { return _rowset_id; }
|
||||
|
||||
uint32_t num_rows() const { return _footer.num_rows(); }
|
||||
uint32_t num_rows() const { return _num_rows; }
|
||||
|
||||
Status new_column_iterator(const TabletColumn& tablet_column,
|
||||
std::unique_ptr<ColumnIterator>* iter);
|
||||
@ -108,20 +108,17 @@ public:
|
||||
|
||||
Status read_key_by_rowid(uint32_t row_id, std::string* key);
|
||||
|
||||
// only used by UT
|
||||
const SegmentFooterPB& footer() const { return _footer; }
|
||||
|
||||
Status load_index();
|
||||
|
||||
Status load_pk_index_and_bf();
|
||||
|
||||
std::string min_key() {
|
||||
DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _footer.has_primary_key_index_meta());
|
||||
return _footer.primary_key_index_meta().min_key();
|
||||
DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr);
|
||||
return _pk_index_meta->min_key();
|
||||
}
|
||||
std::string max_key() {
|
||||
DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _footer.has_primary_key_index_meta());
|
||||
return _footer.primary_key_index_meta().max_key();
|
||||
DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr);
|
||||
return _pk_index_meta->max_key();
|
||||
}
|
||||
|
||||
io::FileReaderSPtr file_reader() { return _file_reader; }
|
||||
@ -133,8 +130,8 @@ private:
|
||||
Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema);
|
||||
// open segment file and read the minimum amount of necessary information (footer)
|
||||
Status _open();
|
||||
Status _parse_footer();
|
||||
Status _create_column_readers();
|
||||
Status _parse_footer(SegmentFooterPB* footer);
|
||||
Status _create_column_readers(const SegmentFooterPB& footer);
|
||||
Status _load_pk_bloom_filter();
|
||||
|
||||
private:
|
||||
@ -142,16 +139,14 @@ private:
|
||||
io::FileReaderSPtr _file_reader;
|
||||
|
||||
uint32_t _segment_id;
|
||||
uint32_t _num_rows;
|
||||
int64_t _meta_mem_usage;
|
||||
|
||||
RowsetId _rowset_id;
|
||||
TabletSchemaSPtr _tablet_schema;
|
||||
|
||||
int64_t _meta_mem_usage;
|
||||
SegmentFooterPB _footer;
|
||||
|
||||
// Map from column unique id to column ordinal in footer's ColumnMetaPB
|
||||
// If we can't find unique id from it, it means this segment is created
|
||||
// with an old schema.
|
||||
std::unordered_map<uint32_t, uint32_t> _column_id_to_footer_ordinal;
|
||||
std::unique_ptr<PrimaryKeyIndexMetaPB> _pk_index_meta;
|
||||
PagePointerPB _sk_index_page;
|
||||
|
||||
// map column unique id ---> column reader
|
||||
// ColumnReader for each column in TabletSchema. If ColumnReader is nullptr,
|
||||
@ -171,6 +166,7 @@ private:
|
||||
std::unique_ptr<PrimaryKeyIndexReader> _pk_index_reader;
|
||||
// Segment may be destructed after StorageEngine, in order to exit gracefully.
|
||||
std::shared_ptr<MemTracker> _segment_meta_mem_tracker;
|
||||
std::mutex _open_lock;
|
||||
};
|
||||
|
||||
} // namespace segment_v2
|
||||
|
||||
@ -145,17 +145,16 @@ Status TypedZoneMapIndexWriter<Type>::finish(io::FileWriter* file_writer,
|
||||
return writer.finish(meta->mutable_page_zone_maps());
|
||||
}
|
||||
|
||||
Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory,
|
||||
const ZoneMapIndexPB* index_meta) {
|
||||
Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory) {
|
||||
// TODO yyq: implement a new once flag that avoids constructing a Status.
|
||||
return _load_once.call([this, use_page_cache, kept_in_memory, index_meta] {
|
||||
return _load(use_page_cache, kept_in_memory, index_meta);
|
||||
return _load_once.call([this, use_page_cache, kept_in_memory] {
|
||||
return _load(use_page_cache, kept_in_memory, std::move(_page_zone_maps_meta));
|
||||
});
|
||||
}
|
||||
|
||||
Status ZoneMapIndexReader::_load(bool use_page_cache, bool kept_in_memory,
|
||||
const ZoneMapIndexPB* index_meta) {
|
||||
IndexedColumnReader reader(_file_reader, index_meta->page_zone_maps());
|
||||
std::unique_ptr<IndexedColumnMetaPB> page_zone_maps_meta) {
|
||||
IndexedColumnReader reader(_file_reader, *page_zone_maps_meta);
|
||||
RETURN_IF_ERROR(reader.load(use_page_cache, kept_in_memory));
|
||||
IndexedColumnIterator iter(&reader);
|
||||
|
||||
|
||||
@ -147,23 +147,27 @@ private:
|
||||
|
||||
class ZoneMapIndexReader {
|
||||
public:
|
||||
explicit ZoneMapIndexReader(io::FileReaderSPtr file_reader)
|
||||
: _file_reader(std::move(file_reader)) {}
|
||||
explicit ZoneMapIndexReader(io::FileReaderSPtr file_reader,
|
||||
const IndexedColumnMetaPB& page_zone_maps)
|
||||
: _file_reader(std::move(file_reader)) {
|
||||
_page_zone_maps_meta.reset(new IndexedColumnMetaPB(page_zone_maps));
|
||||
}
|
||||
|
||||
// load all page zone maps into memory
|
||||
Status load(bool use_page_cache, bool kept_in_memory, const ZoneMapIndexPB*);
|
||||
Status load(bool use_page_cache, bool kept_in_memory);
|
||||
|
||||
const std::vector<ZoneMapPB>& page_zone_maps() const { return _page_zone_maps; }
|
||||
|
||||
int32_t num_pages() const { return _page_zone_maps.size(); }
|
||||
|
||||
private:
|
||||
Status _load(bool use_page_cache, bool kept_in_memory, const ZoneMapIndexPB*);
|
||||
Status _load(bool use_page_cache, bool kept_in_memory, std::unique_ptr<IndexedColumnMetaPB>);
|
||||
|
||||
private:
|
||||
DorisCallOnce<Status> _load_once;
|
||||
// TODO: yyq, we should remove file_reader from here.
|
||||
io::FileReaderSPtr _file_reader;
|
||||
std::unique_ptr<IndexedColumnMetaPB> _page_zone_maps_meta;
|
||||
std::vector<ZoneMapPB> _page_zone_maps;
|
||||
};
|
||||
|
||||
|
||||
@ -80,8 +80,8 @@ void get_bitmap_reader_iter(const std::string& file_name, const ColumnIndexMetaP
|
||||
BitmapIndexReader** reader, BitmapIndexIterator** iter) {
|
||||
io::FileReaderSPtr file_reader;
|
||||
ASSERT_EQ(io::global_local_filesystem()->open_file(file_name, &file_reader), Status::OK());
|
||||
*reader = new BitmapIndexReader(std::move(file_reader));
|
||||
auto st = (*reader)->load(true, false, &meta.bitmap_index());
|
||||
*reader = new BitmapIndexReader(std::move(file_reader), meta.bitmap_index());
|
||||
auto st = (*reader)->load(true, false);
|
||||
EXPECT_TRUE(st.ok());
|
||||
|
||||
st = (*reader)->new_iterator(iter);
|
||||
|
||||
@ -97,7 +97,7 @@ void get_bloom_filter_reader_iter(const std::string& file_name, const ColumnInde
|
||||
std::string fname = dname + "/" + file_name;
|
||||
io::FileReaderSPtr file_reader;
|
||||
ASSERT_EQ(io::global_local_filesystem()->open_file(fname, &file_reader), Status::OK());
|
||||
*reader = new BloomFilterIndexReader(std::move(file_reader), &meta.bloom_filter_index());
|
||||
*reader = new BloomFilterIndexReader(std::move(file_reader), meta.bloom_filter_index());
|
||||
auto st = (*reader)->load(true, false);
|
||||
EXPECT_TRUE(st.ok());
|
||||
|
||||
|
||||
@ -70,8 +70,8 @@ TEST_F(OrdinalPageIndexTest, normal) {
|
||||
|
||||
io::FileReaderSPtr file_reader;
|
||||
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
|
||||
OrdinalIndexReader index(file_reader, 16 * 1024 * 4096 + 1);
|
||||
EXPECT_TRUE(index.load(true, false, &index_meta.ordinal_index()).ok());
|
||||
OrdinalIndexReader index(file_reader, 16 * 1024 * 4096 + 1, index_meta.ordinal_index());
|
||||
EXPECT_TRUE(index.load(true, false).ok());
|
||||
EXPECT_EQ(16 * 1024, index.num_data_pages());
|
||||
EXPECT_EQ(1, index.get_first_ordinal(0));
|
||||
EXPECT_EQ(4096, index.get_last_ordinal(0));
|
||||
@ -124,8 +124,8 @@ TEST_F(OrdinalPageIndexTest, one_data_page) {
|
||||
EXPECT_EQ(data_page_pointer, root_page_pointer);
|
||||
}
|
||||
|
||||
OrdinalIndexReader index(nullptr, num_values);
|
||||
EXPECT_TRUE(index.load(true, false, &index_meta.ordinal_index()).ok());
|
||||
OrdinalIndexReader index(nullptr, num_values, index_meta.ordinal_index());
|
||||
EXPECT_TRUE(index.load(true, false).ok());
|
||||
EXPECT_EQ(1, index.num_data_pages());
|
||||
EXPECT_EQ(0, index.get_first_ordinal(0));
|
||||
EXPECT_EQ(num_values - 1, index.get_last_ordinal(0));
|
||||
|
||||
@ -81,8 +81,9 @@ public:
|
||||
|
||||
io::FileReaderSPtr file_reader;
|
||||
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
|
||||
ZoneMapIndexReader column_zone_map(file_reader);
|
||||
Status status = column_zone_map.load(true, false, &index_meta.zone_map_index());
|
||||
ZoneMapIndexReader column_zone_map(file_reader,
|
||||
index_meta.zone_map_index().page_zone_maps());
|
||||
Status status = column_zone_map.load(true, false);
|
||||
EXPECT_TRUE(status.ok());
|
||||
EXPECT_EQ(3, column_zone_map.num_pages());
|
||||
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps();
|
||||
@ -128,8 +129,9 @@ public:
|
||||
|
||||
io::FileReaderSPtr file_reader;
|
||||
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
|
||||
ZoneMapIndexReader column_zone_map(file_reader);
|
||||
Status status = column_zone_map.load(true, false, &index_meta.zone_map_index());
|
||||
ZoneMapIndexReader column_zone_map(file_reader,
|
||||
index_meta.zone_map_index().page_zone_maps());
|
||||
Status status = column_zone_map.load(true, false);
|
||||
EXPECT_TRUE(status.ok());
|
||||
EXPECT_EQ(1, column_zone_map.num_pages());
|
||||
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps();
|
||||
@ -181,8 +183,8 @@ TEST_F(ColumnZoneMapTest, NormalTestIntPage) {
|
||||
|
||||
io::FileReaderSPtr file_reader;
|
||||
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
|
||||
ZoneMapIndexReader column_zone_map(file_reader);
|
||||
Status status = column_zone_map.load(true, false, &index_meta.zone_map_index());
|
||||
ZoneMapIndexReader column_zone_map(file_reader, index_meta.zone_map_index().page_zone_maps());
|
||||
Status status = column_zone_map.load(true, false);
|
||||
EXPECT_TRUE(status.ok());
|
||||
EXPECT_EQ(3, column_zone_map.num_pages());
|
||||
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps();
|
||||
|
||||
Reference in New Issue
Block a user